Python RunPython Exemples, share.normalize.tools.RunPython Python Exemples

Exemple #1

0

Afficher le fichier

class PersonnelAgent(Parser):
    # Agent built from a personnel record. The combined first/last name is
    # used both as the agent's name and as the input to GuessAgentType.
    schema = tools.GuessAgentType(
        tools.RunPython('combine_first_last_name', ctx))

    name = tools.RunPython('combine_first_last_name', ctx)
    location = tools.RunPython('get_address', ctx['Contact_Address'])

    class Extra:
        role = tools.Try(ctx.Role)
        url = tools.Try(ctx.Data_Center_URL)

    def combine_first_last_name(self, ctx):
        """Return ``First_Name`` and ``Last_Name`` joined by one space."""
        return ctx['First_Name'] + ' ' + ctx['Last_Name']

    def get_address(self, ctx):
        """Format the contact address stored in ``ctx``.

        ``ctx['Address']`` is either a single value or a list of address
        lines. The first line becomes ``address1`` and the second, when
        there is one, becomes ``address2``.

        Previously the non-list branch referenced ``address2`` before it
        was ever assigned, and a one-element list raised ``IndexError``.
        """
        address = ctx['Address']
        lines = address if isinstance(address, list) else [address]
        address1 = lines[0]
        # Assumes format_address treats an empty string as "no second
        # line" -- TODO confirm against its definition.
        address2 = lines[1] if len(lines) > 1 else ''
        return format_address(address1=address1,
                              address2=address2,
                              city=ctx['City'],
                              state_or_province=ctx['Province_or_State'],
                              postal_code=ctx['Postal_Code'],
                              country=ctx['Country'])

Exemple #2

0

Afficher le fichier

Fichier : normalizer.py Projet : sheriefvt/SHARE

class CreativeWork(Parser):
    # Parser for an HTML record page: most fields are read through Soup
    # lookups on schema.org ``itemprop`` attributes, the rest through the
    # <dt>/<dd> pairs resolved by ``get_dd``.
    schema = tools.RunPython('get_type', ctx)

    title = tools.RunPython('get_title', ctx)
    description = Soup(ctx, 'p', class_='genericfile_description')['#text']
    date_published = tools.ParseDate(
        Soup(ctx, itemprop='datePublished')['#text'])
    date_updated = tools.ParseDate(Soup(ctx, itemprop='dateModified')['#text'])
    rights = tools.OneOf(tools.RunPython('get_rights_url', ctx),
                         tools.RunPython('get_dd', ctx, 'Rights')['#text'],
                         tools.Static(None))
    language = tools.Try(
        tools.ParseLanguage(Soup(ctx, itemprop='inLanguage')['#text']))

    tags = tools.Map(tools.Delegate(ThroughTags), Soup(ctx,
                                                       itemprop='keywords'))

    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.Try(tools.RunPython('get_dd', ctx, 'Permanent Link')),
    )

    related_agents = tools.Concat(
        tools.Map(tools.Delegate(Creator), Soup(ctx, itemprop='creator')),
        tools.Map(tools.Delegate(Contributor), Soup(ctx,
                                                    itemprop='contributor')),
        tools.Map(tools.Delegate(Publisher), Soup(ctx, itemprop='publisher')),
    )

    class Extra:
        gwu_unit = tools.RunPython('get_dd', ctx, 'GW Unit')['#text']
        related_url = tools.RunPython('get_dd', ctx, 'Related URL')['#text']
        previous_publication_information = tools.RunPython(
            'get_dd', ctx, 'Previous Publication Information')['#text']
        depositor = tools.RunPython('get_dd', ctx, 'Depositor')['#text']
        characterization = tools.RunPython('get_dd', ctx,
                                           'Characterization')['#text']

    def get_type(self, obj):
        """Map the ``itemtype`` of the first ``<div>`` to a schema name.

        Unknown item types fall back to ``'CreativeWork'``.
        """
        return {
            'http://schema.org/CreativeWork': 'CreativeWork',
            'http://schema.org/Article': 'Article',
            'http://schema.org/Book': 'Book',
        }.get(obj.soup.find('div')['itemtype'], 'CreativeWork')

    def get_title(self, obj):
        """Return the text of the ``<h1>`` with its label span removed.

        The label span is removed from the tree in place. ``find`` returns
        ``None`` when the heading has no such span, so the removal is
        skipped in that case instead of raising ``AttributeError``.
        """
        title = obj.h1.soup
        label = title.find('span', class_='label')
        if label is not None:
            label.decompose()
        return title.get_text()

    def get_dd(self, obj, dt):
        """Return the ``<dd>`` following the ``<dt>`` whose string is ``dt``.

        The sibling is wrapped in a ``SoupXMLDict``; ``None`` is returned
        when no matching ``<dt>`` exists.
        """
        dt_tag = obj.soup.find('dt', string=dt)
        if dt_tag:
            return SoupXMLDict(soup=dt_tag.find_next_sibling('dd'))
        return None

    def get_rights_url(self, obj):
        """Return the ``href`` of the link wrapping the "new window" icon.

        Deliberately left to raise when the 'Rights' entry or the icon is
        missing: ``rights`` wraps this call in ``tools.OneOf``, presumably
        so a failure falls through to the next alternative -- verify
        against the OneOf implementation before adding guards here.
        """
        dd = self.get_dd(obj, 'Rights')
        return dd.soup.find('i', class_='glyphicon-new-window').parent['href']

Exemple #3

0

Afficher le fichier

Fichier : normalizer.py Projet : sheriefvt/SHARE

 class Extra:
     # Every field below runs the parser method named 'get_dd' through
     # tools.RunPython, passing ctx and a label string, and then reads the
     # '#text' entry of whatever that call returns.
     gwu_unit = tools.RunPython('get_dd', ctx, 'GW Unit')['#text']
     related_url = tools.RunPython('get_dd', ctx, 'Related URL')['#text']
     previous_publication_information = tools.RunPython(
         'get_dd', ctx, 'Previous Publication Information')['#text']
     depositor = tools.RunPython('get_dd', ctx, 'Depositor')['#text']
     characterization = tools.RunPython('get_dd', ctx,
                                        'Characterization')['#text']

Exemple #4

0

Afficher le fichier

class RelatedLink(Parser):
    # Link whose URL is the text of the context and whose type is the
    # lower-cased '@relatedIdentifierType' attribute.
    schema = 'Link'

    url = tools.RunPython(force_text, ctx)
    type = tools.RunPython('lower', tools.Try(ctx['@relatedIdentifierType']))

    def lower(self, type):
        """Return ``type`` lower-cased, or ``None`` when it is ``None``.

        The argument arrives wrapped in ``tools.Try``, which presumably
        yields ``None`` when the attribute is absent (confirm against the
        Try implementation). Calling ``.lower()`` on that would raise
        ``AttributeError``, so ``None`` is passed through unchanged.
        """
        if type is None:
            return None
        return type.lower()

Exemple #5

0

Afficher le fichier

class Registration(Parser):
    # Registration record; every source field is looked up through the
    # FIELDS name mapping defined outside this class.
    title = ctx[FIELDS['title']]
    description = ctx[FIELDS['summary']]

    # Both dates are parsed from the same 'registration date' timestamp.
    date_published = tools.ParseDate(
        ctx[FIELDS['registration date']].timestamp
    )
    date_updated = tools.ParseDate(
        ctx[FIELDS['registration date']].timestamp
    )

    related_agents = tools.Concat(
        tools.Delegate(
            PrincipalInvestigator,
            ctx[FIELDS['primary investigator']],
        ),
        tools.Delegate(
            OtherInvestigator,
            ctx[FIELDS['other investigator']],
        ),
        tools.Map(
            tools.Delegate(AdditionalInvestigator),
            tools.RunPython(
                'split_names',
                ctx[FIELDS['additional investigators']],
            ),
        ),
    )

    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.RunPython('get_link', ctx.id),
    )

    class Extra:
        registration_date = ctx[FIELDS['registration date']]
        questions_and_objectives = ctx[FIELDS['questions and objectives']]
        study_type = ctx[FIELDS['study type']]
        study_type_detail = ctx[FIELDS['study type other']]
        contact_details = ctx[FIELDS['contact details']]
        participating_institutions = ctx[FIELDS['participating institutions']]
        countries_of_recruitment = ctx[FIELDS['countries of recruitment']]
        funders = ctx[FIELDS['funders']]
        problems_studied = ctx[FIELDS['health conditions or problems studied']]
        patient_population = ctx[FIELDS['patient population']]
        interventions = ctx[FIELDS['interventions']]
        inclusion_criteria = ctx[FIELDS['inclusion criteria']]
        exclusion_criteria = ctx[FIELDS['exclusion criteria']]
        control_or_comparators = ctx[FIELDS['control or comparators']]
        primary_outcomes = ctx[FIELDS['primary outcomes']]
        key_secondary_outcomes = ctx[FIELDS['key secondary outcomes']]
        target_sample_size = ctx[FIELDS['target sample size']]
        recruitment_status = ctx[FIELDS['recruitment status']]
        other_recruitment_status = ctx[FIELDS['other recruitment status']]
        first_enrollment_date = ctx[FIELDS['first enrollment date']]
        expected_enrollment_completion_date = ctx[
            FIELDS['expected enrollment completion date']
        ]
        expected_research_completion_date = ctx[
            FIELDS['expected research completion date']
        ]
        ethical_approval = ctx[FIELDS['ethical approval']]
        ethical_approval_details = ctx[FIELDS['ethical approval details']]
        ethical_committee_judgment = ctx[FIELDS['ethical committee judgment']]
        data = ctx[FIELDS['data']]
        published_paper = ctx[FIELDS['published paper identifier']]
        study_website = ctx[FIELDS['study website']]
        study_results = ctx[FIELDS['study results']]

    def get_link(self, id):
        """Return LINK_FORMAT with ``id`` substituted into it."""
        link = LINK_FORMAT.format(id)
        return link

    def split_names(self, obj):
        """Split ``obj`` on commas; return ``None`` when ``obj`` is falsy."""
        return obj.split(',') if obj else None

Exemple #6

0

Afficher le fichier

Fichier : v1_push.py Projet : sheriefvt/SHARE

class CreativeWork(Parser):
    # Creative work parsed from a pushed document; apart from the title,
    # every source field is read through tools.Try.
    title = ctx.title
    description = tools.Try(ctx.description)
    is_deleted = tools.RunPython(
        '_is_deleted',
        tools.Try(ctx.otherProperties),
    )
    date_updated = tools.ParseDate(tools.Try(ctx.providerUpdatedDateTime))
    rights = tools.Join(tools.Try(ctx.licenses.uri))

    # Only the first entry of ``languages`` is used when several are given.
    language = tools.ParseLanguage(tools.Try(ctx.languages[0]))

    related_agents = tools.Concat(
        tools.Map(tools.Delegate(Creator), tools.Try(ctx.contributors)),
        tools.Map(tools.Delegate(Publisher), tools.Try(ctx.publisher)),
        tools.Map(tools.Delegate(Funder), tools.Try(ctx.sponsorships)),
    )

    # All four URI groups are concatenated, de-duplicated by ``unique``
    # and then mapped through tools.IRI.
    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.Map(
            tools.IRI(),
            tools.RunPython(
                'unique',
                tools.Concat(
                    tools.Try(ctx.uris.canonicalUri),
                    tools.Try(ctx.uris.providerUris),
                    tools.Try(ctx.uris.descriptorUris),
                    tools.Try(ctx.uris.objectUris),
                ),
            ),
        ),
    )

    subjects = tools.Map(
        tools.Delegate(ThroughSubjects),
        tools.Subjects(tools.Try(ctx.subjects)),
    )

    tags = tools.Map(
        tools.Delegate(ThroughTags),
        tools.Try(ctx.tags),
        tools.Try(ctx.subjects),
    )

    class Extra:
        """
        Fields that are combined in the base parser are relisted as singular elements that match
        their original entry to preserve raw data structure.
        """
        freeToRead = tools.Try(ctx.freeToRead)
        languages = tools.Try(ctx.languages)
        licenses = tools.Try(ctx.licenses)
        otherProperties = tools.Try(ctx.otherProperties)
        publisher = tools.Try(ctx.publisher)
        subjects = tools.Try(ctx.subjects)
        sponsorships = tools.Try(ctx.sponsorships)
        tags = tools.Try(ctx.tags)
        uris = tools.Try(ctx.uris)
        version = tools.Try(ctx.version)

    def unique(self, items):
        """Return the distinct members of ``items`` as a sorted list."""
        return sorted(set(items))

    def _is_deleted(self, properties):
        """Report whether the 'status' property lists 'deleted'.

        Only the first entry named 'status' is inspected; ``False`` is
        returned when ``properties`` is empty/None or has no such entry.
        """
        for entry in properties or ():
            if entry['name'] != 'status':
                continue
            return 'deleted' in entry['properties'].get('status', [])
        return False

Exemple #7

0

Afficher le fichier

Fichier : normalizer.py Projet : sheriefvt/SHARE

class Organization(Parser):
    # Organization parsed from a single "name, location" string.
    schema = tools.GuessAgentType(ctx)

    name = tools.RunPython('get_name', ctx)
    location = tools.RunPython('get_location', ctx)

    def get_name(self, context):
        """Return the part of ``context`` before the first comma."""
        return context.split(',')[0]

    def get_location(self, context):
        """Return everything after the first comma, or ``None`` without one.

        ``str.partition`` always returns a 3-tuple, so the previous
        ``len(spl) > 1`` test could never fail: the ``return None`` branch
        was unreachable and a comma-less input produced ``''``. The
        separator element is checked instead; it is empty exactly when no
        comma was found.
        """
        _, separator, remainder = context.partition(',')
        if separator:
            return remainder
        return None

Exemple #8

0

Afficher le fichier

Fichier : oai.py Projet : alexschiller/SHARE

class OAIAgentWorkRelation(Parser):
    # Relation whose agent and ``cited_as`` are both derived from the text
    # of the same context value.
    schema = 'AgentWorkRelation'

    agent = tools.Delegate(OAIAgent, tools.RunPython('force_text', ctx))
    cited_as = tools.RunPython('force_text', ctx)

    def force_text(self, data):
        """Return ``data`` as text.

        Strings are returned unchanged and dicts yield their ``'#text'``
        entry; any other type raises ``TypeError`` carrying the value.
        """
        if isinstance(data, str):
            return data
        if isinstance(data, dict):
            return data['#text']
        raise TypeError(data)

Exemple #9

0

Afficher le fichier

 class Extra:
     # All three fields read from ctx.nameIdentifier and are wrapped in
     # tools.Try. ``force_text`` is referenced as a bare name, so it must be
     # defined outside this class (not visible here).
     name_identifier = tools.Try(
         tools.RunPython(force_text, ctx.nameIdentifier))
     # '@'-prefixed keys are read straight off the nameIdentifier entry.
     name_identifier_scheme = tools.Try(
         ctx.nameIdentifier['@nameIdentifierScheme'])
     name_identifier_scheme_uri = tools.Try(
         ctx.nameIdentifier['@schemeURI'])

Exemple #10

0

Afficher le fichier

Fichier : normalizer.py Projet : sf2ne/SHARE

class Link(Parser):
    # The context is passed to ``format_doi`` to build the URL; the link
    # type is the constant 'doi'.
    url = tools.RunPython('format_doi', ctx)
    type = tools.Static('doi')

    def format_doi(self, doi):
        """Return the result of ``format_doi_as_url`` for ``doi``."""
        doi_url = format_doi_as_url(self, doi)
        return doi_url

Exemple #11

0

Afficher le fichier

class Link(Parser):
    # Link of type 'provider' whose URL is built from the record's id.
    url = tools.RunPython('format_url', ctx)
    type = tools.Static('provider')

    def format_url(self, ctx):
        """Return the award-search URL for ``ctx['id']``."""
        award_id = ctx['id']
        template = 'https://www.nsf.gov/awardsearch/showAward?AWD_ID={}'
        return template.format(award_id)

Exemple #12

0

Afficher le fichier

class FunderAgent(Parser):
    # Agent described by either funder* or contributor* fields: both the
    # guessed agent type and the name take ctx.funderName or
    # ctx.contributorName via tools.OneOf.
    schema = tools.GuessAgentType(tools.OneOf(ctx.funderName,
                                              ctx.contributorName),
                                  default='organization')

    name = tools.OneOf(ctx.funderName, ctx.contributorName)

    # The identifier is taken from funderIdentifier, else from the text of
    # nameIdentifier, else the static None. The IRI conversion is wrapped
    # in Try with exceptions=(ValueError, ).
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.Try(tools.IRI(
            tools.OneOf(ctx.funderIdentifier,
                        tools.RunPython(force_text, ctx.nameIdentifier),
                        tools.Static(None))),
                  exceptions=(ValueError, )))

    class Extra:
        # Source fields, each wrapped in tools.Try.
        name_identifier = tools.Try(ctx.nameIdentifier)
        name_identifier_scheme = tools.Try(
            ctx.nameIdentifier['@nameIdentifierScheme'])
        name_identifier_scheme_uri = tools.Try(
            ctx.nameIdentifier['@schemeURI'])

        funder_identifier = tools.Try(ctx.funderIdentifier)
        funder_identifier_type = tools.Try(ctx.funderIdentifierType)

        contributor_type = tools.Try(ctx.contributorType)

Exemple #13

0

Afficher le fichier

class Identifier(Parser):
    # ``url`` is the context itself; ``base_url`` is derived from it.
    url = ctx
    base_url = tools.RunPython('get_base_url', ctx)

    def get_base_url(self, url):
        """Return ``'<scheme>://<host>'`` as parsed from ``url`` by furl."""
        parsed = furl.furl(url)
        return '{}://{}'.format(parsed.scheme, parsed.host)

Exemple #14

0

Afficher le fichier

Fichier : normalizer.py Projet : sheriefvt/SHARE

class Person(Parser):
    # Each name part is read from the embedded user's attributes, with the
    # ``meta`` of the first embedded error as the tools.OneOf alternative.
    given_name = tools.OneOf(
        ctx.embeds.users.data.attributes.given_name,
        ctx.embeds.users.errors[0].meta.given_name,
    )
    family_name = tools.OneOf(
        ctx.embeds.users.data.attributes.family_name,
        ctx.embeds.users.errors[0].meta.family_name,
    )
    additional_name = tools.OneOf(
        ctx.embeds.users.data.attributes.middle_names,
        ctx.embeds.users.errors[0].meta.middle_names,
    )
    suffix = tools.OneOf(
        ctx.embeds.users.data.attributes.suffix,
        ctx.embeds.users.errors[0].meta.suffix,
    )

    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.RunPython('registered', ctx.embeds.users.data.links.html),
        tools.Try(ctx.embeds.users.data.links.profile_image),
    )

    class Extra:
        locale = tools.Try(ctx.embeds.users.data.attributes.locale)
        date_registered = tools.Try(
            ctx.embeds.users.data.attributes.date_registered
        )
        active = tools.Try(ctx.embeds.users.data.attributes.active)
        timezone = tools.Try(ctx.embeds.users.data.attributes.timezone)

    def registered(self, context):
        """Return ``context`` unless the contributor is unregistered.

        The flag is read from ``self.context['attributes']``; ``None`` is
        returned when it is truthy.
        """
        is_unregistered = self.context['attributes']['unregistered_contributor']
        return None if is_unregistered else context

Exemple #15

0

Afficher le fichier

Fichier : normalizer.py Projet : sf2ne/SHARE

class Preprint(Parser):
    # Preprint parsed from a feed ``entry``.
    title = ctx.entry.title
    description = ctx.entry.summary
    date_published = tools.ParseDate(ctx.entry.published)
    date_updated = tools.ParseDate(ctx.entry.updated)
    contributors = tools.Map(tools.Delegate(Contributor), ctx.entry.author)

    links = tools.Map(
        tools.Delegate(ThroughLinks),
        tools.Try(ctx.entry['arxiv:doi']),
        ctx.entry.id,
    )

    # Category terms are extracted by ``get_subjects`` before being handed
    # to tools.Subjects.
    subjects = tools.Map(
        tools.Delegate(ThroughSubjects),
        tools.Subjects(
            tools.RunPython(
                'get_subjects',
                tools.Concat(tools.Try(ctx.entry.category)),
            )
        ),
    )

    tags = tools.Map(tools.Delegate(ThroughTags), ctx.entry.category)

    class Extra:
        resource_id = ctx.entry.id
        journal_ref = tools.Try(ctx.entry['arxiv:journal_ref'])
        comment = tools.Try(ctx.entry['arxiv:comment'])
        primary_category = tools.Try(ctx.entry['arxiv:primary_category'])

    def get_subjects(self, link):
        """Return the ``'@term'`` value of every category in ``link``."""
        return [category['@term'] for category in link]

Exemple #16

0

Afficher le fichier

Fichier : normalizer.py Projet : sf2ne/SHARE

class Link(Parser):
    # Link whose type is classified from the URL text and whose URL is
    # rewritten through ``format_doi_as_url`` when that type is 'doi'.
    url = tools.RunPython('format_link', ctx)
    type = tools.RunPython('get_link_type', ctx)

    def get_link_type(self, link):
        """Classify ``link`` as ``'doi'``, ``'provider'`` or ``'misc'``.

        ``'doi'`` wins when the substring 'doi' occurs in the link;
        ``'provider'`` applies when a configured home page is contained
        in it.
        """
        if 'doi' in link:
            return 'doi'
        is_provider = self.config.home_page and self.config.home_page in link
        return 'provider' if is_provider else 'misc'

    def format_link(self, link):
        """Return DOI links formatted as URLs and all other links as-is."""
        if self.get_link_type(link) != 'doi':
            return link
        return format_doi_as_url(self, link)

Exemple #17

0

Afficher le fichier

Fichier : normalizer.py Projet : sf2ne/SHARE

class Person(Parser):
    # Every name part is taken from the same combined "first last" string
    # run through tools.ParseName.
    suffix = tools.ParseName(
        tools.RunPython('combine_first_last_name', ctx)
    ).suffix
    family_name = tools.ParseName(
        tools.RunPython('combine_first_last_name', ctx)
    ).last
    given_name = tools.ParseName(
        tools.RunPython('combine_first_last_name', ctx)
    ).first
    additional_name = tools.ParseName(
        tools.RunPython('combine_first_last_name', ctx)
    ).middle
    location = tools.RunPython('get_address', ctx['Contact_Address'])

    class Extra:
        role = tools.Maybe(ctx, 'Role')

    def combine_first_last_name(self, ctx):
        """Return ``First_Name`` and ``Last_Name`` joined by one space."""
        return ctx['First_Name'] + ' ' + ctx['Last_Name']

    def get_address(self, ctx):
        """Format the contact address stored in ``ctx``.

        ``ctx['Address']`` is either a single value or a list of address
        lines. The first line becomes ``address1`` and the second, when
        there is one, becomes ``address2``.

        Previously the non-list branch referenced ``address2`` before it
        was ever assigned, and a one-element list raised ``IndexError``.
        """
        address = ctx['Address']
        lines = address if isinstance(address, list) else [address]
        address1 = lines[0]
        # Assumes format_address treats an empty string as "no second
        # line" -- TODO confirm against its definition.
        address2 = lines[1] if len(lines) > 1 else ''
        return format_address(
            self,
            address1=address1,
            address2=address2,
            city=ctx['City'],
            state_or_province=ctx['Province_or_State'],
            postal_code=ctx['Postal_Code'],
            country=ctx['Country']
        )

Exemple #18

0

Afficher le fichier

class Venue(Parser):
    # The venue name is the text of ctx.geoLocationPlace, wrapped in
    # tools.Try. ``force_text`` is a bare name defined outside this class.
    name = tools.Try(tools.RunPython(force_text, ctx.geoLocationPlace))

    # The geometry fields are disabled at this level; the same two
    # expressions are kept under Extra below.
    # polygon = tools.Try(ctx.geoLocationBox)
    # point = tools.Try(ctx.geoLocationPoint)

    class Extra:
        # Box and point values as found on the context.
        polygon = tools.Try(ctx.geoLocationBox)
        point = tools.Try(ctx.geoLocationPoint)

Exemple #19

0

Afficher le fichier

Fichier : normalizer.py Projet : sheriefvt/SHARE

class Agent(Parser):
    # Agent whose schema is chosen from the element's schema.org itemtype.
    schema = tools.RunPython('get_type', ctx)
    name = Soup(ctx, itemprop='name')['#text']

    def get_type(self, obj):
        """Map the element's ``itemtype`` URL to a schema name.

        Raises ``KeyError`` for any item type other than Person or
        Organization.
        """
        schema_by_itemtype = {
            'http://schema.org/Person': 'Person',
            'http://schema.org/Organization': 'Organization',
        }
        itemtype = obj.soup['itemtype']
        return schema_by_itemtype[itemtype]

Exemple #20

0

Afficher le fichier

Fichier : normalizer.py Projet : sf2ne/SHARE

class Organization(Parser):
    # Lower-case fragments; a personnel entry whose first or last name
    # contains one of them is not treated as a person.
    ORGANIZATION_KEYWORDS = (
        'the',
        'center'
    )

    name = tools.RunPython('combine_name', ctx)
    url = tools.Maybe(ctx, 'Data_Center_URL')
    # TODO: handle when personnel are organizations
    affiliations = tools.Map(
        tools.Delegate(Affiliation),
        tools.RunPython(
            'get_personnel',
            tools.Maybe(ctx, 'Personnel'),
            'person'
        )
    )

    def combine_name(self, ctx):
        """Return the data center's short and long names joined by a space."""
        return ctx['Data_Center_Name']['Short_Name'] + ' ' + ctx['Data_Center_Name']['Long_Name']

    def get_personnel(self, options, entity):
        """
        Returns list based on entity type.

        ``options`` may be a single entry or a list of entries. When
        ``entity`` is ``'person'`` only entries whose first and last names
        contain no organization keyword are kept; any other ``entity``
        returns the entries unfiltered.

        ``None`` now yields an empty list. ``options`` comes from
        ``tools.Maybe``, which presumably produces ``None`` when
        'Personnel' is absent (confirm against the Maybe implementation);
        it was previously wrapped as ``[None]`` and then subscripted,
        raising ``TypeError``.
        """
        if options is None:
            return []

        if not isinstance(options, list):
            options = [options]

        if entity != 'person':
            return options

        return [
            value for value in options
            if not (
                self.list_in_string(value['First_Name'], self.ORGANIZATION_KEYWORDS) or
                self.list_in_string(value['Last_Name'], self.ORGANIZATION_KEYWORDS)
            )
        ]

    def list_in_string(self, string, list_):
        """Return True when any word of ``list_`` occurs in ``string``.

        The match is a case-insensitive substring test.
        NOTE(review): because it matches substrings, 'the' also matches
        names such as 'Matthew' -- confirm this is intended.
        """
        lowered = string.lower()
        return any(word in lowered for word in list_)

Exemple #21

0

Afficher le fichier

class WorkIdentifier(Parser):
    # Identifier whose URI is the dataset page built from 'Entry_ID'.
    uri = tools.RunPython('get_ncar_identifier', ctx)

    class Extra:
        description = tools.Try(ctx.Related_URL.Description)
        url_content_type = tools.Try(ctx.Related_URL.URL_Content_Type.Type)

    def get_ncar_identifier(self, ctx):
        """Return the dataset page URL for ``ctx['Entry_ID']``."""
        entry_id = ctx['Entry_ID']
        template = 'https://www.earthsystemgrid.org/dataset/{}.html'
        return template.format(entry_id)

Exemple #22

0

Afficher le fichier

class FunderRelation(Parser):
    # Funder relation; awards are produced only when ``get_award``
    # succeeds inside the surrounding tools.Try.
    schema = 'Funder'

    agent = tools.Delegate(FunderAgent, ctx)
    awards = tools.Map(
        tools.Delegate(ThroughAwards),
        tools.Try(tools.RunPython('get_award', ctx)),
    )

    def get_award(self, obj):
        """Return ``obj`` unchanged after checking it has an 'awardURI'.

        The lookup result is discarded; it is performed only so that a
        missing 'awardURI' raises here.
        """
        _ = obj['awardURI']
        return obj

Exemple #23

0

Afficher le fichier

Fichier : normalizer.py Projet : sf2ne/SHARE

class Link(Parser):
    # Link whose URL is the context itself and whose type is classified
    # from the host it mentions.
    url = ctx
    type = tools.RunPython('get_link_type', ctx)

    def get_link_type(self, link):
        """Classify ``link`` as ``'doi'``, ``'provider'`` or ``'misc'``.

        Markers are checked in order, so a link containing both is
        reported as ``'doi'``.
        """
        markers = (
            ('dx.doi.org', 'doi'),
            ('biorxiv.org', 'provider'),
        )
        for marker, link_type in markers:
            if marker in link:
                return link_type
        return 'misc'

Exemple #24

0

Afficher le fichier

class Preprint(Parser):
    # Preprint parsed from a mapping of page metadata keys ('DC.*',
    # 'citation_*', 'og:*', ...).
    title = tools.Try(ctx['DC.Title'])
    description = tools.Try(ctx['DC.Description'])
    # is_deleted
    date_published = tools.ParseDate(tools.Try(ctx['article:published_time']))
    date_updated = tools.ParseDate(tools.Try(ctx['DC.Date']))
    # free_to_read_type
    # free_to_read_date
    rights = tools.Try(ctx['DC.Rights'])
    language = tools.Try(ctx['DC.Language'])

    subjects = tools.Map(
        tools.Delegate(ThroughSubjects),
        tools.Static('Biology'),
        tools.Subjects(tools.Try(ctx['subject-areas'])),
    )
    tags = tools.Map(
        tools.Delegate(ThroughTags),
        tools.Try(ctx['category']),
        tools.Try(ctx['subject-areas']),
    )

    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.Try(ctx['og:url']),
        ctx['citation_public_url'],
        ctx['citation_doi'],
    )

    related_agents = tools.Concat(
        tools.Map(tools.Delegate(Publisher), tools.Try(ctx['DC.Publisher'])),
        tools.Map(
            tools.Delegate(Creator),
            tools.RunPython('get_contributors', ctx),
        ),
    )

    # related_works

    class Extra:
        identifiers = ctx['DC.Identifier']
        access_rights = ctx['DC.AccessRights']

    def get_contributors(self, link):
        """Zip authors, e-mails and institutions into contributor dicts.

        Each of the three ``citation_author*`` entries may be missing, a
        single value or a list. The lists are combined position by
        position, and shorter ones are padded with ``None``.
        """
        def as_list(key):
            # A missing key yields []; a lone value is wrapped in a list.
            value = link.get(key, [])
            return value if isinstance(value, list) else [link[key]]

        authors = as_list('citation_author')
        institutions = as_list('citation_author_institution')
        emails = as_list('citation_author_email')

        return [
            {
                'author': author,
                'institution': institution,
                'email': email,
            }
            for author, email, institution in itertools.zip_longest(
                authors, emails, institutions)
        ]

Exemple #25

0

Afficher le fichier

class RelatedWorkIdentifier(Parser):
    # Work identifier whose URI is the text of the context passed through
    # tools.IRI. ``force_text`` is a bare name defined outside this class.
    schema = 'WorkIdentifier'

    uri = tools.IRI(tools.RunPython(force_text, ctx))

    class Extra:
        # '@relatedIdentifierType' is read directly; the remaining
        # attributes are wrapped in tools.Try.
        related_identifier_type = ctx['@relatedIdentifierType']
        relation_type = tools.Try(ctx['@relationType'])
        related_metadata_scheme = tools.Try(ctx['@relatedMetadataScheme'])
        scheme_URI = tools.Try(ctx['@schemeURI'])
        scheme_type = tools.Try(ctx['@schemeType'])

Exemple #26

0

Afficher le fichier

Fichier : normalizer.py Projet : sf2ne/SHARE

class Link(Parser):
    # Link built from the record's ``URL`` field.
    url = tools.RunPython('format_link', ctx.URL)
    type = tools.RunPython('get_link_type', ctx.URL)

    class Extra:
        description = tools.Maybe(ctx, 'Description')
        url_content_type = tools.Maybe(ctx.URL_Content_Type, 'Type')

    def get_link_type(self, link):
        """Classify ``link`` as ``'doi'``, ``'provider'`` or ``'misc'``.

        ``'doi'`` applies when the link mentions dx.doi.org; ``'provider'``
        when a configured home page is contained in it.
        """
        if 'dx.doi.org' in link:
            return 'doi'
        is_provider = self.config.home_page and self.config.home_page in link
        return 'provider' if is_provider else 'misc'

    def format_link(self, link):
        """Return DOI links formatted as URLs and all other links as-is."""
        if self.get_link_type(link) != 'doi':
            return link
        return format_doi_as_url(self, link)

Exemple #27

0

Afficher le fichier

    class Extra:
        """
        Fields that are combined in the base parser are relisted as singular elements that match
        their original entry to preserve raw data structure.
        """
        # Most values below are read from the 'dc' mapping under
        # record -> metadata; 'identifier', 'setSpec' and '@status' are
        # read from the record header.

        # An entity responsible for making contributions to the resource.
        contributor = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'], 'dc:contributor')

        # The spatial or temporal topic of the resource, the spatial applicability of the resource,
        # or the jurisdiction under which the resource is relevant.
        coverage = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'], 'dc:coverage')

        # An entity primarily responsible for making the resource.
        creator = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'], 'dc:creator')

        # A point or period of time associated with an event in the lifecycle of the resource.
        dates = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'], 'dc:date')

        # The file format, physical medium, or dimensions of the resource.
        resource_format = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'], 'dc:format')

        # An unambiguous reference to the resource within a given context.
        # Combines the 'dc:identifier' values with the header's 'identifier'.
        identifiers = tools.Concat(
            tools.Try(ctx['record']['metadata']['dc']['dc:identifier']),
            tools.Maybe(ctx['record']['header'], 'identifier'))

        # A related resource.
        # Computed by the parser method named 'get_relation' (not visible here).
        relation = tools.RunPython('get_relation', ctx)

        # A related resource from which the described resource is derived.
        source = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'], 'dc:source')

        # The topic of the resource.
        subject = tools.Try(ctx.record.metadata.dc['dc:subject'])

        # The nature or genre of the resource.
        resource_type = tools.Try(ctx.record.metadata.dc['dc:type'])

        # The 'setSpec' entry of the record header.
        set_spec = tools.Maybe(ctx.record.header, 'setSpec')

        # Language also stored in the Extra class in case the language reported cannot be parsed by ParseLanguage
        language = tools.Try(ctx.record.metadata.dc['dc:language'])

        # Status in the header, will exist if the resource is deleted
        status = tools.Maybe(ctx.record.header, '@status')

Exemple #28

0

Afficher le fichier

class OAILink(Parser):
    # Link parsed from a bare URL/identifier string.
    schema = 'Link'

    url = tools.RunPython('format_link', ctx)
    type = tools.RunPython('get_link_type', ctx)

    # TODO: account for other types of links
    # i.e. ISBN

    def get_link_type(self, link):
        """Classify ``link`` as ``'doi'``, ``'provider'`` or ``'misc'``.

        ``'doi'`` applies when the link mentions dx.doi.org; ``'provider'``
        when a configured home page is contained in it.
        """
        if 'dx.doi.org' in link:
            return 'doi'
        is_provider = self.config.home_page and self.config.home_page in link
        return 'provider' if is_provider else 'misc'

    def format_link(self, link):
        """Return ``link``, formatting DOI links that are not yet URLs.

        Non-DOI links, and DOI links that already contain 'http', are
        returned unchanged.
        """
        if self.get_link_type(link) != 'doi' or 'http' in link:
            return link
        return format_doi_as_url(self, link)

Exemple #29

0

Afficher le fichier

class Award(Parser):
    # Award record. ``description`` and ``award_amount`` both read
    # ctx.fundsObligatedAmt; the latter passes it through tools.Int.
    name = ctx.title
    description = ctx.fundsObligatedAmt
    award_amount = tools.Int(ctx.fundsObligatedAmt)
    date = tools.ParseDate(ctx.date)
    # ``format_url`` is referenced as a bare name, so it must be defined
    # outside this class (not visible here); it receives ctx.id.
    uri = tools.RunPython(format_url, ctx.id)

    class Extra:
        # Source values; only awardee name and state code are wrapped
        # in tools.Try.
        funds_obligated_amt = ctx.fundsObligatedAmt
        award_id = ctx.id
        awardee_name = tools.Try(ctx.awardeeName)
        awardee_city = ctx.awardeeCity
        awardee_state_code = tools.Try(ctx.awardeeStateCode)
        date = ctx.date

Exemple #30

0

Afficher le fichier

class Award(Parser):
    # Award whose description is the obligated amount and whose URL is
    # the award-search page for the record's id.
    description = ctx.fundsObligatedAmt
    url = tools.RunPython('format_url', ctx)

    def format_url(self, ctx):
        """Return the award-search URL for ``ctx['id']``."""
        return 'https://www.nsf.gov/awardsearch/showAward?AWD_ID={}'.format(
            ctx['id'])

    class Extra:
        # ``awardee_city`` was assigned twice with the same expression.
        # The second assignment is dropped; the first is kept so the
        # attribute keeps its position in the class namespace.
        awardee_city = ctx.awardeeCity
        funds_obligated_amt = ctx.fundsObligatedAmt
        name = tools.Try(ctx.awardeeName)
        awardee_state_code = tools.Try(ctx.awardeeStateCode)