Python Unique Examples

Programming Language: Python

Namespace/Package Name: share.transform.chain.links

Method/Function: Unique

Examples at hotexamples.com: 3

Python Unique - 3 examples found. These are the top-rated real-world Python examples of share.transform.chain.links.Unique, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

class MODSAgent(Parser):
    """Parser that builds an agent from a MODS name element.

    ``schema`` and the fallback ``name`` are computed by the helper methods
    at the bottom of the class; every other field is a declarative ``tools``
    chain over the ``mods:*`` keys of the context.
    """

    # The agent schema is decided at run time by get_agent_schema().
    schema = tools.RunPython('get_agent_schema', ctx)

    # Two candidate sources for the name: the mods:displayForm text, or the
    # mods:namePart values joined by squash_name_parts().
    name = tools.OneOf(
        tools.RunPython(force_text, ctx['mods:displayForm']),
        tools.RunPython('squash_name_parts', ctx),
    )

    # mods:affiliation text is filtered with bool() and then delegated to
    # IsAffiliatedWith.
    related_agents = tools.Map(
        tools.Delegate(IsAffiliatedWith),
        tools.Concat(
            tools.Try(
                tools.Filter(
                    lambda text: bool(text),
                    tools.RunPython(force_text, ctx['mods:affiliation']),
                ),
            ),
        ),
    )

    # mods:nameIdentifier entries containing 'invalid' are filtered out; the
    # rest are turned into text, run through IRI() (ValueError is tolerated
    # by the surrounding Try) and passed through Unique.
    identifiers = tools.Map(
        tools.Delegate(MODSAgentIdentifier),
        tools.Unique(
            tools.Map(
                tools.Try(tools.IRI(), exceptions=(ValueError, )),
                tools.Map(
                    tools.RunPython(force_text),
                    tools.Filter(
                        lambda identifier: 'invalid' not in identifier,
                        tools.Try(ctx['mods:nameIdentifier']),
                    ),
                ),
            ),
        ),
    )

    class Extra:
        # Raw values of the name element, each wrapped in Try.
        name_type = tools.Try(ctx['@type'])
        name_part = tools.Try(ctx['mods:namePart'])
        affiliation = tools.Try(ctx['mods:affiliation'])
        description = tools.Try(ctx['mods:description'])
        display_form = tools.Try(ctx['mods:displayForm'])
        etal = tools.Try(ctx['mods:etal'])
        name_identifier = tools.Try(ctx['mods:nameIdentifier'])

    def squash_name_parts(self, name):
        """Return the text of every ``mods:namePart`` of *name*, space-separated."""
        parts = get_list(name, 'mods:namePart')
        return ' '.join(force_text(part) for part in parts)

    def get_agent_schema(self, obj):
        """Pick the agent schema for *obj* from its ``@type`` attribute.

        ``personal`` and ``conference`` map to fixed schemas.  Any other type
        is handed to ``GuessAgentTypeLink`` together with the squashed name;
        ``corporate`` names pass ``default='organization'`` to it.
        """
        name_type = obj.get('@type')

        fixed_schemas = (
            ('personal', 'person'),
            ('conference', 'organization'),
        )
        for known_type, fixed_schema in fixed_schemas:
            if name_type == known_type:
                return fixed_schema

        # TODO SHARE-718
        # if name_type == 'family':
        #    return 'family'
        if name_type == 'corporate':
            guesser = GuessAgentTypeLink(default='organization')
        else:
            guesser = GuessAgentTypeLink()
        return guesser.execute(self.squash_name_parts(obj))

Example #2

Show file

class OAICreativeWork(Parser):
    """Parser for an OAI record carrying Dublin Core (``dc:``) metadata.

    ``default_type`` and ``type_map`` are both ``None`` here, in which case
    ``get_schema`` always returns ``default_type``; subclasses are presumably
    expected to override them -- confirm against the callers.
    """
    default_type = None
    type_map = None

    # Schema is resolved at run time from dc:type (or None when it is absent).
    schema = tools.RunPython(
        'get_schema',
        tools.OneOf(ctx.record.metadata.dc['dc:type'], tools.Static(None)))

    title = tools.Join(
        tools.RunPython(force_text,
                        tools.Try(ctx.record.metadata.dc['dc:title'])))
    description = tools.Join(
        tools.RunPython(force_text,
                        tools.Try(ctx.record.metadata.dc['dc:description'])))

    # dc:identifier values plus the header identifier, minus anything rejected
    # by not_citation; IRI() failures (InvalidIRI) are tolerated by Try.
    identifiers = tools.Map(
        tools.Delegate(OAIWorkIdentifier),
        tools.Unique(
            tools.Map(
                tools.Try(tools.IRI(), exceptions=(InvalidIRI, )),
                tools.Filter(
                    not_citation,
                    tools.RunPython(
                        force_text,
                        tools.Concat(
                            tools.Try(ctx.record.metadata.dc['dc:identifier']),
                            tools.Try(ctx.record.header['identifier'])))))))

    # Relations come from get_relation(), which drops those that merely
    # repeat one of the record's own identifiers.
    related_works = tools.Concat(
        tools.Map(
            tools.Delegate(OAIWorkRelation),
            tools.Unique(
                tools.Map(tools.Try(tools.IRI(), exceptions=(InvalidIRI, )),
                          tools.RunPython('get_relation', ctx)))))

    related_agents = tools.Concat(
        tools.Map(tools.Delegate(OAICreator),
                  tools.Try(ctx.record.metadata.dc['dc:creator'])),
        tools.Map(tools.Delegate(OAIContributor),
                  tools.Try(ctx.record.metadata.dc['dc:contributor'])),
        tools.Map(
            tools.Delegate(OAIPublisher),
            tools.RunPython(force_text,
                            tools.Try(
                                ctx.record.metadata.dc['dc:publisher']))),
    )

    rights = tools.Join(tools.Try(ctx.record.metadata.dc['dc:rights']))

    # Note: this is only taking the first language in the case of multiple languages
    language = tools.ParseLanguage(
        tools.Try(ctx.record.metadata.dc['dc:language'][0]), )

    # Subjects and tags are both tokenized from the same four sources:
    # setSpec, dc:type, dc:format and dc:subject.
    subjects = tools.Map(
        tools.Delegate(OAIThroughSubjects),
        tools.Subjects(
            tools.Map(
                tools.RunPython('tokenize'),
                tools.RunPython(
                    force_text,
                    tools.Concat(
                        tools.Try(ctx.record.header.setSpec),
                        tools.Try(ctx.record.metadata.dc['dc:type']),
                        tools.Try(ctx.record.metadata.dc['dc:format']),
                        tools.Try(ctx.record.metadata.dc['dc:subject']),
                    )))))

    tags = tools.Map(
        tools.Delegate(OAIThroughTags),
        tools.Concat(tools.Map(
            tools.RunPython('tokenize'),
            tools.RunPython(
                force_text,
                tools.Concat(
                    tools.Try(ctx.record.header.setSpec),
                    tools.Try(ctx.record.metadata.dc['dc:type']),
                    tools.Try(ctx.record.metadata.dc['dc:format']),
                    tools.Try(ctx.record.metadata.dc['dc:subject']),
                ))),
                     deep=True))

    date_updated = tools.ParseDate(ctx.record.header.datestamp)

    is_deleted = tools.RunPython('check_status',
                                 tools.Try(ctx.record.header['@status']))

    class Extra:
        """
        Fields that are combined in the base parser are relisted as singular elements that match
        their original entry to preserve raw data structure.
        """
        # An agent responsible for making contributions to the resource.
        contributor = tools.Try(ctx.record.metadata.dc['dc:contributor'])

        # The spatial or temporal topic of the resource, the spatial applicability of the resource,
        # or the jurisdiction under which the resource is relevant.
        coverage = tools.Try(ctx.record.metadata.dc['dc:coverage'])

        # An agent primarily responsible for making the resource.
        creator = tools.Try(ctx.record.metadata.dc['dc:creator'])

        # A point or period of time associated with an event in the lifecycle of the resource.
        dates = tools.Try(ctx.record.metadata.dc['dc:date'])

        # The file format, physical medium, or dimensions of the resource.
        resource_format = tools.Try(ctx.record.metadata.dc['dc:format'])

        # An unambiguous reference to the resource within a given context.
        identifiers = tools.Concat(
            tools.Try(ctx.record.metadata.dc['dc:identifier']),
            tools.Try(ctx.record.header['identifier']))

        # A related resource.
        relation = tools.RunPython('get_relation', ctx)

        # A related resource from which the described resource is derived.
        source = tools.Try(ctx.record.metadata.dc['dc:source'])

        # The nature or genre of the resource.
        resource_type = tools.Try(ctx.record.metadata.dc['dc:type'])

        set_spec = tools.Try(ctx.record.header.setSpec)

        # Language also stored in the Extra class in case the language reported cannot be parsed by ParseLanguage
        language = tools.Try(ctx.record.metadata.dc['dc:language'])

        # Status in the header, will exist if the resource is deleted
        status = tools.Try(ctx.record.header['@status'])

    def check_status(self, status):
        """Return True only when the header *status* is exactly ``'deleted'``."""
        return status == 'deleted'

    def get_schema(self, types):
        """Map the record's type value(s) to a schema via ``type_map``.

        *types* may be a single string, a single dict carrying the text under
        ``'#text'``, or an iterable of either.  The first entry whose
        lower-cased text is a key of ``type_map`` wins; ``default_type`` is
        returned when nothing matches, when *types* is empty, or when
        ``type_map`` is unset.
        """
        if not types or not self.type_map:
            return self.default_type
        # A lone value is wrapped so the loop below sees whole entries; a bare
        # dict would otherwise be iterated by its keys.
        if isinstance(types, (str, dict)):
            types = [types]
        for t in types:
            if isinstance(t, dict):
                t = t.get('#text')
            # Skip entries without usable text instead of failing on .lower().
            if not isinstance(t, str):
                continue
            t = t.lower()
            if t in self.type_map:
                return self.type_map[t]
        return self.default_type

    def tokenize(self, data):
        """Split each string of *data* on ``' - '``, ``'.'`` and ``','``.

        *data* may be one string or an iterable of strings.  Pieces are
        stripped, and pieces that are empty after stripping are dropped so
        input such as ``'a, , b'`` cannot yield an empty token.
        """
        if isinstance(data, str):
            data = [data]
        tokens = []
        for item in data:
            pieces = (x.strip() for x in re.split(r'(?: - )|\.|,', item))
            tokens.extend(piece for piece in pieces if piece)
        return tokens

    def get_relation(self, ctx):
        """Return the ``dc:relation`` values that are not the record's own identifiers.

        Identifiers (``dc:identifier`` plus the header identifier) are joined
        into one string with ``http``, ``:`` and ``/`` removed; a relation is
        dropped when its equally-stripped text occurs inside that string.
        Returns an empty list when the record has no metadata.
        """
        if not ctx['record'].get('metadata'):
            return []

        def strip_url_noise(text):
            return re.sub('http|:|/', '', text)

        dc = ctx['record']['metadata']['dc']
        relation = dc.get('dc:relation') or []
        identifiers = dc.get('dc:identifier') or []

        # A lone entry is a bare str/dict rather than a list; wrap it so the
        # comprehensions below iterate entries rather than characters or keys.
        if isinstance(identifiers, (str, dict)):
            identifiers = (identifiers, )
        identifiers = ''.join(i['#text'] if isinstance(i, dict) else i
                              for i in identifiers if i)
        identifiers = strip_url_noise(
            identifiers + ctx['record']['header']['identifier'])

        if isinstance(relation, (str, dict)):
            relation = (relation, )
        # Entries may be dicts carrying the text under '#text', whether they
        # arrive alone or inside a list.
        relation = [
            r.get('#text') if isinstance(r, dict) else r for r in relation
        ]

        return [
            r for r in relation
            if r and strip_url_noise(r) not in identifiers
        ]

Example #3

Show file

class MODSCreativeWork(Parser):
    """Parser for a MODS record describing a creative work.

    ``type_map`` and ``role_map`` default to ``None``; ``get_schema`` falls
    back to ``default_type`` without a ``type_map`` and ``filter_names``
    treats a missing ``role_map`` as empty.  Subclasses are presumably
    expected to supply both -- confirm against the callers.
    """
    default_type = 'CreativeWork'
    type_map = None
    role_map = None

    # Schema is resolved at run time from mods:genre (or None when absent).
    schema = tools.RunPython(
        'get_schema',
        tools.OneOf(tools.RunPython(force_text, ctx['mods:genre']),
                    tools.Static(None)))

    title = tools.RunPython('join_title_info', ctx)

    # Abstracts have the optional attribute "shareable". Don't bother checking for it, because
    # abstracts that are not shareable should not have been shared with SHARE.
    description = tools.Join(
        tools.RunPython(force_text, tools.Try(ctx['mods:abstract']), '\n'))

    # mods:identifier, the header identifier and the location URL; entries
    # containing 'invalid' are filtered out and IRI() failures (ValueError)
    # are tolerated by Try.
    identifiers = tools.Map(
        tools.Delegate(MODSWorkIdentifier),
        tools.Unique(
            tools.Map(
                tools.Try(tools.IRI(), exceptions=(ValueError, )),
                tools.Map(
                    tools.RunPython(force_text),
                    tools.Filter(
                        lambda obj: 'invalid' not in obj,
                        tools.Concat(
                            tools.Try(ctx['mods:identifier']),
                            tools.Try(ctx.header['identifier']),
                            tools.Try(ctx['mods:location']['mods:url']),
                        ))))))

    related_works = tools.Concat(
        tools.Map(tools.Delegate(MODSWorkRelation),
                  tools.Try(ctx['mods:relatedItem'])))

    # Names are routed by role through filter_names(); names matching none of
    # the four listed roles go to MODSContributor (invert=True).
    related_agents = tools.Concat(
        tools.Map(tools.Delegate(MODSCreator),
                  tools.RunPython('filter_names', ctx, 'creator')),
        tools.Map(tools.Delegate(MODSFunder),
                  tools.RunPython('filter_names', ctx, 'funder')),
        tools.Map(tools.Delegate(MODSHost),
                  tools.RunPython('filter_names', ctx, 'host')),
        tools.Map(tools.Delegate(MODSPublisher),
                  tools.RunPython('filter_names', ctx, 'publisher')),
        tools.Map(
            tools.Delegate(MODSContributor),
            tools.RunPython('filter_names',
                            ctx,
                            'creator',
                            'funder',
                            'host',
                            'publisher',
                            invert=True)),
        tools.Map(
            tools.Delegate(MODSSimplePublisher),
            tools.Try(ctx['mods:originInfo']['mods:publisher']),
        ),
    )

    rights = tools.RunPython(force_text,
                             tools.Try(ctx['mods:accessCondition']), '\n')

    language = tools.ParseLanguage(
        tools.Try(ctx['mods:language']['mods:languageTerm']), )

    subjects = tools.Map(
        tools.Delegate(MODSThroughSubjects),
        tools.Subjects(
            tools.Concat(tools.Try(ctx['mods:subject']['mods:topic']), )))

    tags = tools.Map(
        tools.Delegate(MODSThroughTags),
        tools.Concat(tools.Map(
            tools.RunPython('tokenize'),
            tools.Map(
                tools.RunPython(force_text),
                tools.Try(ctx.header.setSpec),
                tools.Try(ctx['mods:genre']),
                tools.Try(ctx['mods:classification']),
                tools.Try(ctx['mods:subject']['mods:topic']),
            )),
                     deep=True))

    date_updated = tools.ParseDate(tools.Try(ctx.header.datestamp))

    # TODO (in regulator) handle date ranges, uncertain dates ('1904-1941', '1890?', '1980-', '19uu', etc.)
    date_published = tools.OneOf(
        tools.ParseDate(
            tools.RunPython(
                force_text,
                tools.Try(ctx['mods:originInfo']['mods:dateIssued']))),
        tools.Static(None))

    # Every other header lookup in this class goes through ctx.header
    # (identifier, setSpec, datestamp), whereas this field used to read only
    # ctx.record.header.  Both locations are tried so the status is found
    # under either layout; None (hence False) when neither has it.
    # NOTE(review): confirm which layout the transformer actually supplies.
    is_deleted = tools.RunPython(lambda status: status == 'deleted',
                                 tools.OneOf(ctx.header['@status'],
                                             ctx.record.header['@status'],
                                             tools.Static(None)))

    class Extra:
        """
        Fields that are combined in the base parser are relisted as singular elements that match
        their original entry to preserve raw data structure.
        """

        # (dc:description) http://www.loc.gov/standards/mods/userguide/abstract.html
        abstract = tools.Try(ctx['mods:abstract'])

        # (dc:rights) http://www.loc.gov/standards/mods/userguide/accesscondition.html
        accessConditions = tools.Try(ctx['mods:accessCondition'])

        # (dc:subject) http://www.loc.gov/standards/mods/userguide/classification.html
        classification = tools.Try(ctx['mods:classification'])

        # (N/A) http://www.loc.gov/standards/mods/userguide/extension.html
        extension = tools.Try(ctx['mods:extension'])

        # SHARE type
        # (dc:type) http://www.loc.gov/standards/mods/userguide/genre.html
        genre = tools.Try(ctx['mods:genre'])

        # (dc:identifier) http://www.loc.gov/standards/mods/userguide/identifier.html
        identifier = tools.Try(ctx['mods:identifier'])

        # (dc:language) http://www.loc.gov/standards/mods/userguide/language.html
        language = tools.Try(ctx['mods:language'])

        # (dc:identifier for url) http://www.loc.gov/standards/mods/userguide/location.html
        location = tools.Try(ctx['mods:location'])

        # (dc:creator|dc:contributor) http://www.loc.gov/standards/mods/userguide/name.html
        name = tools.Try(ctx['mods:name'])

        # (dc:description) http://www.loc.gov/standards/mods/userguide/note.html
        note = tools.Try(ctx['mods:note'])

        # (dc:publisher|dc:date) http://www.loc.gov/standards/mods/userguide/origininfo.html
        originInfo = tools.Try(ctx['mods:originInfo'])

        # Extra
        # (dc:title) http://www.loc.gov/standards/mods/userguide/part.html
        part = tools.Try(ctx['mods:part'])

        # (dc:format or N/A) http://www.loc.gov/standards/mods/userguide/physicaldescription.html
        physicalDescription = tools.Try(ctx['mods:physicalDescription'])

        # Metadata information
        # (N/A) http://www.loc.gov/standards/mods/userguide/recordinfo.html
        recordInfo = tools.Try(ctx['mods:recordInfo'])

        # (dc:relation) http://www.loc.gov/standards/mods/userguide/relateditem.html
        relatedItem = tools.Try(ctx['mods:relatedItem'])

        # (dc:subject|dc:type|dc:coverage|N/A) http://www.loc.gov/standards/mods/userguide/subject.html
        subject = tools.Try(ctx['mods:subject'])

        # (dc:description) http://www.loc.gov/standards/mods/userguide/tableofcontents.html
        tableOfContents = tools.Try(ctx['mods:tableOfContents'])

        # (N/A) http://www.loc.gov/standards/mods/userguide/targetaudience.html
        targetAudience = tools.Try(ctx['mods:targetAudience'])

        # (dc:title) http://www.loc.gov/standards/mods/userguide/titleinfo.html
        titleInfo = tools.Try(ctx['mods:titleInfo'])

        # Extra
        # (dc:type) http://www.loc.gov/standards/mods/userguide/typeofresource.html
        typeOfResource = tools.Try(ctx['mods:typeOfResource'])

    def get_schema(self, types):
        """Map the record's genre value(s) to a schema via ``type_map``.

        *types* may be a single string, a single dict carrying the text under
        ``'#text'``, or an iterable of either.  The first entry whose
        lower-cased text is a key of ``type_map`` wins; ``default_type`` is
        returned when nothing matches, when *types* is empty, or when
        ``type_map`` is unset.
        """
        if not types or not self.type_map:
            return self.default_type
        # A lone value is wrapped so the loop below sees whole entries; a bare
        # dict would otherwise be iterated by its keys.
        if isinstance(types, (str, dict)):
            types = [types]
        for t in types:
            if isinstance(t, dict):
                t = t.get('#text')
            # Skip entries without usable text instead of failing on .lower().
            if not isinstance(t, str):
                continue
            t = t.lower()
            if t in self.type_map:
                return self.type_map[t]
        return self.default_type

    def tokenize(self, data):
        """Split each string of *data* on ``' - '``, ``'.'`` and ``','``.

        *data* may be one string or an iterable of strings.  Pieces are
        stripped, and pieces that are empty after stripping are dropped so
        input such as ``'a, , b'`` cannot yield an empty token.
        """
        if isinstance(data, str):
            data = [data]
        tokens = []
        for item in data:
            pieces = (x.strip() for x in re.split(r'(?: - )|\.|,', item))
            tokens.extend(piece for piece in pieces if piece)
        return tokens

    # Map titleInfos to a string: https://www.loc.gov/standards/mods/userguide/titleinfo.html#mappings
    def join_title_info(self, obj):
        """Build one title string from every ``mods:titleInfo`` of *obj*.

        Within a titleInfo the parts are concatenated in the order nonSort,
        title, subTitle (``': '``), partNumber (``'. '``), partName
        (``': '``); a delimiter is only emitted when its part is non-empty.
        The resulting titles are joined with ``'. '``.
        """
        def get_part(title_info, part_name, delimiter=''):
            part = force_text(title_info.get(part_name, ''), ' ').strip()
            return delimiter + part if part else ''

        title_infos = get_list(obj, 'mods:titleInfo')
        titles = []
        for title_info in title_infos:
            # NOTE(review): nonSort is stripped and then joined to the title
            # with no separator -- confirm that is the intended rendering.
            title = ''
            title += get_part(title_info, 'mods:nonSort')
            title += get_part(title_info, 'mods:title')
            title += get_part(title_info, 'mods:subTitle', ': ')
            title += get_part(title_info, 'mods:partNumber', '. ')
            title += get_part(title_info, 'mods:partName', ': ')
            if title:
                titles.append(title)
        return '. '.join(titles)

    def filter_names(self, obj, *roles, invert=False):
        """Return the ``mods:name`` entries of *obj* selected by role.

        A name matches when any of its ``mods:roleTerm`` texts (lower-cased),
        or the ``role_map`` translation of one, is among *roles*.  With
        ``invert=False`` the matching names are returned; with ``invert=True``
        the non-matching ones.  Input order is kept and each name appears at
        most once, however many of its roles match.
        """
        # role_map is None by default on this class; treat that as "no aliases".
        role_map = self.role_map or {}
        wanted = set(roles)

        filtered = []
        for name in get_list(obj, 'mods:name'):
            # Collect every role of the name first and decide once, so a name
            # with several matching roles is neither added nor removed twice.
            name_roles = set()
            for role in get_list(name, 'mods:role'):
                terms = {
                    force_text(r).lower()
                    for r in get_list(role, 'mods:roleTerm')
                }
                name_roles.update(terms)
                name_roles.update(role_map[t] for t in terms if t in role_map)

            matched = not name_roles.isdisjoint(wanted)
            if matched != invert:
                filtered.append(name)
        return filtered