# Declarative parser: each attribute below names the part of the source
# record (``ctx``) that feeds the CreativeWork field of the same name.
class CreativeWork(Parser):
    title = ctx.title
    description = Try(ctx.description)
    contributors = Map(Delegate(Contributor), ctx.author)

    # The ISSN link is delegated on its own; the PDF, DOI and full-text
    # URLs are mapped through ThroughLinks, and both results are combined.
    links = Concat(
        Delegate(ThroughLinks.using(link=Delegate(ISSN)), ctx.issn),
        Map(
            Delegate(ThroughLinks),
            ctx.pdf_url,
            RunPython('format_doi', ctx.doi),
            ctx.fulltext_html_url,
        ),
    )

    publishers = Map(
        Delegate(Association.using(entity=Delegate(Publisher))),
        ctx.publisher,
    )
    # NOTE(review): the whole record (ctx) is handed to the Institution
    # delegate here -- confirm that is intended.
    institutions = Map(
        Delegate(Association.using(entity=Delegate(Institution))),
        ctx,
    )

    # Only the first entry of ctx.subjects is used for ``subject``.
    subject = Delegate(ThroughTags, ctx.subjects[0])
    date_published = RunPython('parse_date', ctx.date)
    language = ctx.language
    # Tags are built from both keywords and subjects.
    tags = Concat(
        Map(
            Delegate(ThroughTags),
            Try(ctx.keywords),
            Try(ctx.subjects),
        ),
    )

    # Additional values read from the record alongside the fields above.
    class Extra:
        modified = RunPython('parse_date', ctx.date)
        subjects = Try(ctx.subjects)
        affiliations = Map(
            Delegate(Association.using(entity=Delegate(Institution))),
            ctx,
        )
        identifiers = ctx.identifiers
        volume = Try(ctx.volume)
        emails = Try(ctx.author_email)
        journal_title = Try(ctx.journal_title)
        journal_abbrev = Try(ctx.journal_abbrev)
        description_html = Try(ctx['description-html'])

    def format_doi(self, doi):
        """Return ``doi`` as formatted by ``format_doi_as_url``."""
        return format_doi_as_url(self, doi)

    def parse_date(self, date_str):
        """Parse ``date_str`` and return it as a UTC ISO-8601 string."""
        parsed = dateparser.parse(date_str)
        in_utc = arrow.get(parsed).to('UTC')
        return in_utc.isoformat()
# Declarative parser mapping a source record (``ctx``) onto an Article.
class Article(Parser):
    title = ctx.title
    description = Try(ctx.description)
    language = ctx.language

    # Both dates are read from the same source field, ctx.date.
    date_published = ParseDate(ctx.date)
    date_updated = ParseDate(ctx.date)

    # Identifiers: DOI, PDF and full-text URLs, plus PubMed / PMC URLs
    # built from the record's identifiers (None when the id is falsy).
    identifiers = Map(
        Delegate(WorkIdentifier),
        ctx.doi,
        ctx.pdf_url,
        ctx.fulltext_html_url,
        RunPython(
            lambda pubmed_id: (
                'https://www.ncbi.nlm.nih.gov/pubmed/{}'.format(pubmed_id)
                if pubmed_id else None
            ),
            Try(ctx.identifiers.pubmed),
        ),
        RunPython(
            lambda pmc_id: (
                'https://www.ncbi.nlm.nih.gov/pmc/articles/{}'.format(pmc_id)
                if pmc_id else None
            ),
            Try(ctx.identifiers.pmc),
        ),
    )

    subjects = Map(Delegate(ThroughSubjects), Subjects(ctx.subjects))
    # Tags are built from both keywords and subjects.
    tags = Map(
        Delegate(ThroughTags),
        Try(ctx.keywords),
        Try(ctx.subjects),
    )

    # Authors become Creator agents, publishers become Publisher agents.
    related_agents = Concat(
        Map(Delegate(Creator), ctx.author),
        Map(Delegate(Publisher), ctx.publisher),
    )

    # Additional journal-level values read from the record.
    class Extra:
        volume = Try(ctx.volume)
        journal_title = Try(ctx.journal_title)
        journal_abbrev = Try(ctx.journal_abbrev)
        description_html = Try(ctx['description-html'])
        issn = Try(ctx.issn)
# Holder for additional values read from the record (``ctx``).
# NOTE(review): 'parse_date' is referenced by name only; presumably it is a
# method of the parser this class belongs to -- not visible here, confirm.
class Extra:
    modified = RunPython('parse_date', ctx.date)
    # Unlike the neighbouring fields, subjects is read without Try.
    subjects = ctx.subjects
    affiliations = Map(
        Delegate(Association.using(entity=Delegate(Institution))),
        ctx,
    )
    identifiers = Try(ctx.identifiers)
    emails = Try(ctx.author_email)
    description_html = Try(ctx['description-html'])
# Holder for additional values read from the record (``ctx``).
# NOTE(review): 'parse_date' is referenced by name only; presumably it is a
# method of the parser this class belongs to -- not visible here, confirm.
class Extra:
    modified = RunPython('parse_date', ctx.date)
    subjects = Try(ctx.subjects)
    affiliations = Map(
        Delegate(Association.using(entity=Delegate(Institution))),
        ctx,
    )
    # identifiers is read without Try, unlike most fields here.
    identifiers = ctx.identifiers
    volume = Try(ctx.volume)
    emails = Try(ctx.author_email)
    journal_title = Try(ctx.journal_title)
    journal_abbrev = Try(ctx.journal_abbrev)
    description_html = Try(ctx['description-html'])
# Declarative parser: each attribute below names the part of the source
# record (``ctx``) that feeds the CreativeWork field of the same name.
class CreativeWork(Parser):
    title = ctx.title
    description = Try(ctx.description)
    contributors = Map(Delegate(Contributor), ctx.author)

    # The ISSN link is delegated on its own; the PDF, DOI and full-text
    # URLs are mapped through ThroughLinks, and both results are combined.
    links = Concat(
        Delegate(ThroughLinks.using(link=Delegate(ISSN)), ctx.issn),
        Map(
            Delegate(ThroughLinks),
            ctx.pdf_url,
            RunPython('format_doi', ctx.doi),
            ctx.fulltext_html_url,
        ),
    )

    publishers = Map(
        Delegate(Association.using(entity=Delegate(Publisher))),
        ctx.publisher,
    )
    # Institutions come from the de-duplicated author institution names.
    institutions = Map(
        Delegate(Association.using(entity=Delegate(Institution))),
        RunPython('get_author_institute', ctx),
    )

    date_published = RunPython('parse_date', ctx.date)
    language = ctx.language
    # Tags are built from both keywords and subjects.
    tags = Map(
        Delegate(ThroughTags),
        Try(ctx.keywords),
        Try(ctx.subjects),
    )
    subjects = Map(Delegate(ThroughSubjects), Subjects(ctx.subjects))

    # Additional values read from the record alongside the fields above.
    class Extra:
        modified = RunPython('parse_date', ctx.date)
        subjects = Try(ctx.subjects)
        identifiers = ctx.identifiers
        volume = Try(ctx.volume)
        emails = Try(ctx.author_email)
        journal_title = Try(ctx.journal_title)
        journal_abbrev = Try(ctx.journal_abbrev)
        description_html = Try(ctx['description-html'])

    def format_doi(self, doi):
        """Return ``doi`` as formatted by ``format_doi_as_url``."""
        return format_doi_as_url(self, doi)

    def parse_date(self, date_str):
        """Parse ``date_str`` and return it as a UTC ISO-8601 string."""
        return arrow.get(dateparser.parse(date_str)).to('UTC').isoformat()

    def get_author_institute(self, context):
        """Return the record's institution names, de-duplicated in order.

        ``author_institution`` is preferred and may be a single string or
        an iterable of strings.  Otherwise ``author_institutions`` (with
        an 's') is used; it is a single string split on ``'; '``.  When
        neither key is present an empty list is returned instead of
        raising ``KeyError``.
        """
        if 'author_institution' in context:
            raw = context['author_institution']
            # A lone string is one institution, not a sequence of characters.
            names = [raw] if isinstance(raw, str) else raw
        elif 'author_institutions' in context:
            # author_institutions with an 's' is sometimes present when
            # author_institution is not.
            names = context['author_institutions'].split('; ')
        else:
            # No institution information on this record.
            return []

        # Drop duplicates while keeping first-seen order.
        seen = set()
        unique = []
        for name in names:
            if name not in seen:
                seen.add(name)
                unique.append(name)
        return unique
# Declarative parser for a Person built from the record's embedded user.
# Each name field is taken from the user's data attributes or, as the
# second OneOf alternative, from the meta of the first embedded error.
class Person(Parser):
    given_name = OneOf(
        ctx.embeds.users.data.attributes.given_name,
        ctx.embeds.users.errors[0].meta.given_name,
    )
    family_name = OneOf(
        ctx.embeds.users.data.attributes.family_name,
        ctx.embeds.users.errors[0].meta.family_name,
    )
    # additional_name is fed by the source's ``middle_names`` field.
    additional_name = OneOf(
        ctx.embeds.users.data.attributes.middle_names,
        ctx.embeds.users.errors[0].meta.middle_names,
    )
    suffix = OneOf(
        ctx.embeds.users.data.attributes.suffix,
        ctx.embeds.users.errors[0].meta.suffix,
    )

    # Identifiers: the user's html link and profile image links.
    identifiers = Map(
        Delegate(ThroughIdentifiers),
        Try(ctx.embeds.users.data.links.html),
        Try(ctx.embeds.users.data.links.profile_image),
        Try(ctx.embeds.users.errors[0].meta.profile_image),
    )

    # Additional account-level values read from the embedded user data.
    class Extra:
        nodes = Try(
            ctx.embeds.users.data.relationships.nodes.links.related.href
        )
        locale = Try(ctx.embeds.users.data.attributes.locale)
        date_registered = Try(
            ctx.embeds.users.data.attributes.date_registered
        )
        active = Try(ctx.embeds.users.data.attributes.active)
        timezone = Try(ctx.embeds.users.data.attributes.timezone)
        profile_image = Try(ctx.embeds.users.data.links.profile_image)
# Declarative parser for a Contributor; the Person is delegated on the
# same record (``ctx``) that supplies the contributor attributes.
class Contributor(Parser):
    person = Delegate(Person, ctx)
    # Citation order comes from the record's ``index`` attribute.
    order_cited = ctx.attributes.index
    bibliographic = ctx.attributes.bibliographic
    # Full name from the embedded user data, or from the meta of the
    # first embedded error as the second OneOf alternative.
    cited_name = OneOf(
        ctx.embeds.users.data.attributes.full_name,
        ctx.embeds.users.errors[0].meta.full_name,
    )
# Declarative parser mapping a source record (``ctx``) onto a Preprint.
class Preprint(Parser):
    title = ctx.attributes.title
    # The description is fed by the record's ``abstract`` attribute.
    description = Try(ctx.attributes.abstract)
    contributors = Map(Delegate(Contributor), ctx.contributors)
    date_updated = ParseDate(ctx.attributes.date_modified)
    date_published = ParseDate(ctx.attributes.date_created)

    # NOTE: OSF has a direct mapping to SHARE's taxonomy. Subjects() is
    # not needed
    subjects = Map(Delegate(ThroughSubjects), ctx.attributes.subjects)

    links = Map(
        Delegate(ThroughLinks),
        ctx.links.self,
        ctx.links.html,
        Try(ctx.links.doi),
    )
    tags = Map(Delegate(ThroughTags), Try(ctx.attributes.tags))
    # rights is fed by the record's ``node_license`` attribute.
    rights = Try(ctx.attributes.node_license)

    # Additional values read from the record; none are wrapped in Try.
    class Extra:
        files = ctx.relationships.files.links.related.href
        type_soc = ctx.type
        id_soc = ctx.id
        doi_plain = ctx.attributes.doi
# Through-parser: delegates the current context to Link as ``link``.
class ThroughLinks(Parser):
    link = Delegate(Link, ctx)
# Subclasses Parser like every other Delegate-based class in this file;
# the original declaration had no base class.
class Affiliation(Parser):
    # The entity used here could be any of the entity subclasses
    # (Institution, Publisher, Funder, Organization).
    entity = Delegate(Institution, ctx)
# Through-parser: delegates the current context to Email as ``email``.
class PersonEmail(Parser):
    email = Delegate(Email, ctx)
# Declarative parser for a Creator: the same context value is delegated
# to Person as the agent and also used as the cited name.
class Creator(Parser):
    agent = Delegate(Person, ctx)
    cited_as = ctx
    # NOTE(review): ctx('index') presumably yields this item's position in
    # the list being mapped -- confirm against the DSL.
    order_cited = ctx('index')
# Declarative parser for a Publisher: the current context is delegated to
# Organization as the agent.
class Publisher(Parser):
    agent = Delegate(Organization, ctx)
# Through-parser: delegates the current context to Identifier as
# ``identifier``.
class ThroughIdentifiers(Parser):
    identifier = Delegate(Identifier, ctx)
# Through-parser: delegates the current context to Tag as ``tag``.
class ThroughTags(Parser):
    tag = Delegate(Tag, ctx)
# Declarative parser for a Contributor: the same context value is
# delegated to Person and also used as the cited name.
class Contributor(Parser):
    person = Delegate(Person, ctx)
    cited_name = ctx
    # NOTE(review): ctx('index') presumably yields this item's position in
    # the list being mapped -- confirm against the DSL.
    order_cited = ctx('index')
# Through-parser: delegates the current context to Subject as ``subject``.
class ThroughSubjects(Parser):
    subject = Delegate(Subject, ctx)