def classificationgroup(self):
    """List of (subject group ID, number of documents)-tuples, or None
    if the list would be empty.
    """
    keys = ['author-profile', 'classificationgroup', 'classifications',
            'classification']
    groups = listify(chained_get(self._json, keys, []))
    pairs = []
    for group in groups:
        pairs.append((group['$'], group['@frequency']))
    return pairs or None
def isbn(self):
    """ISBNs belonging to publicationName as tuple of varying length
    (e.g. ISBN-10 or ISBN-13), or None if no ISBN entry is found.
    """
    entries = listify(chained_get(self._head, ['source', 'isbn'], []))
    if not entries:
        return None
    return tuple(entry['$'] for entry in entries)
def confsponsor(self):
    """Sponsor(s) of the conference the abstract belongs to.

    A list of sponsors is reduced to the '$' value of each element; any
    other non-empty value is returned unchanged; an empty value gives None.
    """
    found = chained_get(self._confevent, ['confsponsors', 'confsponsor'], [])
    if not len(found):
        return None
    if not isinstance(found, list):
        return found
    return [sponsor['$'] for sponsor in found]
def name_variants(self):
    """A list of namedtuples representing variants of the affiliation name
    with number of documents referring to this variant, in the form
    (name, doc_count).

    doc_count is None when the entry carries no '@doc-count'.  The list
    is empty when no variants are found.
    """
    variant = namedtuple('Variant', 'name doc_count')
    path = ['name-variants', 'name-variant']
    # Normalise with listify, as the sibling accessors do, so that a lone
    # entry which is not wrapped in a list is treated as one variant
    # instead of being iterated key by key.
    entries = listify(chained_get(self._json, path, []))
    return [variant(name=var['$'], doc_count=var.get('@doc-count'))
            for var in entries]
def subject_areas(self):
    """List of named tuples of subject areas in the form
    (area, abbreviation, code) of author's publication, or None if no
    subject areas are found.
    """
    path = ['subject-areas', 'subject-area']
    area = namedtuple('Subjectarea', 'area abbreviation code')
    # Normalise with listify, as the sibling accessors do, so that a lone
    # entry which is not wrapped in a list is treated as one subject area
    # instead of being iterated key by key.
    entries = listify(chained_get(self._json, path, []))
    areas = [area(area=item['$'], code=item['@code'],
                  abbreviation=item['@abbrev'])
             for item in entries]
    return areas or None
def subject_areas(self):
    """List of namedtuples containing subject areas of the article in the
    form (area, abbreviation, code), or None if the list would be empty.

    Note: Requires the FULL view of the article.
    """
    area = namedtuple('Area', 'area abbreviation code')
    entries = listify(
        chained_get(self._json, ['subject-areas', 'subject-area'], []))
    collected = []
    for entry in entries:
        collected.append(area(area=entry['$'],
                              abbreviation=entry['@abbrev'],
                              code=entry['@code']))
    return collected or None
def publisher(self):
    """Name of the publisher of the abstract.

    Note: Information provided in the FULL view of the article might be
    more complete.
    """
    # Prefer the entry from the FULL view; otherwise use the coredata field
    name = chained_get(self._head, ['source', 'publisher', 'publishername'])
    if name is not None:
        return name
    return self._json['coredata'].get('dc:publisher')
def journal_history(self):
    """List of named tuples of authored publications in the form
    (sourcetitle, abbreviation, type, issn), or None if the list would be
    empty.  issn is only given for journals.  abbreviation and issn may
    be None.
    """
    journal = namedtuple('Journal', 'sourcetitle abbreviation type issn')
    entries = listify(chained_get(
        self._json, ['author-profile', 'journal-history', 'journal'], []))
    history = []
    for entry in entries:
        history.append(journal(sourcetitle=entry['sourcetitle'],
                               abbreviation=entry.get('sourcetitle-abbrev'),
                               type=entry['@type'],
                               issn=entry.get('issn')))
    return history or None
def name_variants(self):
    """List of named tuples containing variants of the author name with
    number of documents published with that variant, in the form
    (indexed_name, initials, surname, given_name, doc_count), or None if
    the list would be empty.
    """
    variant = namedtuple(
        'Variant', 'indexed_name initials surname given_name doc_count')
    entries = listify(
        chained_get(self._json, ['author-profile', 'name-variant'], []))
    variants = []
    for entry in entries:
        variants.append(variant(indexed_name=entry['indexed-name'],
                                surname=entry['surname'],
                                initials=entry['initials'],
                                given_name=entry.get('given-name'),
                                doc_count=entry.get('@doc-count')))
    return variants or None
def authors(self):
    """A list of namedtuples representing the article's authors, in the
    form (auid, indexed_name, surname, given_name, affiliation), or None
    if no authors are found.

    `affiliation` is a list with the '@id' of each of the author's
    affiliation entries, or None if the author has none.

    Note: The affiliation referred to here is what Scopus' algorithm
    determined as the main affiliation.  Property `authorgroup` provides
    all affiliations.
    """
    fields = 'auid indexed_name surname given_name affiliation'
    auth = namedtuple('Author', fields)
    out = []
    # listify guards against a lone author entry that is not wrapped in a
    # list (it would otherwise be iterated key by key).
    entries = listify(chained_get(self._json, ['authors', 'author'], []))
    for item in entries:
        # Drop empty affiliation entries (e.g. None when the key is absent)
        affs = [a for a in listify(item.get('affiliation')) if a]
        aff_ids = [a.get('@id') for a in affs] or None
        new = auth(auid=item['@auid'], surname=item.get('ce:surname'),
                   indexed_name=item.get('ce:indexed-name'),
                   affiliation=aff_ids,
                   given_name=chained_get(
                       item, ['preferred-name', 'ce:given-name']))
        out.append(new)
    return out or None
def sequencebank(self):
    """List of namedtuples representing biological entities defined or
    mentioned in the text, in the form (name, sequence_number, type), or
    None if the list would be empty.
    """
    bank = namedtuple('Sequencebank', 'name sequence_number type')
    entries = listify(chained_get(
        self._head, ['enhancement', 'sequencebanks', 'sequencebank'], []))
    # One tuple per sequence number of every bank
    found = [bank(name=entry['@name'], sequence_number=num['$'],
                  type=num['@type'])
             for entry in entries
             for num in listify(entry['sequence-number'])]
    return found or None
def contributor_group(self):
    """List of namedtuples representing contributors compiled by Scopus,
    in the form (given_name, initials, surname, indexed_name, role), or
    None if the list would be empty.
    """
    pers = namedtuple('Contributor',
                      'given_name initials surname indexed_name role')
    groups = listify(
        chained_get(self._head, ['source', 'contributor-group'], []))
    people = []
    for group in groups:
        person = group.get('contributor', {})
        people.append(pers(given_name=person.get('ce:given-name'),
                           initials=person.get('ce:initials'),
                           surname=person.get('ce:surname'),
                           indexed_name=person.get('ce:indexed-name'),
                           role=person.get('@role')))
    return people or None
def funding(self):
    """List of namedtuples of parsed funding information in the form
    (agency, string, id, acronym, country), or None if the list would be
    empty.
    """
    fund = namedtuple('Funding', 'agency string id acronym country')
    entries = listify(chained_get(
        self._json,
        ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding'], []))
    parsed = [
        fund(agency=entry.get('xocs:funding-agency'),
             string=entry.get('xocs:funding-agency-matched-string'),
             id=entry.get('xocs:funding-agency-id'),
             acronym=entry.get('xocs:funding-agency-acronym'),
             country=entry.get('xocs:funding-agency-country'))
        for entry in entries
    ]
    return parsed or None
def chemicals(self):
    """List of namedtuples representing chemical entities in the form
    (source, chemical_name, cas_registry_number), or None if the list
    would be empty.  In case multiple numbers given, they are joined
    on ";".
    """
    chemical = namedtuple('Chemical',
                          'source chemical_name cas_registry_number')
    groups = listify(chained_get(
        self._head, ['enhancement', 'chemicalgroup', 'chemicals'], []))
    found = []
    for group in groups:
        for entry in listify(group['chemical']):
            cas = entry.get('cas-registry-number')
            try:
                # Multiple numbers given
                cas = ";".join([part['$'] for part in cas])
            except TypeError:
                # Value cannot be joined that way: keep it as it is
                pass
            found.append(chemical(source=group['@source'],
                                  cas_registry_number=cas,
                                  chemical_name=entry['chemical-name']))
    return found or None
def authorgroup(self):
    """A list of namedtuples representing the article's authors organized
    by affiliation, in the form (affiliation_id, dptid, organization,
    city, postalcode, addresspart, country, auid, indexed_name, surname,
    given_name), or None if the list would be empty.

    If "given_name" is not present, fall back to initials; if neither is
    present (collaborations), fall back to the 'ce:text' entry.

    Note: Affiliation information might be missing or mal-assigned even
    when it looks correct in the web view.  In this case please request
    a correction.
    """
    out = []
    fields = 'affiliation_id dptid organization city postalcode '\
             'addresspart country auid indexed_name surname given_name'
    auth = namedtuple('Author', fields)
    items = listify(self._head.get('author-group', []))
    for item in items:
        # Affiliation information
        aff = item.get('affiliation', {})
        try:
            aff_ids = listify(aff['affiliation-id'])
            aff_id = ", ".join([a["@afid"] for a in aff_ids])
        except KeyError:
            aff_id = aff.get("@afid")
        org = _get_org(aff)
        # Author information (might relate to collaborations)
        authors = listify(item.get('author', item.get('collaboration', [])))
        for au in authors:
            # Explicit membership tests: passing au['ce:initials'] as the
            # default of dict.get() would evaluate it eagerly and raise
            # KeyError for authors that have a given name but no initials,
            # discarding the given name.
            if 'ce:given-name' in au:
                given = au['ce:given-name']
            elif 'ce:initials' in au:
                given = au['ce:initials']
            else:  # Collaboration
                given = au.get('ce:text')
            new = auth(affiliation_id=aff_id, organization=org,
                       city=aff.get('city'), dptid=aff.get("@dptid"),
                       postalcode=aff.get('postal-code'),
                       addresspart=aff.get('address-part'),
                       country=aff.get('country'), auid=au.get('@auid'),
                       surname=au.get('ce:surname'), given_name=given,
                       indexed_name=chained_get(
                           au, ['preferred-name', 'ce:indexed-name']))
            out.append(new)
    return out or None
def state(self):
    """The state (country's administrative subunit) of the affiliation."""
    return chained_get(self._json,
                       ['institution-profile', 'address', 'state'])
def refcount(self):
    """Number of references of an article.

    Note: Requires the FULL view of the article.
    """
    return chained_get(
        self._json,
        ['item', 'bibrecord', 'tail', 'bibliography', '@refcount'])
def publisheraddress(self):
    """Address of the publisher of the abstract."""
    path = ['source', 'publisher', 'publisheraddress']
    return chained_get(self._head, path)
def surname(self):
    """Author's preferred surname."""
    return chained_get(self._json,
                       ['author-profile', 'preferred-name', 'surname'])
def status(self):
    """The status of the author profile."""
    path = ['author-profile', 'status']
    return chained_get(self._json, path)
def indexed_name(self):
    """Author's name as indexed by Scopus."""
    return chained_get(
        self._json, ['author-profile', 'preferred-name', 'indexed-name'])
def given_name(self):
    """Author's preferred given name."""
    return chained_get(
        self._json, ['author-profile', 'preferred-name', 'given-name'])
def website(self):
    """Website of publisher."""
    path = ['source', 'website', 'ce:e-address', '$']
    return chained_get(self._head, path)
def funding_text(self):
    """The raw text from which Scopus derives funding information."""
    return chained_get(
        self._json,
        ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding-text'])
def references(self):
    """List of namedtuples representing references listed in the abstract,
    in the form (position, id, doi, title, authors, authors_auid,
    authors_affiliationid, sourcetitle, publicationyear, volume, issue,
    first, last, citedbycount, text, fulltext), or None if the list would
    be empty.

    `position` is the number at which the reference appears in the
    document, `id` is the Scopus ID of the referenced abstract (EID
    without the "2-s2.0-"), `authors` is a string of the names of the
    authors in the format "Surname1, Initials1; Surname2, Initials2",
    `authors_auid` is a string of the author IDs joined on "; ",
    `authors_affiliationid` is a string of the authors' affiliation IDs
    joined on "; ", `sourcetitle` is the name of the source (e.g. the
    journal), `publicationyear` is the year of the publication as a
    string, `volume` and `issue`, are strings referring to the volume and
    issue, `first` and `last` refer to the page range, `citedbycount` is
    a string for the total number of citations of the cited item, `text`
    is Scopus-provided information on the publication, `fulltext` is the
    text the authors used for the reference.

    Note: Requires either the FULL view or REF view of the article.
    Might be empty even if refcount is positive.  Specific fields can be
    empty.  Author lists (authors, authors_auid, authors_affiliationid)
    may contain duplicates but have been filtered of None's.
    """
    out = []
    fields = 'position id doi title authors authors_auid '\
             'authors_affiliationid sourcetitle publicationyear volume '\
             'issue first last citedbycount text fulltext'
    ref = namedtuple('Reference', fields)
    path = ['item', 'bibrecord', 'tail', 'bibliography', 'reference']
    # Fall back to the top-level 'references' entry when the bibrecord
    # path yields nothing
    fallback = self._json.get('references', {}).get('reference', [])
    items = listify(chained_get(self._json, path, fallback))
    for item in items:
        info = item.get('ref-info', item)
        volisspag = info.get('volisspag', {}) or {}
        if isinstance(volisspag, list):
            volisspag = volisspag[0]
        # Parse author information
        try:  # FULL view parsing
            auth = listify(item['ref-info']['ref-authors']['author'])
            authors = [', '.join([d['ce:surname'], d['ce:initials']])
                       for d in auth]
            auids = None
            affids = None
        except KeyError:  # REF view parsing
            # listify so that a lone author entry not wrapped in a list
            # is handled; a missing or empty entry yields no authors.
            auth = listify(
                (info.get('author-list') or {}).get('author') or [])
            authors = [', '.join(filter(None, [d.get('ce:surname'),
                                               d.get('ce:given-name')]))
                       for d in auth]
            auids = "; ".join(filter(None, [d.get('@auid') for d in auth]))
            affs = filter(None, [d.get('affiliation') for d in auth])
            # Drop missing IDs: str.join raises TypeError on None
            affids = "; ".join(
                filter(None, [aff.get('@id') for aff in affs]))
        # Parse IDs
        try:
            ids = listify(info['refd-itemidlist']['itemid'])
        except KeyError:
            ids = []
        try:
            doi = _select_by_idtype(ids, 'DOI')[0]
        except IndexError:
            doi = info.get('ce:doi')
        try:
            scopus_id = _select_by_idtype(ids, 'SGR')[0]
        except IndexError:
            scopus_id = info.get('scopus-id')
        # Combine information
        new = ref(position=item.get('@id'), id=scopus_id, doi=doi,
                  authors="; ".join(authors), authors_auid=auids or None,
                  authors_affiliationid=affids or None,
                  title=info.get('ref-title', {}).get(
                      'ref-titletext', info.get('title')),
                  sourcetitle=info.get('ref-sourcetitle',
                                       info.get('sourcetitle')),
                  publicationyear=info.get(
                      'ref-publicationyear', {}).get('@first'),
                  volume=volisspag.get('voliss', {}).get('@volume'),
                  issue=volisspag.get('voliss', {}).get('@issue'),
                  first=volisspag.get('pagerange', {}).get('@first'),
                  last=volisspag.get('pagerange', {}).get('@last'),
                  citedbycount=info.get('citedby-count'),
                  text=info.get('ref-text'),
                  fulltext=item.get('ref-fulltext'))
        out.append(new)
    return out or None
def postal_code(self):
    """The postal code of the affiliation."""
    return chained_get(self._json,
                       ['institution-profile', 'address', 'postal-code'])
def references(self):
    """List of namedtuples representing references listed in the abstract,
    in the form (position, id, doi, title, authors, authors_auid,
    authors_affiliationid, sourcetitle, publicationyear, volume, issue,
    first, last, citedbycount, text, fulltext), or None if the list would
    be empty.

    `position` is the number at which the reference appears in the
    document, `id` is the Scopus ID of the referenced abstract (EID
    without the "2-s2.0-"), `authors` is a string of the names of the
    authors in the format "Surname1, Initials1; Surname2, Initials2",
    `authors_auid` is a string of the author IDs joined on "; ",
    `authors_affiliationid` is a string of the authors' affiliation IDs
    joined on "; ", `sourcetitle` is the name of the source (e.g. the
    journal), `publicationyear` is the year of the publication as a
    string, `volume` and `issue`, are strings referring to the volume and
    issue, `first` and `last` refer to the page range, `citedbycount` is
    a string for the total number of citations of the cited item, `text`
    is Scopus-provided information on the publication, `fulltext` is the
    text the authors used for the reference.

    Note: Requires either the FULL view or REF view of the article.
    Might be empty even if refcount is positive.  Specific fields can be
    empty.  Author lists (authors, authors_auid, authors_affiliationid)
    may contain duplicates but have been filtered of None's.
    """
    out = []
    fields = 'position id doi title authors authors_auid '\
             'authors_affiliationid sourcetitle publicationyear volume '\
             'issue first last citedbycount text fulltext'
    ref = namedtuple('Reference', fields)
    path = ['item', 'bibrecord', 'tail', 'bibliography', 'reference']
    # Fall back to the top-level 'references' entry when the bibrecord
    # path yields nothing
    fallback = self._json.get('references', {}).get('reference', [])
    items = listify(chained_get(self._json, path, fallback))
    for item in items:
        info = item.get('ref-info', item)
        volisspag = info.get('volisspag', {}) or {}
        if isinstance(volisspag, list):
            volisspag = volisspag[0]
        # Parse author information
        try:  # FULL view parsing
            auth = listify(item['ref-info']['ref-authors']['author'])
            authors = [', '.join([d['ce:surname'], d['ce:initials']])
                       for d in auth]
            auids = None
            affids = None
        except KeyError:  # REF view parsing
            # listify so that a lone author entry not wrapped in a list
            # is handled; a missing or empty entry yields no authors.
            auth = listify(
                (info.get('author-list') or {}).get('author') or [])
            authors = [', '.join(filter(None, [d.get('ce:surname'),
                                               d.get('ce:given-name')]))
                       for d in auth]
            auids = "; ".join(filter(None, [d.get('@auid') for d in auth]))
            affs = filter(None, [d.get('affiliation') for d in auth])
            # Drop missing IDs: str.join raises TypeError on None
            affids = "; ".join(
                filter(None, [aff.get('@id') for aff in affs]))
        # Parse IDs
        try:
            ids = listify(info['refd-itemidlist']['itemid'])
        except KeyError:
            ids = []
        try:
            doi = [d['$'] for d in ids if d['@idtype'] == 'DOI'][0]
        except IndexError:
            doi = info.get('ce:doi')
        try:
            scopus_id = [d['$'] for d in ids if d['@idtype'] == 'SGR'][0]
        except IndexError:
            scopus_id = info.get('scopus-id')
        # Combine information
        new = ref(position=item.get('@id'), id=scopus_id, doi=doi,
                  authors="; ".join(authors), authors_auid=auids or None,
                  authors_affiliationid=affids or None,
                  title=info.get('ref-title', {}).get(
                      'ref-titletext', info.get('title')),
                  sourcetitle=info.get('ref-sourcetitle',
                                       info.get('sourcetitle')),
                  publicationyear=info.get(
                      'ref-publicationyear', {}).get('@first'),
                  volume=volisspag.get('voliss', {}).get('@volume'),
                  issue=volisspag.get('voliss', {}).get('@issue'),
                  first=volisspag.get('pagerange', {}).get('@first'),
                  last=volisspag.get('pagerange', {}).get('@last'),
                  citedbycount=info.get('citedby-count'),
                  text=info.get('ref-text'),
                  fulltext=item.get('ref-fulltext'))
        out.append(new)
    return out or None
def initials(self):
    """Author's preferred initials."""
    return chained_get(self._json,
                       ['author-profile', 'preferred-name', 'initials'])
def __init__(self, identifier=None, view='META_ABS', refresh=False,
             id_type=None, EID=None):
    """Class to represent the results from a Scopus abstract.

    Parameters
    ----------
    identifier : str or int
        The identifier of an abstract.  Can be the Scopus EID, the Scopus
        ID, the PII, the Pubmed-ID or the DOI.

    EID : str (deprecated since 1.2)
        Deprecated in favor of `identifier`, will be removed in a future
        release.

    id_type: str (optional, default=None)
        The type of used ID.
        Allowed values: None, 'eid', 'pii', 'scopus_id', 'pubmed_id',
        'doi'.  If the value is None, the function tries to infer the ID
        type itself.

    view : str (optional, default=META_ABS)
        The view of the file that should be downloaded.  Will not take
        effect for already cached files.  Allowed values: META, META_ABS,
        REF, FULL, where FULL includes all information of META_ABS view
        and META_ABS includes all information of the META view.  See
        https://dev.elsevier.com/guides/AbstractRetrievalViews.htm
        for details.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Raises
    ------
    ValueError
        If the id_type parameter or the view parameter contains
        invalid entries.

    Notes
    -----
    The files are cached in ~/.scopus/abstract_retrieval/{identifier}.
    In case a DOI is used as identifier, an underscore replaces the
    forward slash in the filename.
    """
    # Honour the deprecated EID parameter when no identifier is given
    if identifier is None and EID:
        warn("Parameter EID is deprecated in favor of parameter "
             "identifier. Please update your code.", UserWarning)
        identifier = EID
    identifier = str(identifier)

    # Validate the requested view
    valid_views = ('META', 'META_ABS', 'REF', 'FULL')
    if view not in valid_views:
        raise ValueError('view parameter must be one of ' +
                         ', '.join(valid_views))

    # Infer the ID type when none is given, otherwise validate it
    if id_type is None:
        id_type = detect_id_type(identifier)
    else:
        valid_id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
        if id_type not in valid_id_types:
            raise ValueError('id_type parameter must be one of ' +
                             ', '.join(valid_id_types))

    # Load json
    Retrieval.__init__(self, identifier=identifier, id_type=id_type,
                       api='AbstractRetrieval', refresh=refresh, view=view)
    self._json = self._json['abstracts-retrieval-response']
    self._head = chained_get(self._json, ["item", "bibrecord", "head"], {})
    conf_path = ['source', 'additional-srcinfo', 'conferenceinfo',
                 'confevent']
    self._confevent = chained_get(self._head, conf_path, {})
def language(self):
    """Language of the article."""
    path = ['language', '@xml:lang']
    return chained_get(self._json, path)
def historical_identifier(self):
    """Scopus IDs of previous profiles now comprising this profile, as a
    list of strings, or None if the list would be empty.

    Each ID is the part of the entry's '$' value after the last ":".
    """
    path = ["coredata", 'historical-identifier']
    # Normalise with listify, as the sibling accessors do, so that a lone
    # entry which is not wrapped in a list is treated as one identifier
    # instead of being iterated key by key.
    hist = listify(chained_get(self._json, path, []))
    return [d['$'].split(":")[-1] for d in hist] or None