def references(self):
    """List of namedtuples representing references listed in the abstract,
    in the form (position, id, doi, title, authors, sourcetitle,
    publicationyear, volume, issue, first, last, text, fulltext).

    `position` is the number at which the reference appears in the
    document, `id` is the Scopus ID of the referenced abstract (EID
    without the "2-s2.0-"), `authors` is a list of the names of the
    authors in the format "Surname, Initials" (None if no authors are
    given), `first` and `last` refer to the page range, `text` is
    Scopus-provided information on the publication and `fulltext` is the
    text the authors used for the reference.

    `id` and `doi` are None when the reference carries no identifier of
    the respective type.  Returns None if no references are found.

    Note: Requires the FULL view of the article.  Might be empty even if
    refcount is positive.
    """
    fields = ('position id doi title authors sourcetitle publicationyear '
              'volume issue first last text fulltext')
    ref = namedtuple('Reference', fields)
    path = ['item', 'bibrecord', 'tail', 'bibliography', 'reference']
    out = []
    for item in listify(chained_get(self._json, path, [])):
        info = item['ref-info']
        volisspag = info.get('ref-volisspag', {})
        try:
            auth = listify(info['ref-authors']['author'])
            authors = [', '.join([d['ce:surname'], d['ce:initials']])
                       for d in auth]
        except KeyError:  # No authors given
            authors = None
        # A reference without an identifier list must not abort the
        # whole property; treat it as "no identifiers known"
        try:
            ids = listify(info['refd-itemidlist']['itemid'])
        except KeyError:
            ids = []
        # First identifier of each type wins; None if the type is absent
        doi = next((d['$'] for d in ids if d['@idtype'] == 'DOI'), None)
        scopus_id = next((d['$'] for d in ids if d['@idtype'] == 'SGR'),
                         None)
        new = ref(position=item.get('@id'),
                  id=scopus_id,
                  doi=doi,
                  authors=authors,
                  title=info.get('ref-title', {}).get('ref-titletext'),
                  sourcetitle=info.get('ref-sourcetitle'),
                  publicationyear=info.get('ref-publicationyear',
                                           {}).get('@first'),
                  volume=volisspag.get('voliss', {}).get('@volume'),
                  issue=volisspag.get('voliss', {}).get('@issue'),
                  first=volisspag.get('pagerange', {}).get('@first'),
                  last=volisspag.get('pagerange', {}).get('@last'),
                  text=info.get('ref-text'),
                  fulltext=item.get('ref-fulltext'))
        out.append(new)
    return out or None
def sequencebank(self):
    """List of namedtuples representing biological entities defined or
    mentioned in the text, in the form (name, sequence_number, type).

    One namedtuple is produced per sequence number of each bank.
    Returns None if no sequence bank is listed.
    """
    entry = namedtuple('Sequencebank', 'name sequence_number type')
    path = ['enhancement', 'sequencebanks', 'sequencebank']
    banks = listify(chained_get(self._head, path, []))
    collected = [
        entry(name=bank['@name'], sequence_number=num['$'],
              type=num['@type'])
        for bank in banks
        for num in listify(bank['sequence-number'])
    ]
    return collected or None
def authors(self):
    """A list of namedtuples representing the article's authors, in the
    form (auid, indexed_name, surname, given_name, affiliation).

    `affiliation` is the list of the author's affiliation IDs, or None
    if the author has no (non-empty) affiliation entry.  Returns None if
    no authors are listed.

    Note: The affiliation referred to here is what Scopus' algorithm
    determined as the main affiliation.  Property `authorgroup` provides
    all affiliations.
    """
    author = namedtuple(
        'Author', 'auid indexed_name surname given_name affiliation')
    records = []
    for entry in chained_get(self._json, ['authors', 'author'], []):
        # Drop empty affiliation entries before reading their IDs
        aff_ids = [a.get('@id')
                   for a in listify(entry.get('affiliation')) if a]
        records.append(author(
            auid=entry['@auid'],
            surname=entry.get('ce:surname'),
            indexed_name=entry.get('ce:indexed-name'),
            affiliation=aff_ids or None,
            given_name=chained_get(entry,
                                   ['preferred-name', 'ce:given-name'])))
    return records or None
def classificationgroup(self):
    """List with (subject group ID, number of documents)-tuples, or None
    if no classification is present.
    """
    path = ['author-profile', 'classificationgroup', 'classifications',
            'classification']
    pairs = []
    for entry in listify(chained_get(self._json, path, [])):
        pairs.append((entry['$'], entry['@frequency']))
    return pairs or None
def __init__(self, author_id, refresh=False):
    """Class to represent a Scopus Author query by the scopus-id.

    Parameters
    ----------
    author_id : str or int
        The ID of the author to search for.  Optionally expressed
        as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

    refresh : bool (optional, default=False)
        Whether to refresh the cached file (if it exists) or not.

    Notes
    -----
    The files are cached in ~/.scopus/author_retrieval/{author_id} (without
    eventually leading '9-s2.0-').

    A UserWarning listing the alias IDs is issued when the response
    cannot be indexed by position (KeyError), which the warning text
    attributes to a merged author profile.
    """
    # Keep only the part after the last dash and normalise it via int()
    trailing = str(author_id).split('-')[-1]
    self._id = str(int(trailing))
    # Load json
    Retrieval.__init__(self, self._id, 'AuthorRetrieval', refresh)
    self._json = self._json['author-retrieval-response']
    # Checks
    try:
        self._json = self._json[0]
    except KeyError:  # Incomplete forward
        urls = listify(self._json['alias']['prism:url'])
        main_ids = [entry['$'].split(':')[-1] for entry in urls]
        template = ('Author profile with ID {} has been merged and the main '
                    'profile is now one of {}. Please update your records '
                    'manually. Functionality of this object is '
                    'reduced.')
        warn(template.format(author_id, ', '.join(main_ids)), UserWarning)
def authors(self):
    """A list of namedtuples representing the article's authors, in the
    form (auid, indexed_name, surname, given_name, affiliation).

    `affiliation` holds the affiliation IDs of the author as a list, or
    None when no non-empty affiliation entry exists.  Returns None if no
    authors are listed.

    Note: Affiliations listed here are often incomplete and sometimes
    use the first author's affiliation for all others.  Rather use
    property author_group.
    """
    fields = 'auid indexed_name surname given_name affiliation'
    make_author = namedtuple('Author', fields)
    result = []
    for person in chained_get(self._json, ['authors', 'author'], []):
        present = [a for a in listify(person.get('affiliation')) if a]
        affiliation = [a.get('@id') for a in present] if present else None
        given = chained_get(person, ['preferred-name', 'ce:given-name'])
        result.append(make_author(auid=person['@auid'],
                                  surname=person.get('ce:surname'),
                                  indexed_name=person.get('ce:indexed-name'),
                                  affiliation=affiliation,
                                  given_name=given))
    return result or None
def __init__(self, author_id, refresh=False):
    """Class to represent a Scopus Author query by the scopus-id.

    Parameters
    ----------
    author_id : str or int
        The ID of the author to search for.  Optionally expressed
        as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

    refresh : bool (optional, default=False)
        Whether to refresh the cached file (if it exists) or not.

    Notes
    -----
    The files are cached in ~/.scopus/author_retrieval/{author_id} (without
    eventually leading '9-s2.0-').

    If indexing the response by position raises KeyError, a UserWarning
    naming the alias IDs is emitted; the warning text attributes this to
    a merged author profile.
    """
    # Load json
    numeric_part = str(author_id).split('-')[-1]
    self._id = str(int(numeric_part))
    Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                       refresh=refresh)
    self._json = self._json['author-retrieval-response']
    # Checks
    try:
        self._json = self._json[0]
    except KeyError:  # Incomplete forward
        aliases = []
        for link in listify(self._json['alias']['prism:url']):
            # The alias ID is whatever follows the last colon
            aliases.append(link['$'].split(':')[-1])
        message = ('Author profile with ID {} has been merged and the main '
                   'profile is now one of {}. Please update your records '
                   'manually. Functionality of this object is '
                   'reduced.').format(author_id, ', '.join(aliases))
        warn(message, UserWarning)
def isbn(self):
    """ISBNs belonging to publicationName as tuple of varying length,
    (e.g. ISBN-10 or ISBN-13).  Returns None if no ISBN is listed.
    """
    entries = listify(chained_get(self._head, ['source', 'isbn'], []))
    if entries:
        return tuple(entry['$'] for entry in entries)
    return None
def classificationgroup(self):
    """List with (subject group ID, number of documents)-tuples.

    Returns None if the profile contains no classification.
    """
    keys = ['author-profile', 'classificationgroup', 'classifications',
            'classification']
    classifications = listify(chained_get(self._json, keys, []))
    groups = [(c['$'], c['@frequency']) for c in classifications]
    return groups or None
def idxterms(self):
    """List of index terms.

    Returns None when the `idxterms` entry does not support `.get`
    (the case the original comment calls "idxterms is empty").
    """
    try:
        container = self._json.get("idxterms", {})
        terms = listify(container.get('mainterm', []))
    except AttributeError:  # idxterms is empty
        return None
    try:
        values = [term['$'] for term in terms]
    except AttributeError:
        return None
    return values
def subject_areas(self):
    """List of namedtuples containing subject areas of the article
    in the form (area, abbreviation, code).

    Returns None if no subject area is listed.

    Note: Requires the FULL view of the article.
    """
    path = ['subject-areas', 'subject-area']
    make_area = namedtuple('Area', 'area abbreviation code')
    areas = []
    for entry in listify(chained_get(self._json, path, [])):
        areas.append(make_area(area=entry['$'],
                               abbreviation=entry['@abbrev'],
                               code=entry['@code']))
    return areas or None
def chemicals(self):
    """List of namedtuples representing chemical entities in the form
    (source, chemical_name, cas_registry_number).  In case multiple
    numbers given, they are joined on ";".

    Returns None if no chemical is listed.
    """
    chemical = namedtuple('Chemical',
                          'source chemical_name cas_registry_number')
    path = ['enhancement', 'chemicalgroup', 'chemicals']
    found = []
    for group in listify(chained_get(self._head, path, [])):
        for chem in listify(group['chemical']):
            cas = chem.get('cas-registry-number')
            try:  # Multiple numbers given
                cas = ";".join([entry['$'] for entry in cas])
            except TypeError:
                # Not a sequence of mappings: keep the raw value as is
                pass
            found.append(chemical(source=group['@source'],
                                  cas_registry_number=cas,
                                  chemical_name=chem['chemical-name']))
    return found or None
def name_variants(self):
    """List of named tuples containing variants of the author name with
    number of documents published with that variant, in the form
    (indexed_name, initials, surname, given_name, doc_count).

    Returns None if no name variant is listed.
    """
    variant = namedtuple(
        'Variant', 'indexed_name initials surname given_name doc_count')
    path = ['author-profile', 'name-variant']
    variants = []
    for entry in listify(chained_get(self._json, path, [])):
        variants.append(variant(indexed_name=entry['indexed-name'],
                                surname=entry['surname'],
                                doc_count=entry.get('@doc-count'),
                                initials=entry['initials'],
                                given_name=entry.get('given-name')))
    return variants or None
def journal_history(self):
    """List of named tuples of authored publications in the form
    (sourcetitle, abbreviation, type, issn).  issn is only given
    for journals.  abbreviation and issn may be None.

    Returns None if the history is empty.
    """
    journal = namedtuple('Journal', 'sourcetitle abbreviation type issn')
    path = ['author-profile', 'journal-history', 'journal']
    history = []
    for entry in listify(chained_get(self._json, path, [])):
        history.append(journal(sourcetitle=entry['sourcetitle'],
                               issn=entry.get('issn'),
                               abbreviation=entry.get('sourcetitle-abbrev'),
                               type=entry['@type']))
    return history or None
def authorgroup(self):
    """A list of namedtuples representing the article's authors organized
    by affiliation, in the form (affiliation_id, dptid, organization,
    city, postalcode, addresspart, country, auid, indexed_name,
    surname, given_name).  If "given_name" is not present, fall back
    to initials, and for entries with neither (collaborations) to the
    entry's "ce:text".

    `affiliation_id` joins multiple affiliation IDs on ", ".  Returns
    None if no author group is listed.

    Note: Affiliation information might be missing or mal-assigned even
    when it looks correct in the web view.  In this case please request
    a correction.
    """
    out = []
    fields = ('affiliation_id dptid organization city postalcode '
              'addresspart country auid indexed_name surname given_name')
    auth = namedtuple('Author', fields)
    items = listify(self._head.get('author-group', []))
    for item in items:
        # Affiliation information
        aff = item.get('affiliation', {})
        try:
            aff_ids = listify(aff['affiliation-id'])
            aff_id = ", ".join([a["@afid"] for a in aff_ids])
        except KeyError:
            aff_id = aff.get("@afid")
        org = _get_org(aff)
        # Author information (might relate to collaborations)
        authors = listify(item.get('author', item.get('collaboration', [])))
        for au in authors:
            # Look the keys up one after another: dict.get() evaluates its
            # default eagerly, so `au.get(given, au[initials])` would drop
            # an existing given name whenever initials are missing
            if 'ce:given-name' in au:
                given = au['ce:given-name']
            elif 'ce:initials' in au:
                given = au['ce:initials']
            else:  # Collaboration
                given = au.get('ce:text')
            new = auth(affiliation_id=aff_id,
                       organization=org,
                       city=aff.get('city'),
                       dptid=aff.get("@dptid"),
                       postalcode=aff.get('postal-code'),
                       addresspart=aff.get('address-part'),
                       country=aff.get('country'),
                       auid=au.get('@auid'),
                       surname=au.get('ce:surname'),
                       given_name=given,
                       indexed_name=chained_get(
                           au, ['preferred-name', 'ce:indexed-name']))
            out.append(new)
    return out or None
def authorgroup(self):
    """A list of namedtuples representing the article's authors organized
    by affiliation, in the form (affiliation_id, organization, city_group,
    country, auid, indexed_name, surname, given_name).  If "given_name"
    is not present, fall back to initials, and for entries with neither
    (collaborations) to the entry's "ce:text".

    `affiliation_id` joins multiple affiliation IDs on ", "; when the
    group has no "affiliation-id" entry, the affiliation's "@afid" (or
    None) is used instead.  Returns None if no author group is listed.

    Note: Affiliation information might be missing or mal-assigned even
    when it looks correct in the web view.  In this case please request
    a correction.
    """
    out = []
    fields = ('affiliation_id organization city_group country '
              'auid indexed_name surname given_name')
    auth = namedtuple('Author', fields)
    items = listify(self._head.get('author-group', []))
    for item in items:
        # Affiliation information
        aff = item.get('affiliation', {})
        try:
            aff_ids = listify(aff['affiliation-id'])
            aff_id = ", ".join([a["@afid"] for a in aff_ids])
        except KeyError:
            # Groups without affiliation details must not abort parsing
            aff_id = aff.get("@afid")
        org = _get_org(aff)
        # Author information (might relate to collaborations)
        authors = listify(item.get('author', item.get('collaboration', [])))
        for au in authors:
            # Look the keys up one after another: dict.get() evaluates its
            # default eagerly, so `au.get(given, au[initials])` would drop
            # an existing given name whenever initials are missing
            if 'ce:given-name' in au:
                given = au['ce:given-name']
            elif 'ce:initials' in au:
                given = au['ce:initials']
            else:  # Collaboration
                given = au.get('ce:text')
            new = auth(affiliation_id=aff_id,
                       organization=org,
                       city_group=aff.get('city-group'),
                       country=aff.get('country'),
                       auid=au.get('@auid'),
                       surname=au.get('ce:surname'),
                       given_name=given,
                       indexed_name=chained_get(
                           au, ['preferred-name', 'ce:indexed-name']))
            out.append(new)
    return out or None
def subject_areas(self):
    """List of namedtuples containing subject areas of the article
    in the form (area, abbreviation, code), or None if there are none.

    Note: Requires the FULL view of the article.
    """
    subject = namedtuple('Area', 'area abbreviation code')
    raw_areas = listify(
        chained_get(self._json, ['subject-areas', 'subject-area'], []))
    collected = []
    for raw in raw_areas:
        collected.append(subject(area=raw['$'],
                                 abbreviation=raw['@abbrev'],
                                 code=raw['@code']))
    return collected or None
def affiliation(self):
    """A list of namedtuples representing listed affiliations in
    the form (id, name, city, country).

    Returns None if no affiliation is listed.

    Note: Might be empty.
    """
    record = namedtuple('Affiliation', 'id name city country')
    entries = listify(self._json.get('affiliation', []))
    listed = [
        record(id=entry.get('@id'),
               name=entry.get('affilname'),
               city=entry.get('affiliation-city'),
               country=entry.get('affiliation-country'))
        for entry in entries
    ]
    return listed or None
def journal_history(self):
    """List of named tuples of authored publications in the form
    (sourcetitle, abbreviation, type, issn).  issn is only given
    for journals.  abbreviation and issn may be None.

    Returns None if no publication source is listed.
    """
    fields = 'sourcetitle abbreviation type issn'
    source = namedtuple('Journal', fields)
    keys = ['author-profile', 'journal-history', 'journal']
    publications = listify(chained_get(self._json, keys, []))
    sources = []
    for pub in publications:
        sources.append(source(sourcetitle=pub['sourcetitle'],
                              issn=pub.get('issn'),
                              abbreviation=pub.get('sourcetitle-abbrev'),
                              type=pub['@type']))
    return sources or None
def contributor_group(self):
    """List of namedtuples representing contributors compiled by Scopus,
    in the form (given_name, initials, surname, indexed_name, role).

    Returns None if no contributor group is listed.
    """
    contributor = namedtuple(
        'Contributor', 'given_name initials surname indexed_name role')
    groups = listify(
        chained_get(self._head, ['source', 'contributor-group'], []))
    # Each group wraps the person's data in a 'contributor' mapping
    people = [group.get('contributor', {}) for group in groups]
    contributors = [
        contributor(indexed_name=person.get('ce:indexed-name'),
                    role=person.get('@role'),
                    surname=person.get('ce:surname'),
                    given_name=person.get('ce:given-name'),
                    initials=person.get('ce:initials'))
        for person in people
    ]
    return contributors or None
def funding(self):
    """List of namedtuples parsed funding information in the form
    (agency string id acronym country).

    Returns None if no funding information is listed.
    """
    grant = namedtuple('Funding', 'agency string id acronym country')
    path = ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding']
    entries = listify(chained_get(self._json, path, []))
    parsed = [
        grant(agency=entry.get('xocs:funding-agency'),
              string=entry.get('xocs:funding-agency-matched-string'),
              id=entry.get('xocs:funding-agency-id'),
              acronym=entry.get('xocs:funding-agency-acronym'),
              country=entry.get('xocs:funding-agency-country'))
        for entry in entries
    ]
    return parsed or None
def name_variants(self):
    """List of named tuples containing variants of the author name with
    number of documents published with that variant, in the form
    (indexed_name, initials, surname, given_name, doc_count).

    Returns None if the profile lists no name variant.
    """
    keys = ['author-profile', 'name-variant']
    make_variant = namedtuple(
        'Variant', 'indexed_name initials surname given_name doc_count')
    spellings = listify(chained_get(self._json, keys, []))
    result = []
    for spelling in spellings:
        result.append(make_variant(
            indexed_name=spelling['indexed-name'],
            surname=spelling['surname'],
            doc_count=spelling.get('@doc-count'),
            initials=spelling['initials'],
            given_name=spelling.get('given-name')))
    return result or None
def contributor_group(self):
    """List of namedtuples representing contributors compiled by Scopus,
    in the form (given_name, initials, surname, indexed_name, role).

    Returns None if the source lists no contributor group.
    """
    fields = 'given_name initials surname indexed_name role'
    make_person = namedtuple('Contributor', fields)
    path = ['source', 'contributor-group']
    result = []
    for group in listify(chained_get(self._head, path, [])):
        # Missing 'contributor' yields a record consisting of None values
        details = group.get('contributor', {})
        result.append(make_person(
            indexed_name=details.get('ce:indexed-name'),
            role=details.get('@role'),
            surname=details.get('ce:surname'),
            given_name=details.get('ce:given-name'),
            initials=details.get('ce:initials')))
    return result or None
def authors(self):
    """A list of namedtuples representing the article's authors, in the
    form (auid, indexed_name, surname, given_name, affiliation).

    `affiliation` lists the IDs of the author's non-empty affiliation
    entries and is None if there are none.  Returns None if no authors
    are listed.

    Note: The affiliation referred to here is what Scopus' algorithm
    determined as the main affiliation.  Property `authorgroup` provides
    all affiliations.
    """
    name_path = ['preferred-name', 'ce:given-name']
    author = namedtuple(
        'Author', 'auid indexed_name surname given_name affiliation')
    listed = []
    for entry in chained_get(self._json, ['authors', 'author'], []):
        ids = [a.get('@id') for a in listify(entry.get('affiliation')) if a]
        if not ids:
            ids = None
        listed.append(author(auid=entry['@auid'],
                             surname=entry.get('ce:surname'),
                             indexed_name=entry.get('ce:indexed-name'),
                             affiliation=ids,
                             given_name=chained_get(entry, name_path)))
    return listed or None
def references(self):
    """List of namedtuples representing references listed in the abstract,
    in the form (position, id, doi, title, authors, authors_auid,
    authors_affiliationid, sourcetitle, publicationyear, volume, issue,
    first, last, citedbycount, text, fulltext).

    `position` is the number at which the reference appears in the
    document, `id` is the Scopus ID of the referenced abstract (EID
    without the "2-s2.0-"), `authors` is a string of the names of the
    authors in the format "Surname1, Initials1; Surname2, Initials2",
    `authors_auid` is a string of the author IDs joined on "; ",
    `authors_affiliationid` is a string of the authors' affiliation IDs
    joined on "; ", `sourcetitle` is the name of the source (e.g. the
    journal), `publicationyear` is the year of the publication as a
    string, `volume` and `issue`, are strings referring to the volume
    and issue, `first` and `last` refer to the page range,
    `citedbycount` is a string for the total number of citations of the
    cited item, `text` is Scopus-provided information on the
    publication, `fulltext` is the text the authors used for the
    reference.

    Returns None if no references are found.

    Note: Requires either the FULL view or REF view of the article.
    Might be empty even if refcount is positive.  Specific fields can
    be empty.  Author lists (authors, authors_auid,
    authors_affiliationid) may contain duplicates but have been
    filtered of None's.
    """
    fields = ('position id doi title authors authors_auid '
              'authors_affiliationid sourcetitle publicationyear volume '
              'issue first last citedbycount text fulltext')
    reference = namedtuple('Reference', fields)
    path = ['item', 'bibrecord', 'tail', 'bibliography', 'reference']
    # Used when the nested `path` does not resolve
    fallback = self._json.get('references', {}).get('reference', [])
    collected = []
    for entry in listify(chained_get(self._json, path, fallback)):
        info = entry.get('ref-info', entry)
        volisspag = info.get('volisspag', {}) or {}
        if isinstance(volisspag, list):
            volisspag = volisspag[0]
        # Parse author information
        try:  # FULL view parsing
            people = listify(entry['ref-info']['ref-authors']['author'])
            names = [', '.join([p['ce:surname'], p['ce:initials']])
                     for p in people]
            auids = None
            affids = None
        except KeyError:  # REF view parsing
            people = (info.get('author-list') or {}).get('author', [])
            names = [
                ', '.join(filter(None, [p.get('ce:surname'),
                                        p.get('ce:given-name')]))
                for p in people
            ]
            auids = "; ".join(filter(None, [p.get('@auid') for p in people]))
            affiliations = filter(None,
                                  [p.get('affiliation') for p in people])
            affids = "; ".join([a.get('@id') for a in affiliations])
        # Parse IDs
        try:
            ids = listify(info['refd-itemidlist']['itemid'])
        except KeyError:
            ids = []
        try:
            doi = _select_by_idtype(ids, 'DOI')[0]
        except IndexError:
            doi = info.get('ce:doi')
        try:
            scopus_id = _select_by_idtype(ids, 'SGR')[0]
        except IndexError:
            scopus_id = info.get('scopus-id')
        # Combine information
        collected.append(reference(
            position=entry.get('@id'),
            id=scopus_id,
            doi=doi,
            authors="; ".join(names),
            authors_auid=auids or None,
            authors_affiliationid=affids or None,
            title=info.get('ref-title', {}).get('ref-titletext',
                                                info.get('title')),
            sourcetitle=info.get('ref-sourcetitle',
                                 info.get('sourcetitle')),
            publicationyear=info.get('ref-publicationyear',
                                     {}).get('@first'),
            volume=volisspag.get('voliss', {}).get('@volume'),
            issue=volisspag.get('voliss', {}).get('@issue'),
            first=volisspag.get('pagerange', {}).get('@first'),
            last=volisspag.get('pagerange', {}).get('@last'),
            citedbycount=info.get('citedby-count'),
            text=info.get('ref-text'),
            fulltext=entry.get('ref-fulltext')))
    return collected or None
def results(self):
    """A list of namedtuples in the form (eid doi pii pubmed_id title
    subtype creator afid affilname affiliation_city affiliation_country
    author_count author_names author_ids author_afids coverDate
    coverDisplayDate publicationName issn source_id eIssn aggregationType
    volume issueIdentifier article_number pageRange description
    authkeywords citedby_count openaccess fund_acr fund_no fund_sponsor).
    Field definitions correspond to
    https://dev.elsevier.com/guides/ScopusSearchViews.htm, except for
    afid, affilname, affiliation_city, affiliation_country, author_count,
    author_names, author_ids and author_afids:  These information are
    joined on ";".  In case an author has multiple affiliations, they are
    joined on "-" (e.g. Author1Aff;Author2Aff1-Author2Aff2).

    Returns None if there are no results.

    Notes
    -----
    The list of authors and the list of affiliations per author are
    deduplicated.
    """
    fields = ('eid doi pii pubmed_id title subtype creator afid affilname '
              'affiliation_city affiliation_country author_count '
              'author_names author_ids author_afids coverDate '
              'coverDisplayDate publicationName issn source_id eIssn '
              'aggregationType volume issueIdentifier article_number '
              'pageRange description authkeywords citedby_count '
              'openaccess fund_acr fund_no fund_sponsor')
    doc = namedtuple('Document', fields)
    # (key in the scratch dict, key of the affiliation entry), in the
    # order in which they are filled
    aff_keys = (("affilname", 'affilname'),
                ("afid", 'afid'),
                ("aff_city", 'affiliation-city'),
                ("aff_country", 'affiliation-country'))
    out = []
    for item in self._json:
        info = {}
        # Parse affiliations; whatever was stored before a KeyError stays
        try:
            for target, source in aff_keys:
                info[target] = _join(item['affiliation'], source)
        except KeyError:
            pass
        # Parse authors
        try:
            # Deduplicate list of authors
            authors = _deduplicate(item['author'])
            # Extract information
            surnames = _replace_none([d['surname'] for d in authors])
            firstnames = _replace_none([d['given-name'] for d in authors])
            names = [", ".join([surname, firstname])
                     for surname, firstname in zip(surnames, firstnames)]
            info["auth_names"] = ";".join(names)
            info["auth_ids"] = ";".join([d['authid'] for d in authors])
            affs = [
                '-'.join([d['$'] for d in
                          listify(_deduplicate(auth.get('afid', [])))])
                for auth in authors
            ]
            info["auth_afid"] = (';'.join(affs) or None)
        except KeyError:
            pass
        date = item.get('prism:coverDate')
        if isinstance(date, list):
            date = date[0].get('$')
        out.append(doc(
            article_number=item.get('article-number'),
            title=item.get('dc:title'),
            fund_sponsor=item.get('fund-sponsor'),
            subtype=item.get('subtype'),
            issn=item.get('prism:issn'),
            creator=item.get('dc:creator'),
            affilname=info.get("affilname"),
            author_names=info.get("auth_names"),
            doi=item.get('prism:doi'),
            coverDate=date,
            volume=item.get('prism:volume'),
            coverDisplayDate=item.get('prism:coverDisplayDate'),
            publicationName=item.get('prism:publicationName'),
            source_id=item.get('source-id'),
            author_ids=info.get("auth_ids"),
            aggregationType=item.get('prism:aggregationType'),
            issueIdentifier=item.get('prism:issueIdentifier'),
            pageRange=item.get('prism:pageRange'),
            author_afids=info.get("auth_afid"),
            fund_no=item.get('fund-no'),
            affiliation_country=info.get("aff_country"),
            citedby_count=item.get('citedby-count'),
            openaccess=item.get('openaccess'),
            eIssn=item.get('prism:eIssn'),
            author_count=item.get('author-count', {}).get('$'),
            affiliation_city=info.get("aff_city"),
            afid=info.get("afid"),
            description=item.get('dc:description'),
            pii=item.get('pii'),
            authkeywords=item.get('authkeywords'),
            eid=item['eid'],
            fund_acr=item.get('fund-acr'),
            pubmed_id=item.get('pubmed-id')))
    return out or None
def results(self):
    """A list of namedtuples in the form (eid doi pii pubmed_id title
    subtype creator afid affilname affiliation_city affiliation_country
    author_count author_names author_ids author_afids coverDate
    coverDisplayDate publicationName issn source_id eIssn aggregationType
    volume issueIdentifier article_number pageRange description
    authkeywords citedby_count openaccess fund_acr fund_no fund_sponsor).
    Field definitions correspond to
    https://dev.elsevier.com/guides/ScopusSearchViews.htm, except for
    afid, affilname, affiliation_city, affiliation_country, author_count,
    author_names, author_ids and author_afids:  These information are
    joined on ";".  In case an author has multiple affiliations, they are
    joined on "-" (e.g. Author1Aff;Author2Aff1-Author2Aff2).

    Returns None if the search yielded no documents.

    Notes
    -----
    The list of authors and the list of affiliations per author are
    deduplicated.
    """
    fields = ('eid doi pii pubmed_id title subtype creator afid affilname '
              'affiliation_city affiliation_country author_count '
              'author_names author_ids author_afids coverDate '
              'coverDisplayDate publicationName issn source_id eIssn '
              'aggregationType volume issueIdentifier article_number '
              'pageRange description authkeywords citedby_count '
              'openaccess fund_acr fund_no fund_sponsor')
    document = namedtuple('Document', fields)
    documents = []
    for record in self._json:
        parsed = {}
        # Parse affiliations (a KeyError keeps what was parsed so far)
        try:
            parsed["affilname"] = _join(record['affiliation'], 'affilname')
            parsed["afid"] = _join(record['affiliation'], 'afid')
            parsed["aff_city"] = _join(record['affiliation'],
                                       'affiliation-city')
            parsed["aff_country"] = _join(record['affiliation'],
                                          'affiliation-country')
        except KeyError:
            pass
        # Parse authors
        try:
            # Deduplicate list of authors
            people = _deduplicate(record['author'])
            # Extract information
            surnames = _replace_none([p['surname'] for p in people])
            firstnames = _replace_none([p['given-name'] for p in people])
            full_names = []
            for pair in zip(surnames, firstnames):
                full_names.append(", ".join([pair[0], pair[1]]))
            parsed["auth_names"] = ";".join(full_names)
            parsed["auth_ids"] = ";".join([p['authid'] for p in people])
            per_author = []
            for person in people:
                afids = listify(_deduplicate(person.get('afid', [])))
                per_author.append('-'.join([a['$'] for a in afids]))
            parsed["auth_afid"] = (';'.join(per_author) or None)
        except KeyError:
            pass
        # The cover date may arrive as a list of mappings; use the first
        cover_date = record.get('prism:coverDate')
        if isinstance(cover_date, list):
            cover_date = cover_date[0].get('$')
        entry = document(
            article_number=record.get('article-number'),
            title=record.get('dc:title'),
            fund_sponsor=record.get('fund-sponsor'),
            subtype=record.get('subtype'),
            issn=record.get('prism:issn'),
            creator=record.get('dc:creator'),
            affilname=parsed.get("affilname"),
            author_names=parsed.get("auth_names"),
            doi=record.get('prism:doi'),
            coverDate=cover_date,
            volume=record.get('prism:volume'),
            coverDisplayDate=record.get('prism:coverDisplayDate'),
            publicationName=record.get('prism:publicationName'),
            source_id=record.get('source-id'),
            author_ids=parsed.get("auth_ids"),
            aggregationType=record.get('prism:aggregationType'),
            issueIdentifier=record.get('prism:issueIdentifier'),
            pageRange=record.get('prism:pageRange'),
            author_afids=parsed.get("auth_afid"),
            fund_no=record.get('fund-no'),
            affiliation_country=parsed.get("aff_country"),
            citedby_count=record.get('citedby-count'),
            openaccess=record.get('openaccess'),
            eIssn=record.get('prism:eIssn'),
            author_count=record.get('author-count', {}).get('$'),
            affiliation_city=parsed.get("aff_city"),
            afid=parsed.get("afid"),
            description=record.get('dc:description'),
            pii=record.get('pii'),
            authkeywords=record.get('authkeywords'),
            eid=record['eid'],
            fund_acr=record.get('fund-acr'),
            pubmed_id=record.get('pubmed-id'))
        documents.append(entry)
    return documents or None
def references(self):
    """List of namedtuples representing references listed in the abstract,
    in the form (position, id, doi, title, authors, authors_auid,
    authors_affiliationid, sourcetitle, publicationyear, volume, issue,
    first, last, citedbycount, text, fulltext).

    `position` is the number at which the reference appears in the
    document, `id` is the Scopus ID of the referenced abstract (EID
    without the "2-s2.0-"), `authors` is a string of the names of the
    authors in the format "Surname1, Initials1; Surname2, Initials2",
    `authors_auid` is a string of the author IDs joined on "; ",
    `authors_affiliationid` is a string of the authors' affiliation IDs
    joined on "; ", `sourcetitle` is the name of the source (e.g. the
    journal), `publicationyear` is the year of the publication as a
    string, `volume` and `issue`, are strings referring to the volume
    and issue, `first` and `last` refer to the page range,
    `citedbycount` is a string for the total number of citations of the
    cited item, `text` is Scopus-provided information on the
    publication, `fulltext` is the text the authors used for the
    reference.

    Returns None if no references are found.

    Note: Requires either the FULL view or REF view of the article.
    Might be empty even if refcount is positive.  Specific fields can
    be empty.  Author lists (authors, authors_auid,
    authors_affiliationid) may contain duplicates but have been
    filtered of None's.
    """
    fields = ('position id doi title authors authors_auid '
              'authors_affiliationid sourcetitle publicationyear volume '
              'issue first last citedbycount text fulltext')
    make_ref = namedtuple('Reference', fields)
    nested_path = ['item', 'bibrecord', 'tail', 'bibliography', 'reference']
    # Alternative location, used when `nested_path` does not resolve
    flat_refs = self._json.get('references', {}).get('reference', [])
    raw_refs = listify(chained_get(self._json, nested_path, flat_refs))
    parsed = []
    for raw in raw_refs:
        details = raw.get('ref-info', raw)
        volisspag = details.get('volisspag', {}) or {}
        if isinstance(volisspag, list):
            volisspag = volisspag[0]
        # Parse author information
        try:  # FULL view parsing
            auth = listify(raw['ref-info']['ref-authors']['author'])
            author_names = [
                ', '.join([a['ce:surname'], a['ce:initials']]) for a in auth
            ]
            author_ids = None
            affiliation_ids = None
        except KeyError:  # REF view parsing
            auth = (details.get('author-list') or {}).get('author', [])
            author_names = [
                ', '.join(filter(None, [a.get('ce:surname'),
                                        a.get('ce:given-name')]))
                for a in auth
            ]
            author_ids = "; ".join(
                filter(None, [a.get('@auid') for a in auth]))
            with_aff = filter(None, [a.get('affiliation') for a in auth])
            affiliation_ids = "; ".join([aff.get('@id') for aff in with_aff])
        # Parse IDs
        try:
            ids = listify(details['refd-itemidlist']['itemid'])
        except KeyError:
            ids = []
        try:
            doi_values = [d['$'] for d in ids if d['@idtype'] == 'DOI']
            doi = doi_values[0]
        except IndexError:
            doi = details.get('ce:doi')
        try:
            sgr_values = [d['$'] for d in ids if d['@idtype'] == 'SGR']
            scopus_id = sgr_values[0]
        except IndexError:
            scopus_id = details.get('scopus-id')
        # Combine information
        new = make_ref(
            position=raw.get('@id'),
            id=scopus_id,
            doi=doi,
            authors="; ".join(author_names),
            authors_auid=author_ids or None,
            authors_affiliationid=affiliation_ids or None,
            title=details.get('ref-title', {}).get('ref-titletext',
                                                   details.get('title')),
            sourcetitle=details.get('ref-sourcetitle',
                                    details.get('sourcetitle')),
            publicationyear=details.get('ref-publicationyear',
                                        {}).get('@first'),
            volume=volisspag.get('voliss', {}).get('@volume'),
            issue=volisspag.get('voliss', {}).get('@issue'),
            first=volisspag.get('pagerange', {}).get('@first'),
            last=volisspag.get('pagerange', {}).get('@last'),
            citedbycount=details.get('citedby-count'),
            text=details.get('ref-text'),
            fulltext=raw.get('ref-fulltext'))
        parsed.append(new)
    return parsed or None