def authorgroup(self) -> Optional[List[NamedTuple]]:
    """A list of namedtuples representing the article's authors organized
    by affiliation, in the form (affiliation_id, dptid, organization,
    city, postalcode, addresspart, country, collaboration, auid, orcid,
    indexed_name, surname, given_name).

    If "given_name" is not present, fall back to initials, and if those
    are missing as well, to the entry's "ce:text".  Returns None when no
    author entry is found.

    Note: Affiliation information might be missing or mal-assigned even
    when it looks correct in the web view.  In this case please request
    a correction.  It is generally missing for collaborations.
    """
    # Information can be one of three forms:
    # 1. A dict with one key (author) or two keys (affiliation and author)
    # 2. A list of dicts as in 1, one for each affiliation (incl. missing)
    # 3. A list of two dicts with one key each (author and collaboration)
    # Initialization
    fields = 'affiliation_id dptid organization city postalcode '\
             'addresspart country collaboration auid orcid indexed_name '\
             'surname given_name'
    auth = namedtuple('Author', fields)
    # Work on a shallow copy: a collaboration entry is popped below, and
    # listify() may hand back the very list stored in self._head, in which
    # case popping would alter the result of later accesses.
    items = list(listify(self._head.get('author-group', [])))
    index_path = ['preferred-name', 'ce:indexed-name']
    # Check for collaboration (expected as the last entry when present)
    keys = {k for x in items for k in x.keys()}
    if "collaboration" in keys:
        collaboration = items.pop(-1)['collaboration']
    else:
        collaboration = {'ce:indexed-name': None}
    # Iterate through each author-affiliation combination
    out = []
    for item in items:
        if not item:
            continue
        # Affiliation information
        aff = item.get('affiliation', {})
        aff_id = make_int_if_possible(aff.get("@afid"))
        dep_id = make_int_if_possible(aff.get("@dptid"))
        org = _get_org(aff)
        # Author information (might relate to collaborations)
        authors = listify(item.get('author', item.get('collaboration', [])))
        for au in authors:
            # Lazy fallback chain.  The former
            # au.get('ce:given-name', au['ce:initials']) evaluated the
            # default eagerly and thus dropped an existing given name
            # whenever the initials were missing.
            given = au.get('ce:given-name',
                           au.get('ce:initials', au.get('ce:text')))
            new = auth(affiliation_id=aff_id,
                       dptid=dep_id,
                       organization=org,
                       city=aff.get('city'),
                       postalcode=aff.get('postal-code'),
                       addresspart=aff.get('address-part'),
                       country=aff.get('country'),
                       collaboration=collaboration.get('ce:indexed-name'),
                       auid=int(au['@auid']),
                       orcid=au.get('@orcid'),
                       indexed_name=chained_get(au, index_path),
                       surname=au.get('ce:surname'),
                       given_name=given)
            out.append(new)
    return out or None
def sequencebank(self):
    """List of namedtuples describing biological entities defined or
    mentioned in the text, in the form (name, sequence_number, type).

    Every sequence number of a bank yields its own tuple.  Returns None
    when no sequence bank is listed.
    """
    bank = namedtuple('Sequencebank', 'name sequence_number type')
    banks = listify(chained_get(
        self._head, ['enhancement', 'sequencebanks', 'sequencebank'], []))
    out = [bank(name=entry['@name'],
                sequence_number=num['$'],
                type=num['@type'])
           for entry in banks
           for num in listify(entry['sequence-number'])]
    return out or None
def authorgroup(self):
    """A list of namedtuples representing the article's authors organized
    by affiliation, in the form (affiliation_id, dptid, organization,
    city, postalcode, addresspart, country, auid, indexed_name,
    surname, given_name).

    Multiple affiliation IDs of one group are joined on ", ".
    If "given_name" is not present, fall back to initials, and if those
    are missing as well, to the entry's "ce:text".  Returns None when no
    author entry is found.

    Note: Affiliation information might be missing or mal-assigned even
    when it looks correct in the web view.  In this case please request
    a correction.
    """
    out = []
    fields = 'affiliation_id dptid organization city postalcode '\
             'addresspart country auid indexed_name surname given_name'
    auth = namedtuple('Author', fields)
    items = listify(self._head.get('author-group', []))
    index_path = ['preferred-name', 'ce:indexed-name']
    for item in items:
        if not item:
            continue
        # Affiliation information
        aff = item.get('affiliation', {})
        try:
            aff_ids = listify(aff['affiliation-id'])
            aff_id = ", ".join([a["@afid"] for a in aff_ids])
        except KeyError:
            # No (complete) 'affiliation-id' entry: use the plain attribute
            aff_id = aff.get("@afid")
        org = _get_org(aff)
        # Author information (might relate to collaborations)
        authors = listify(item.get('author', item.get('collaboration', [])))
        for au in authors:
            # Lazy fallback chain.  The former
            # au.get('ce:given-name', au['ce:initials']) evaluated the
            # default eagerly and thus dropped an existing given name
            # whenever the initials were missing.
            given = au.get('ce:given-name',
                           au.get('ce:initials', au.get('ce:text')))
            new = auth(affiliation_id=aff_id,
                       dptid=aff.get("@dptid"),
                       organization=org,
                       city=aff.get('city'),
                       postalcode=aff.get('postal-code'),
                       addresspart=aff.get('address-part'),
                       country=aff.get('country'),
                       auid=au.get('@auid'),
                       indexed_name=chained_get(au, index_path),
                       surname=au.get('ce:surname'),
                       given_name=given)
            out.append(new)
    return out or None
def correspondence(self):
    """List of namedtuples representing the authors to whom
    correspondence should be addressed, in the form (surname, initials,
    organization, country, city_group).

    Multiple organizations are joined on "; ".  Returns None when no
    correspondence entry exists.
    """
    Correspondence = namedtuple(
        'Correspondence', 'surname initials organization country city_group')
    out = []
    for entry in listify(self._head.get('correspondence', [])):
        affiliation = entry.get('affiliation', {})
        try:
            organization = affiliation['organization']
            try:
                organization = organization['$']
            except TypeError:
                # Not indexable by '$': several names were given
                organization = "; ".join(
                    [part['$'] for part in organization])
        except KeyError:
            organization = None
        person = entry.get('person', {})
        out.append(Correspondence(surname=person.get('ce:surname'),
                                  initials=person.get('ce:initials'),
                                  organization=organization,
                                  country=affiliation.get('country'),
                                  city_group=affiliation.get('city-group')))
    return out or None
def __init__(self, author_id, refresh=False):
    """Class to represent a Scopus Author query by the scopus-id.

    Parameters
    ----------
    author_id : str or int
        The ID of the author to search for.  Optionally expressed
        as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

    refresh : bool (optional, default=False)
        Whether to refresh the cached file (if it exists) or not.

    Notes
    -----
    The files are cached in ~/.scopus/author_retrieval/ENHANCED/{author_id}
    (without eventually leading '9-s2.0-').

    If the response cannot be indexed with 0 (KeyError), the profile is
    treated as merged: a UserWarning naming the alias IDs is issued.
    """
    # Keep only the part after the last hyphen, normalised via int()
    self._id = str(int(str(author_id).split('-')[-1]))
    # Only the ENHANCED view is requested here
    Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                       refresh=refresh, view="ENHANCED")
    self._json = self._json['author-retrieval-response']
    # Checks
    try:
        self._json = self._json[0]
    except KeyError:  # Incomplete forward
        urls = listify(self._json['alias']['prism:url'])
        merged_into = ', '.join([url['$'].split(':')[-1] for url in urls])
        message = ('Author profile with ID {} has been merged and the main '
                   'profile is now one of {}. Please update your records '
                   'manually. Functionality of this object is reduced.')
        warn(message.format(author_id, merged_into), UserWarning)
def authors(self):
    """A list of namedtuples representing the article's authors, in the
    form (auid, indexed_name, surname, given_name, affiliation).

    `affiliation` is a list of the "@id" values of the author's
    non-empty affiliation entries, or None if there are none.  Returns
    None when no author is listed.

    Note: The affiliation referred to here is what Scopus' algorithm
    determined as the main affiliation.  Property `authorgroup` provides
    all affiliations.
    """
    Author = namedtuple(
        'Author', 'auid indexed_name surname given_name affiliation')
    out = []
    for record in chained_get(self._json, ['authors', 'author'], []):
        entries = [e for e in listify(record.get('affiliation')) if e]
        # An empty list of IDs collapses to None
        aff_ids = [e.get('@id') for e in entries] or None
        given = chained_get(record, ['preferred-name', 'ce:given-name'])
        out.append(Author(auid=record['@auid'],
                          indexed_name=record.get('ce:indexed-name'),
                          surname=record.get('ce:surname'),
                          given_name=given,
                          affiliation=aff_ids))
    return out or None
def funding(self) -> Optional[List[NamedTuple]]:
    """List of namedtuples with parsed funding information, in the form
    (agency, agency_id, string, funding_id, acronym, country).

    `funding_id` is a list, or None if no ID is given.  Returns None
    when no funding entry exists.
    """

    def _get_funding_id(f_dict: dict) -> Optional[list]:
        # Collect the "$" values of all funding IDs.  A value that cannot
        # be traversed this way (TypeError) is a single ID and is wrapped
        # in a list as it is.
        raw_ids = f_dict.get('xocs:funding-id', [])
        try:
            collected = [entry['$'] for entry in raw_ids]
        except TypeError:
            return [raw_ids]
        return collected or None

    Funding = namedtuple(
        'Funding', 'agency agency_id string funding_id acronym country')
    location = ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding']
    out = [Funding(agency=entry.get('xocs:funding-agency'),
                   agency_id=entry.get('xocs:funding-agency-id'),
                   string=entry.get('xocs:funding-agency-matched-string'),
                   funding_id=_get_funding_id(entry),
                   acronym=entry.get('xocs:funding-agency-acronym'),
                   country=entry.get('xocs:funding-agency-country'))
           for entry in listify(chained_get(self._json, location, []))]
    return out or None
def classificationgroup(self):
    """List with (subject group ID, number of documents)-tuples.

    Values are passed on as found in the response.  Returns None when
    no classification is listed.
    """
    location = ['author-profile', 'classificationgroup', 'classifications',
                'classification']
    out = []
    for entry in listify(chained_get(self._json, location, [])):
        out.append((entry['$'], entry['@frequency']))
    return out or None
def authors(self) -> Optional[List[NamedTuple]]:
    """A list of namedtuples representing the article's authors, in the
    form (auid, indexed_name, surname, given_name, affiliation).

    In case multiple affiliation IDs are given, they are joined on ";".
    `affiliation` is None if there is no non-empty affiliation entry or
    if the IDs cannot be joined.  Returns None when no author is listed.

    Note: The affiliation referred to here is what Scopus' algorithm
    determined as the main affiliation.  Property `authorgroup` provides
    all affiliations.
    """
    Author = namedtuple(
        'Author', 'auid indexed_name surname given_name affiliation')
    out = []
    for record in chained_get(self._json, ['authors', 'author'], []):
        entries = [e for e in listify(record.get('affiliation')) if e]
        affiliation = None
        if entries:
            try:
                affiliation = ";".join([e.get('@id') for e in entries])
            except TypeError:
                # At least one ID is not a string (e.g. missing)
                affiliation = None
        given = chained_get(record, ['preferred-name', 'ce:given-name'])
        out.append(Author(auid=int(record['@auid']),
                          indexed_name=record.get('ce:indexed-name'),
                          surname=record.get('ce:surname'),
                          given_name=given,
                          affiliation=affiliation))
    return out or None
def contributor_group(self):
    """List of namedtuples representing contributors compiled by Scopus,
    in the form (given_name, initials, surname, indexed_name, role).

    Returns None when no contributor is listed.
    """
    pers = namedtuple('Contributor',
                      'given_name initials surname indexed_name role')

    def _as_contributor(data):
        # Build one tuple from a mapping with contributor details
        return pers(indexed_name=data.get('ce:indexed-name'),
                    role=data.get('@role'),
                    surname=data.get('ce:surname'),
                    given_name=data.get('ce:given-name'),
                    initials=data.get('ce:initials'))

    out = []
    groups = listify(chained_get(self._head, ['source', 'contributor-group'], []))
    for group in groups:
        try:
            entry = group.get('contributor', {})
            out.append(_as_contributor(entry))
        except AttributeError:
            # `.get` was not available; `entry` is then iterated instead
            # (presumably a list of contributor items - TODO confirm)
            for sub in entry:
                out.append(_as_contributor(sub.get('contributor', {})))
    return out or None
def isbn(self):
    """ISBNs belonging to publicationName as tuple of varying length
    (e.g. ISBN-10 or ISBN-13), or None if no ISBN is given.
    """
    entries = listify(chained_get(self._head, ['source', 'isbn'], []))
    if len(entries) > 0:
        return tuple(entry['$'] for entry in entries)
    return None
def __init__(self, author_id, refresh=False, view="ENHANCED"):
    """Interaction with the Author Retrieval API.

    Parameters
    ----------
    author_id : str or int
        The ID of the author to search for.  Optionally expressed
        as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    view : str (optional, default="ENHANCED")
        The view of the file that should be downloaded.  Allowed values:
        METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD includes all
        information of LIGHT view and ENHANCED includes all information
        of any view.  For details see
        https://dev.elsevier.com/sc_author_retrieval_views.html.
        Note: Neither the BASIC nor the DOCUMENTS view are active,
        although documented.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/AuthorRetrieval.html.

    Notes
    -----
    The directory for cached results is `{path}/ENHANCED/{author_id}`,
    where `path` is specified in `~/.scopus/config.ini` and `author_id`
    is stripped of an eventually leading `'9-s2.0-'`.
    """
    # Checks
    check_parameter_value(
        view, ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED'), "view")

    # Load json; the ID is the part after the last hyphen, normalised
    # via int()
    self._id = str(int(str(author_id).split('-')[-1]))
    Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                       refresh=refresh, view=view)
    self._json = self._json['author-retrieval-response']
    try:
        self._json = self._json[0]
    except KeyError:  # Incomplete forward
        urls = listify(self._json['alias']['prism:url'])
        self._alias = [url['$'].split(':')[-1] for url in urls]
        merged_into = ', '.join(self._alias)
        text = (f'Author profile with ID {author_id} has been merged and the '
                f'main profile is now one of {merged_into}. Please update your '
                'records manually. Functionality of this object is reduced.')
        warn(text, UserWarning)
    else:
        self._alias = None
    self._profile = self._json.get("author-profile", {})
def subject_areas(self):
    """List of namedtuples containing subject areas of the article
    in the form (area, abbreviation, code).

    Returns None when no subject area is listed.
    Note: Requires the FULL view of the article.
    """
    Area = namedtuple('Area', 'area abbreviation code')
    out = []
    for entry in listify(chained_get(self._json,
                                     ['subject-areas', 'subject-area'], [])):
        out.append(Area(area=entry['$'],
                        abbreviation=entry['@abbrev'],
                        code=entry['@code']))
    return out or None
def name_variants(self):
    """List of named tuples containing variants of the author name with
    number of documents published with that variant, in the form
    (indexed_name, initials, surname, given_name, doc_count).

    Returns None when no name variant is listed.
    """
    Variant = namedtuple(
        'Variant', 'indexed_name initials surname given_name doc_count')
    out = []
    for entry in listify(self._profile.get('name-variant', [])):
        # Mandatory keys are looked up in a fixed order
        indexed_name = entry['indexed-name']
        surname = entry['surname']
        initials = entry['initials']
        out.append(Variant(indexed_name=indexed_name,
                           initials=initials,
                           surname=surname,
                           given_name=entry.get('given-name'),
                           doc_count=entry.get('@doc-count')))
    return out or None
def journal_history(self):
    """List of named tuples of authored publications in the form
    (sourcetitle, abbreviation, type, issn).  issn is only given
    for journals.  abbreviation and issn may be None.

    Returns None when no journal history is available.
    """
    Journal = namedtuple('Journal', 'sourcetitle abbreviation type issn')
    location = ['author-profile', 'journal-history', 'journal']
    hist = []
    for pub in listify(chained_get(self._json, location, [])):
        hist.append(Journal(sourcetitle=pub.get('sourcetitle'),
                            abbreviation=pub.get('sourcetitle-abbrev'),
                            type=pub.get('@type'),
                            issn=pub.get('issn')))
    return hist or None
def idxterms(self):
    """List of index terms (these are just one category of those
    Scopus provides in the web version).

    Returns None if there are no index terms or if they cannot be read.
    """
    try:
        terms = listify(self._json.get("idxterms", {}).get('mainterm', []))
    except AttributeError:  # idxterms is empty
        return None
    try:
        return [d['$'] for d in terms] or None
    except (AttributeError, TypeError):
        # Subscripting a non-mapping term (e.g. None) raises TypeError,
        # not AttributeError, so the former handler could not catch it.
        return None
def chemicals(self):
    """List of namedtuples representing chemical entities in the form
    (source, chemical_name, cas_registry_number).  In case multiple
    numbers are given, they are joined on ";".

    Returns None when no chemical is listed.
    """
    Chemical = namedtuple('Chemical',
                          'source chemical_name cas_registry_number')
    groups = listify(chained_get(
        self._head, ['enhancement', 'chemicalgroup', 'chemicals'], []))
    out = []
    for group in groups:
        for chem in listify(group['chemical']):
            raw_number = chem.get('cas-registry-number')
            try:  # Multiple numbers given
                cas = ";".join([part['$'] for part in raw_number])
            except TypeError:
                # Not a sequence of '$'-mappings: keep the value as it is
                cas = raw_number
            out.append(Chemical(source=group['@source'],
                                cas_registry_number=cas,
                                chemical_name=chem['chemical-name']))
    return out or None
def isbn(self):
    """ISBNs belonging to publicationName as tuple of varying length
    (e.g. ISBN-10 or ISBN-13), or None if no ISBN is given.

    If the entries cannot be read via their "$" key (TypeError), the
    listified value itself is returned wrapped in a one-element tuple.
    """
    entries = listify(chained_get(self._head, ['source', 'isbn'], []))
    try:
        if len(entries) == 0:
            return None
        if isinstance(entries, str):
            return (entries,)
        return tuple(entry['$'] for entry in entries)
    except TypeError:
        return (entries,)
def affiliation(self):
    """A list of namedtuples representing listed affiliations in
    the form (id, name, city, country).

    Note: Might be empty, in which case None is returned.
    """
    Affiliation = namedtuple('Affiliation', 'id name city country')
    out = [Affiliation(id=entry.get('@id'),
                       name=entry.get('affilname'),
                       city=entry.get('affiliation-city'),
                       country=entry.get('affiliation-country'))
           for entry in listify(self._json.get('affiliation', []))]
    return out or None
def affiliation(self) -> Optional[List[NamedTuple]]:
    """A list of namedtuples representing listed affiliations in
    the form (id, name, city, country), with `id` converted to int.

    Returns None when no affiliation is listed.
    """
    Affiliation = namedtuple('Affiliation', 'id name city country')
    out = [Affiliation(id=int(entry['@id']),
                       name=entry.get('affilname'),
                       city=entry.get('affiliation-city'),
                       country=entry.get('affiliation-country'))
           for entry in listify(self._json.get('affiliation', []))]
    return out or None
def authors(self) -> Optional[List[NamedTuple]]:
    """A list of namedtuples storing author information,
    where each namedtuple corresponds to one author.
    The information in each namedtuple is (eid orcid surname initials
    givenname affiliation documents affiliation_id city country areas).

    `documents` is an integer; the other entries are passed on as found
    in the response (or None if missing).  Areas combines abbreviated
    subject areas followed by the number of documents in this subject,
    joined on "; ".

    Raises
    ------
    ValueError
        If the elements provided in integrity_fields do not match the
        actual field names (listed above).
    """
    # Initiate namedtuple with ordered list of fields
    field_names = 'eid orcid surname initials givenname affiliation documents '\
                  'affiliation_id city country areas'
    Author = namedtuple('Author', field_names)
    check_field_consistency(self._integrity, field_names)
    # Parse elements one-by-one
    out = []
    for record in self._json:
        name = record.get('preferred-name', {})
        current = record.get('affiliation-current', {})
        # Placeholder entry used when no subject area is given
        subjects = record.get('subject-area',
                              [{'@abbrev': '', '@frequency': ''}])
        areas = "; ".join([
            f"{d.get('@abbrev', '')} ({d.get('@frequency', '')})"
            for d in listify(subjects)
        ])
        out.append(Author(eid=record.get('eid'),
                          orcid=record.get('orcid'),
                          surname=name.get('surname'),
                          initials=name.get('initials'),
                          givenname=name.get('given-name'),
                          documents=int(record['document-count']),
                          affiliation=current.get('affiliation-name'),
                          affiliation_id=current.get('affiliation-id'),
                          city=current.get('affiliation-city'),
                          country=current.get('affiliation-country'),
                          areas=areas))
    # Finalize
    check_integrity(out, self._integrity, self._action)
    return out or None
def funding(self):
    """List of namedtuples with parsed funding information, in the form
    (agency string id acronym country).

    Returns None when no funding entry exists.
    """
    Funding = namedtuple('Funding', 'agency string id acronym country')
    location = ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding']
    out = [Funding(agency=entry.get('xocs:funding-agency'),
                   string=entry.get('xocs:funding-agency-matched-string'),
                   id=entry.get('xocs:funding-agency-id'),
                   acronym=entry.get('xocs:funding-agency-acronym'),
                   country=entry.get('xocs:funding-agency-country'))
           for entry in listify(chained_get(self._json, location, []))]
    return out or None
def __init__(self, author_id, refresh=False):
    """Interaction with the Author Retrieval API.

    Parameters
    ----------
    author_id : str or int
        The ID of the author to search for.  Optionally expressed
        as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/AuthorRetrieval.html

    Notes
    -----
    The directory for cached results is `{path}/ENHANCED/{author_id}`,
    where `path` is specified in `~/.scopus/config.ini` and `author_id`
    is stripped of an eventually leading `'9-s2.0-'`.

    `self._alias` holds the alias IDs of a merged profile, else None.
    """
    # Load json; the ID is the part after the last hyphen, normalised
    # via int().  Only the ENHANCED view is requested here.
    self._id = str(int(str(author_id).split('-')[-1]))
    Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                       refresh=refresh, view="ENHANCED")
    self._json = self._json['author-retrieval-response']
    # Checks
    try:
        self._json = self._json[0]
    except KeyError:  # Incomplete forward
        urls = listify(self._json['alias']['prism:url'])
        self._alias = [url['$'].split(':')[-1] for url in urls]
        merged_into = ', '.join(self._alias)
        text = (f'Author profile with ID {author_id} has been merged and the '
                f'main profile is now one of {merged_into}. Please update your '
                'records manually. Functionality of this object is reduced.')
        warn(text, UserWarning)
    else:
        self._alias = None
def references(self):
    """List of namedtuples representing references listed in the document,
    in the form (position, id, doi, title, authors, authors_auid,
    authors_affiliationid, sourcetitle, publicationyear, volume, issue,
    first, last, citedbycount, type, text, fulltext).

    `position` is the number at which the reference appears in the
    document, `id` is the Scopus ID of the referenced document (EID
    without the "2-s2.0-"), `authors` is a string of the names of the
    authors in the format "Surname1, Initials1; Surname2, Initials2",
    `authors_auid` is a string of the author IDs joined on "; ",
    `authors_affiliationid` is a string of the authors' affiliation IDs
    joined on "; ", `sourcetitle` is the name of the source (e.g. the
    journal), `publicationyear` is the year of the publication as a
    string, `volume` and `issue` are strings referring to the volume and
    issue, `first` and `last` refer to the page range, `citedbycount` is
    a string for the total number of citations of the cited item, `type`
    describes the parsing status of the reference (resolved or not),
    `text` is Scopus-provided information on the publication, `fulltext`
    is the text the authors used for the reference.

    Note: Requires either the FULL view or REF view.
    Might be empty even if refcount is positive, in which case None is
    returned.  Specific fields can be empty.
    Author lists (authors, authors_auid, authors_affiliationid) may
    contain duplicates but None's have been filtered out.
    """
    fields = 'position id doi title authors authors_auid '\
             'authors_affiliationid sourcetitle publicationyear volume '\
             'issue first last citedbycount type text fulltext'
    ref = namedtuple('Reference', fields)
    out = []
    for item in listify(self._ref.get("reference", [])):
        info = item.get('ref-info', item)
        # Volume/issue/page containers may come as lists; use the first
        pages_info = info.get('volisspag', {}) or {}
        if isinstance(pages_info, list):
            pages_info = pages_info[0]
        vol_issue = pages_info.get("voliss", {})
        if isinstance(vol_issue, list):
            vol_issue = vol_issue[0]
        # Parse author information
        try:  # FULL view parsing
            people = listify(item['ref-info']['ref-authors']['author'])
            names = [', '.join([p['ce:surname'], p['ce:initials']])
                     for p in people]
            auids = None
            affids = None
            id_list = listify(info['refd-itemidlist']['itemid'])
            doi = _select_by_idtype(id_list, id_type='DOI')
            scopus_id = _select_by_idtype(id_list, id_type='SGR')
        except KeyError:  # REF view parsing
            people = (info.get('author-list') or {}).get('author', [])
            names = [
                ', '.join([part for part in (p.get('ce:surname'),
                                             p.get('ce:given-name'))
                           if part])
                for p in people
            ]
            raw_auids = [p.get('@auid') for p in people]
            auids = "; ".join([auid for auid in raw_auids if auid])
            raw_affs = [p.get('affiliation') for p in people]
            affids = "; ".join([a.get('@id') for a in raw_affs if a])
            doi = info.get('ce:doi')
            scopus_id = info.get('scopus-id')
        # Combine information
        pagerange = pages_info.get('pagerange', {})
        title = info.get('ref-title', {}).get('ref-titletext',
                                              info.get('title'))
        out.append(ref(
            position=item.get('@id'),
            id=scopus_id,
            doi=doi,
            title=title,
            authors="; ".join(names),
            authors_auid=auids or None,
            authors_affiliationid=affids or None,
            sourcetitle=info.get('ref-sourcetitle', info.get('sourcetitle')),
            publicationyear=info.get('ref-publicationyear', {}).get('@first'),
            volume=vol_issue.get('@volume'),
            issue=vol_issue.get('@issue'),
            first=pagerange.get('@first'),
            last=pagerange.get('@last'),
            citedbycount=info.get('citedby-count'),
            type=info.get('type'),
            text=info.get('ref-text'),
            fulltext=item.get('ref-fulltext')))
    return out or None
def results(self) -> Optional[List[NamedTuple]]:
    """A list of namedtuples in the form (eid doi pii pubmed_id title
    subtype subtypeDescription creator afid affilname affiliation_city
    affiliation_country author_count author_names author_ids
    author_afids coverDate coverDisplayDate publicationName issn
    source_id eIssn aggregationType volume issueIdentifier
    article_number pageRange description authkeywords citedby_count
    openaccess freetoread freetoreadLabel fund_acr fund_no fund_sponsor).

    Field definitions correspond to
    https://dev.elsevier.com/guides/ScopusSearchViews.htm and return the
    values as-is, except for afid, affilname, affiliation_city,
    affiliation_country, author_names, author_ids and author_afids:
    These information are joined on ";".  In case an author has multiple
    affiliations, they are joined on "-"
    (e.g. Author1Aff;Author2Aff1-Author2Aff2).  citedby_count and
    openaccess are converted to int.

    Raises
    ------
    ValueError
        If the elements provided in integrity_fields do not match the
        actual field names (listed above).

    Notes
    -----
    The list of authors and the list of affiliations per author are
    deduplicated.
    """
    # Initiate namedtuple with ordered list of fields
    fields = 'eid doi pii pubmed_id title subtype subtypeDescription ' \
             'creator afid affilname affiliation_city ' \
             'affiliation_country author_count author_names author_ids '\
             'author_afids coverDate coverDisplayDate publicationName '\
             'issn source_id eIssn aggregationType volume '\
             'issueIdentifier article_number pageRange description '\
             'authkeywords citedby_count openaccess freetoread '\
             'freetoreadLabel fund_acr fund_no fund_sponsor'
    doc = namedtuple('Document', fields)
    check_field_consistency(self._integrity, fields)
    # Parse elements one-by-one
    out = []
    for item in self._json:
        # Parse affiliations
        affilname = _join(item, 'affilname')
        afid = _join(item, 'afid')
        aff_city = _join(item, 'affiliation-city')
        aff_country = _join(item, 'affiliation-country')
        # Parse authors; whatever was parsed before a missing key
        # (KeyError) is kept, the remainder stays None
        author_names = author_ids = author_afids = None
        try:
            # Deduplicate list of authors
            people = deduplicate(item['author'])
            # Extract information
            surnames = _replace_none([p['surname'] for p in people])
            firstnames = _replace_none([p['given-name'] for p in people])
            author_names = ";".join(
                [", ".join([surname, first])
                 for surname, first in zip(surnames, firstnames)])
            author_ids = ";".join([p['authid'] for p in people])
            per_author = []
            for person in people:
                person_affs = listify(deduplicate(person.get('afid', [])))
                per_author.append('-'.join([d['$'] for d in person_affs]))
            # Only report affiliations if at least one author has any
            author_afids = ';'.join(per_author) if any(per_author) else None
        except KeyError:
            pass
        date = item.get('prism:coverDate')
        if isinstance(date, list):
            date = date[0].get('$')
        default = [None, {"$": None}]
        freetoread = get_freetoread(item, ["freetoread", "value"], default)
        freetoreadLabel = get_freetoread(item, ["freetoreadLabel", "value"],
                                         default)
        # Mandatory/nested lookups, evaluated in a fixed order
        citedby_count = int(item['citedby-count'])
        openaccess = int(item['openaccess'])
        author_count = item.get('author-count', {}).get('$')
        out.append(doc(
            eid=item.get('eid'),
            doi=item.get('prism:doi'),
            pii=item.get('pii'),
            pubmed_id=item.get('pubmed-id'),
            title=item.get('dc:title'),
            subtype=item.get('subtype'),
            subtypeDescription=item.get('subtypeDescription'),
            creator=item.get('dc:creator'),
            afid=afid,
            affilname=affilname,
            affiliation_city=aff_city,
            affiliation_country=aff_country,
            author_count=author_count,
            author_names=author_names,
            author_ids=author_ids,
            author_afids=author_afids,
            coverDate=date,
            coverDisplayDate=item.get('prism:coverDisplayDate'),
            publicationName=item.get('prism:publicationName'),
            issn=item.get('prism:issn'),
            source_id=item.get('source-id'),
            eIssn=item.get('prism:eIssn'),
            aggregationType=item.get('prism:aggregationType'),
            volume=item.get('prism:volume'),
            issueIdentifier=item.get('prism:issueIdentifier'),
            article_number=item.get('article-number'),
            pageRange=item.get('prism:pageRange'),
            description=item.get('dc:description'),
            authkeywords=item.get('authkeywords'),
            citedby_count=citedby_count,
            openaccess=openaccess,
            freetoread=freetoread,
            freetoreadLabel=freetoreadLabel,
            fund_acr=item.get('fund-acr'),
            fund_no=item.get('fund-no'),
            fund_sponsor=item.get('fund-sponsor')))
    # Finalize
    check_integrity(out, self._integrity, self._action)
    return out or None
def results(self):
    """A list of namedtuples in the form (eid doi pii pubmed_id title
    subtype creator afid affilname affiliation_city affiliation_country
    author_count author_names author_ids author_afids coverDate
    coverDisplayDate publicationName issn source_id eIssn aggregationType
    volume issueIdentifier article_number pageRange description
    authkeywords citedby_count openaccess fund_acr fund_no fund_sponsor).

    Field definitions correspond to
    https://dev.elsevier.com/guides/ScopusSearchViews.htm, except for
    afid, affilname, affiliation_city, affiliation_country,
    author_names, author_ids and author_afids:  These information are
    joined on ";".  In case an author has multiple affiliations, they are
    joined on "-" (e.g. Author1Aff;Author2Aff1-Author2Aff2).
    author_afids is None when no author has any affiliation ID.
    Returns None when there are no results.

    Notes
    -----
    The list of authors and the list of affiliations per author are
    deduplicated.
    """
    out = []
    fields = 'eid doi pii pubmed_id title subtype creator afid affilname '\
             'affiliation_city affiliation_country author_count '\
             'author_names author_ids author_afids coverDate '\
             'coverDisplayDate publicationName issn source_id eIssn '\
             'aggregationType volume issueIdentifier article_number '\
             'pageRange description authkeywords citedby_count '\
             'openaccess fund_acr fund_no fund_sponsor'
    doc = namedtuple('Document', fields)
    for item in self._json:
        info = {}
        # Parse affiliations (best effort: missing keys leave fields None)
        try:
            info["affilname"] = _join(item['affiliation'], 'affilname')
            info["afid"] = _join(item['affiliation'], 'afid')
            info["aff_city"] = _join(item['affiliation'], 'affiliation-city')
            info["aff_country"] = _join(item['affiliation'],
                                        'affiliation-country')
        except KeyError:
            pass
        # Parse authors (best effort as well)
        try:
            # Deduplicate list of authors
            authors = _deduplicate(item['author'])
            # Extract information
            surnames = _replace_none([d['surname'] for d in authors])
            firstnames = _replace_none([d['given-name'] for d in authors])
            info["auth_names"] = ";".join(
                [", ".join([surname, first])
                 for surname, first in zip(surnames, firstnames)])
            info["auth_ids"] = ";".join([d['authid'] for d in authors])
            affs = []
            for auth in authors:
                aff = listify(_deduplicate(auth.get('afid', [])))
                affs.append('-'.join([d['$'] for d in aff]))
            # With several authors lacking affiliations, ';'.join(affs)
            # is a string of separators only (e.g. ";"), which is truthy;
            # hence test the parts rather than the joined string.
            info["auth_afid"] = ';'.join(affs) if any(affs) else None
        except KeyError:
            pass
        date = item.get('prism:coverDate')
        if isinstance(date, list):
            date = date[0].get('$')
        new = doc(
            eid=item.get('eid'),
            doi=item.get('prism:doi'),
            pii=item.get('pii'),
            pubmed_id=item.get('pubmed-id'),
            title=item.get('dc:title'),
            subtype=item.get('subtype'),
            creator=item.get('dc:creator'),
            afid=info.get("afid"),
            affilname=info.get("affilname"),
            affiliation_city=info.get("aff_city"),
            affiliation_country=info.get("aff_country"),
            author_count=item.get('author-count', {}).get('$'),
            author_names=info.get("auth_names"),
            author_ids=info.get("auth_ids"),
            author_afids=info.get("auth_afid"),
            coverDate=date,
            coverDisplayDate=item.get('prism:coverDisplayDate'),
            publicationName=item.get('prism:publicationName'),
            issn=item.get('prism:issn'),
            source_id=item.get('source-id'),
            eIssn=item.get('prism:eIssn'),
            aggregationType=item.get('prism:aggregationType'),
            volume=item.get('prism:volume'),
            issueIdentifier=item.get('prism:issueIdentifier'),
            article_number=item.get('article-number'),
            pageRange=item.get('prism:pageRange'),
            description=item.get('dc:description'),
            authkeywords=item.get('authkeywords'),
            citedby_count=item.get('citedby-count'),
            openaccess=item.get('openaccess'),
            fund_acr=item.get('fund-acr'),
            fund_no=item.get('fund-no'),
            fund_sponsor=item.get('fund-sponsor'))
        out.append(new)
    return out or None
def __init__(self,
             author_id: Union[int, str],
             refresh: Union[bool, int] = False,
             view: str = "ENHANCED",
             **kwds: str
             ) -> None:
    """Interaction with the Author Retrieval API.

    :param author_id: The ID or the EID of the author.
    :param refresh: Whether to refresh the cached file if it exists or not.
                    If int is passed, cached file will be refreshed if the
                    number of days since last modification exceeds that value.
    :param view: The view of the file that should be downloaded.  Allowed
                 values: METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD
                 includes all information of LIGHT view and ENHANCED
                 includes all information of any view.  For details see
                 https://dev.elsevier.com/sc_author_retrieval_views.html.
                 Note: Neither the BASIC nor the DOCUMENTS view are active,
                 although documented.
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values mentioned in the API specification at
                 https://dev.elsevier.com/documentation/AuthorRetrievalAPI.wadl.

    Raises
    ------
    ValueError
        If any of the parameters `refresh` or `view` is not
        one of the allowed values.

    Notes
    -----
    The directory for cached results is `{path}/ENHANCED/{author_id}`,
    where `path` is specified in your configuration file, and `author_id`
    is stripped of an eventually leading `'9-s2.0-'`.
    """
    # Checks
    check_parameter_value(
        view, ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED'), "view")

    # Load json; the ID is the part after the last hyphen
    self._id = str(author_id).split('-')[-1]
    self._view = view
    self._refresh = refresh
    Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                       **kwds)

    # Parse json
    self._json = self._json['author-retrieval-response']
    try:
        self._json = self._json[0]
    except KeyError:  # Incomplete forward
        urls = listify(self._json['alias']['prism:url'])
        self._alias = [url['$'].split(':')[-1] for url in urls]
        merged_into = ', '.join(self._alias)
        text = (f'Author profile with ID {author_id} has been merged and the '
                f'main profile is now one of {merged_into}. Please update your '
                'records manually. Functionality of this object is reduced.')
        warn(text, UserWarning)
    else:
        self._alias = None
    self._profile = self._json.get("author-profile", {})
def classificationgroup(self) -> Optional[List[Tuple[int, int]]]:
    """List with (subject group ID, number of documents)-tuples, both
    converted to int.  Returns None when no classification is listed.
    """
    location = ['classificationgroup', 'classifications', 'classification']
    out = []
    for entry in listify(chained_get(self._profile, location, [])):
        group_id = int(entry['$'])
        n_documents = int(entry['@frequency'])
        out.append((group_id, n_documents))
    return out or None