def affiliations(self):
    """Return the affiliations contained in the parsed search results.

    Every affiliation is represented by one `Affiliation` namedtuple whose
    fields are, in this order:
    (eid name variant documents city country parent).

    Values are taken from the parsed JSON via `.get()`, so a missing key
    yields None (documents falls back to '0').  variant joins all name
    variants that differ from the main name with a semicolon; it is an
    empty string when there are none.

    Returns
    -------
    list of namedtuple or None
        None is returned instead of an empty list.

    Raises
    ------
    ValueError
        If the elements provided in integrity_fields do not match the
        actual field names (listed above).
    """
    # The field string fixes the positional order of the namedtuple
    field_names = 'eid name variant documents city country parent'
    record_type = namedtuple('Affiliation', field_names)
    check_field_consistency(self.integrity, field_names)

    records = []
    for entry in self._json:
        main_name = entry.get('affiliation-name')
        # Collect only the variants that differ from the main name
        alternatives = []
        for variant in entry.get('name-variant', []):
            text = variant.get('$', "")
            if text != main_name:
                alternatives.append(text)
        records.append(record_type(
            eid=entry.get('eid'),
            name=main_name,
            variant=";".join(alternatives),
            documents=entry.get('document-count', '0'),
            city=entry.get('city'),
            country=entry.get('country'),
            parent=entry.get('parent-affiliation-id'),
        ))

    # Validate the collected records before handing them out
    check_integrity(records, self.integrity, self.action)
    if not records:
        return None
    return records
def authors(self) -> Optional[List[NamedTuple]]:
    """A list of namedtuples storing author information,
    where each namedtuple corresponds to one author.

    The information in each namedtuple is, in this positional order,
    (eid orcid surname initials givenname affiliation documents
    affiliation_id city country areas).

    All entries are strings or None, except documents, which is
    converted to int.  areas combines abbreviated subject areas followed
    by the number of documents in this subject, joined on "; ".

    Returns
    -------
    list of namedtuple or None
        None is returned instead of an empty list.

    Raises
    ------
    ValueError
        If the elements provided in integrity_fields do not match the
        actual field names (listed above).
    KeyError
        If an item carries no 'document-count' entry (it is accessed
        directly rather than via `.get()`).
    """
    # Initiate namedtuple with ordered list of fields
    fields = 'eid orcid surname initials givenname affiliation documents ' \
             'affiliation_id city country areas'
    auth = namedtuple('Author', fields)
    check_field_consistency(self._integrity, fields)
    # Parse elements one-by-one
    out = []
    for item in self._json:
        name = item.get('preferred-name', {})
        aff = item.get('affiliation-current', {})
        # Kept under its own name so the namedtuple field string above
        # is not shadowed inside the loop.
        subject_areas = item.get('subject-area',
                                 [{'@abbrev': '', '@frequency': ''}])
        areas = [f"{d.get('@abbrev', '')} ({d.get('@frequency', '')})"
                 for d in listify(subject_areas)]
        new = auth(eid=item.get('eid'),
                   orcid=item.get('orcid'),
                   surname=name.get('surname'),
                   initials=name.get('initials'),
                   givenname=name.get('given-name'),
                   affiliation=aff.get('affiliation-name'),
                   documents=int(item['document-count']),
                   affiliation_id=aff.get('affiliation-id'),
                   city=aff.get('affiliation-city'),
                   country=aff.get('affiliation-country'),
                   areas="; ".join(areas))
        out.append(new)
    # Finalize
    check_integrity(out, self._integrity, self._action)
    return out or None
def results(self):
    """Return the documents contained in the parsed search results.

    Every document is represented by one `Document` namedtuple with the
    fields (eid doi pii pubmed_id title subtype subtypeDescription creator
    afid affilname affiliation_city affiliation_country author_count
    author_names author_ids author_afids coverDate coverDisplayDate
    publicationName issn source_id eIssn aggregationType volume
    issueIdentifier article_number pageRange description authkeywords
    citedby_count openaccess fund_acr fund_no fund_sponsor).

    Field definitions correspond to
    https://dev.elsevier.com/guides/ScopusSearchViews.htm and values are
    returned as-is, except for afid, affilname, affiliation_city,
    affiliation_country, author_names, author_ids and author_afids:
    this information is joined on ";".  In case an author has multiple
    affiliations, they are joined on "-"
    (e.g. Author1Aff;Author2Aff1-Author2Aff2).

    Returns
    -------
    list of namedtuple or None
        None is returned instead of an empty list.

    Raises
    ------
    ValueError
        If the elements provided in integrity_fields do not match the
        actual field names (listed above).

    Notes
    -----
    The list of authors and the list of affiliations per author are
    deduplicated.  Author fields that could not be extracted because of
    a missing key are left as None.
    """
    # The field string fixes the positional order of the namedtuple
    field_names = (
        'eid doi pii pubmed_id title subtype subtypeDescription creator '
        'afid affilname affiliation_city affiliation_country author_count '
        'author_names author_ids author_afids coverDate coverDisplayDate '
        'publicationName issn source_id eIssn aggregationType volume '
        'issueIdentifier article_number pageRange description authkeywords '
        'citedby_count openaccess fund_acr fund_no fund_sponsor'
    )
    record_type = namedtuple('Document', field_names)
    check_field_consistency(self.integrity, field_names)

    records = []
    for entry in self._json:
        # Affiliation information
        affil_names = _join(entry, 'affilname')
        affil_ids = _join(entry, 'afid')
        affil_cities = _join(entry, 'affiliation-city')
        affil_countries = _join(entry, 'affiliation-country')

        # Author information: every value stays None unless it could be
        # extracted before a KeyError interrupted the parsing
        writer_names = None
        writer_ids = None
        writer_afids = None
        try:
            people = _deduplicate(entry['author'])
            last_names = _replace_none([p['surname'] for p in people])
            first_names = _replace_none([p['given-name'] for p in people])
            writer_names = ";".join(
                [", ".join([last, first])
                 for last, first in zip(last_names, first_names)])
            writer_ids = ";".join([p['authid'] for p in people])
            per_person = []
            for person in people:
                own_affs = listify(_deduplicate(person.get('afid', [])))
                per_person.append('-'.join([a['$'] for a in own_affs]))
            writer_afids = ';'.join(per_person) or None
        except KeyError:
            pass

        # The cover date may arrive as a list of dicts; use the first one
        cover_date = entry.get('prism:coverDate')
        if isinstance(cover_date, list):
            cover_date = cover_date[0].get('$')

        records.append(record_type(
            eid=entry.get('eid'),
            doi=entry.get('prism:doi'),
            pii=entry.get('pii'),
            pubmed_id=entry.get('pubmed-id'),
            title=entry.get('dc:title'),
            subtype=entry.get('subtype'),
            subtypeDescription=entry.get('subtypeDescription'),
            creator=entry.get('dc:creator'),
            afid=affil_ids,
            affilname=affil_names,
            affiliation_city=affil_cities,
            affiliation_country=affil_countries,
            author_count=entry.get('author-count', {}).get('$'),
            author_names=writer_names,
            author_ids=writer_ids,
            author_afids=writer_afids,
            coverDate=cover_date,
            coverDisplayDate=entry.get('prism:coverDisplayDate'),
            publicationName=entry.get('prism:publicationName'),
            issn=entry.get('prism:issn'),
            source_id=entry.get('source-id'),
            eIssn=entry.get('prism:eIssn'),
            aggregationType=entry.get('prism:aggregationType'),
            volume=entry.get('prism:volume'),
            issueIdentifier=entry.get('prism:issueIdentifier'),
            article_number=entry.get('article-number'),
            pageRange=entry.get('prism:pageRange'),
            description=entry.get('dc:description'),
            authkeywords=entry.get('authkeywords'),
            citedby_count=entry.get('citedby-count'),
            openaccess=entry.get('openaccess'),
            fund_acr=entry.get('fund-acr'),
            fund_no=entry.get('fund-no'),
            fund_sponsor=entry.get('fund-sponsor'),
        ))

    # Validate the collected records before handing them out
    check_integrity(records, self.integrity, self.action)
    if not records:
        return None
    return records