Ejemplo n.º 1
0
 def authorgroup(self) -> Optional[List[NamedTuple]]:
     """Authors of the article, organized by affiliation.

     Returns a list of namedtuples in the form (affiliation_id, dptid,
     organization, city, postalcode, addresspart, country, collaboration,
     auid, orcid, indexed_name, surname, given_name), or None when no
     entry could be built.  "given_name" falls back to the initials if
     no given name is present.

     Note: Affiliation information might be missing or mal-assigned even
     when it looks correct in the web view.  In this case please request
     a correction.  It is generally missing for collaborations.
     """
     # The 'author-group' entry comes in one of three shapes:
     # 1. A dict with one key (author) or two keys (affiliation and author)
     # 2. A list of dicts as in 1, one per affiliation (incl. missing)
     # 3. A list of two dicts with one key each (author and collaboration)
     groups = listify(self._head.get('author-group', []))
     name_path = ['preferred-name', 'ce:indexed-name']
     author = namedtuple(
         'Author',
         'affiliation_id dptid organization city postalcode '
         'addresspart country collaboration auid orcid indexed_name '
         'surname given_name')
     # When any group mentions a collaboration, it is taken from (and
     # removed with) the last group
     seen_keys = {key for group in groups for key in group.keys()}
     if "collaboration" not in seen_keys:
         collaboration = {'ce:indexed-name': None}
     else:
         collaboration = groups.pop(-1)['collaboration']
     # One record per author-affiliation combination
     records = []
     for group in groups:
         if not group:
             continue
         # Affiliation information
         aff = group.get('affiliation', {})
         aff_id = make_int_if_possible(aff.get("@afid"))
         dep_id = make_int_if_possible(aff.get("@dptid"))
         org = _get_org(aff)
         # Author information (might relate to collaborations)
         members = group.get('author', group.get('collaboration', []))
         for person in listify(members):
             try:
                 # The initials are looked up eagerly, so a missing
                 # 'ce:initials' always leads to the fallback below
                 given = person.get('ce:given-name', person['ce:initials'])
             except KeyError:  # Collaboration
                 given = person.get('ce:text')
             records.append(author(
                 affiliation_id=aff_id,
                 organization=org,
                 city=aff.get('city'),
                 dptid=dep_id,
                 postalcode=aff.get('postal-code'),
                 addresspart=aff.get('address-part'),
                 country=aff.get('country'),
                 collaboration=collaboration.get('ce:indexed-name'),
                 auid=int(person['@auid']),
                 orcid=person.get('@orcid'),
                 surname=person.get('ce:surname'),
                 given_name=given,
                 indexed_name=chained_get(person, name_path)))
     return records or None
 def sequencebank(self):
     """Biological entities defined or mentioned in the text.

     Returns a list of namedtuples in the form (name, sequence_number,
     type), one for every sequence number of every sequence bank, or
     None if there is none.
     """
     banks = listify(chained_get(
         self._head, ['enhancement', 'sequencebanks', 'sequencebank'], []))
     entry = namedtuple('Sequencebank', 'name sequence_number type')
     found = [entry(name=bank['@name'], sequence_number=seq['$'],
                    type=seq['@type'])
              for bank in banks
              for seq in listify(bank['sequence-number'])]
     return found or None
 def authorgroup(self):
     """Authors of the article, organized by affiliation.

     Returns a list of namedtuples in the form (affiliation_id, dptid,
     organization, city, postalcode, addresspart, country, auid,
     indexed_name, surname, given_name), or None when no entry could be
     built.  "given_name" falls back to the initials if no given name is
     present.  Multiple affiliation IDs are joined on ", ".

     Note: Affiliation information might be missing or mal-assigned even
     when it looks correct in the web view.  In this case please request
     a correction.
     """
     author = namedtuple(
         'Author',
         'affiliation_id dptid organization city postalcode '
         'addresspart country auid indexed_name surname given_name')
     name_path = ['preferred-name', 'ce:indexed-name']
     records = []
     for group in listify(self._head.get('author-group', [])):
         if not group:
             continue
         # Affiliation information
         aff = group.get('affiliation', {})
         try:
             aff_id = ", ".join(
                 [entry["@afid"]
                  for entry in listify(aff['affiliation-id'])])
         except KeyError:
             # No (complete) list of IDs: use the affiliation's own ID
             aff_id = aff.get("@afid")
         org = _get_org(aff)
         # Author information (might relate to collaborations)
         members = group.get('author', group.get('collaboration', []))
         for person in listify(members):
             try:
                 # The initials are looked up eagerly, so a missing
                 # 'ce:initials' always leads to the fallback below
                 given = person.get('ce:given-name', person['ce:initials'])
             except KeyError:  # Collaboration
                 given = person.get('ce:text')
             records.append(author(
                 affiliation_id=aff_id,
                 organization=org,
                 city=aff.get('city'),
                 dptid=aff.get("@dptid"),
                 postalcode=aff.get('postal-code'),
                 addresspart=aff.get('address-part'),
                 country=aff.get('country'),
                 auid=person.get('@auid'),
                 surname=person.get('ce:surname'),
                 given_name=given,
                 indexed_name=chained_get(person, name_path)))
     return records or None
Ejemplo n.º 4
0
 def correspondence(self):
     """Authors to whom correspondence should be addressed.

     Returns a list of namedtuples in the form (surname, initials,
     organization, country, city_group), or None if there is no
     correspondence entry.  Multiple organizations are joined on "; ";
     organization is None when the affiliation has none.
     """
     contact = namedtuple(
         'Correspondence',
         'surname initials organization country city_group')
     found = []
     for entry in listify(self._head.get('correspondence', [])):
         aff = entry.get('affiliation', {})
         try:
             names = aff['organization']
             try:
                 org = names['$']
             except TypeError:  # Multiple names given
                 org = "; ".join(part['$'] for part in names)
         except KeyError:
             org = None
         person = entry.get('person', {})
         found.append(contact(surname=person.get('ce:surname'),
                              initials=person.get('ce:initials'),
                              organization=org,
                              country=aff.get('country'),
                              city_group=aff.get('city-group')))
     return found or None
Ejemplo n.º 5
0
    def __init__(self, author_id, refresh=False):
        """Represent a Scopus Author query by the Scopus ID.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.  Optionally expressed
            as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

        refresh : bool (optional, default=False)
            Whether to refresh the cached file (if it exists) or not.

        Notes
        -----
        The files are cached in ~/.scopus/author_retrieval/ENHANCED/{author_id}
        (without a possibly leading '9-s2.0-').

        If the response cannot be indexed by position, the profile is
        treated as merged: a UserWarning naming the aliases found in the
        response is issued and the response is kept as-is.
        """
        # Only the part after the last dash of an EID is the numeric ID
        bare_id = str(author_id).rsplit('-', 1)[-1]
        self._id = str(int(bare_id))
        # Load json; the ENHANCED view is the only one requested here
        Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                           refresh=refresh, view="ENHANCED")
        self._json = self._json['author-retrieval-response']
        # Checks
        try:
            self._json = self._json[0]
        except KeyError:  # Incomplete forward
            urls = listify(self._json['alias']['prism:url'])
            merged_into = ', '.join([url['$'].split(':')[-1] for url in urls])
            template = ('Author profile with ID {} has been merged and the '
                        'main profile is now one of {}.  Please update your '
                        'records manually.  Functionality of this object is '
                        'reduced.')
            warn(template.format(author_id, merged_into), UserWarning)
 def authors(self):
     """The article's authors.

     Returns a list of namedtuples in the form (auid, indexed_name,
     surname, given_name, affiliation), or None if there are no authors.
     "affiliation" is the list of the author's affiliation IDs, or None
     if the author has no affiliation.

     Note: The affiliation referred to here is what Scopus' algorithm
     determined as the main affiliation.  Property `authorgroup` provides
     all affiliations.
     """
     author = namedtuple('Author',
                         'auid indexed_name surname given_name affiliation')
     given_path = ['preferred-name', 'ce:given-name']
     found = []
     for entry in chained_get(self._json, ['authors', 'author'], []):
         # Empty affiliation entries are dropped before reading the IDs
         listed = [a for a in listify(entry.get('affiliation')) if a]
         aff_ids = [a.get('@id') for a in listed] or None
         found.append(author(auid=entry['@auid'],
                             surname=entry.get('ce:surname'),
                             indexed_name=entry.get('ce:indexed-name'),
                             affiliation=aff_ids,
                             given_name=chained_get(entry, given_path)))
     return found or None
Ejemplo n.º 7
0
    def funding(self) -> Optional[List[NamedTuple]]:
        """Parsed funding information.

        Returns a list of namedtuples in the form (agency, agency_id,
        string, funding_id, acronym, country), or None if there is no
        funding entry.  "funding_id" is a list, or None if the entry
        holds an empty list of IDs.
        """
        def _get_funding_id(f_dict: dict) -> Optional[list]:
            """Return the funding IDs of one funding entry as a list."""
            raw = f_dict.get('xocs:funding-id', [])
            try:
                ids = [entry['$'] for entry in raw]  # multiple or empty
            except TypeError:
                return [raw]  # single
            return ids or None

        grant = namedtuple(
            'Funding', 'agency agency_id string funding_id acronym country')
        entries = listify(chained_get(
            self._json,
            ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding'], []))
        found = [
            grant(agency=entry.get('xocs:funding-agency'),
                  agency_id=entry.get('xocs:funding-agency-id'),
                  string=entry.get('xocs:funding-agency-matched-string'),
                  funding_id=_get_funding_id(entry),
                  acronym=entry.get('xocs:funding-agency-acronym'),
                  country=entry.get('xocs:funding-agency-country'))
            for entry in entries]
        return found or None
Ejemplo n.º 8
0
 def classificationgroup(self):
     """List with (subject group ID, number of documents)-tuples, or None
     if no classification is present.
     """
     entries = listify(chained_get(
         self._json,
         ['author-profile', 'classificationgroup', 'classifications',
          'classification'],
         []))
     pairs = []
     for entry in entries:
         pairs.append((entry['$'], entry['@frequency']))
     return pairs or None
Ejemplo n.º 9
0
 def authors(self) -> Optional[List[NamedTuple]]:
     """The article's authors.

     Returns a list of namedtuples in the form (auid, indexed_name,
     surname, given_name, affiliation), or None if there are no authors.
     In case multiple affiliation IDs are given, they are joined on ";".
     "affiliation" is None when the IDs cannot be joined (no affiliation
     or a missing ID).

     Note: The affiliation referred to here is what Scopus' algorithm
     determined as the main affiliation.  Property `authorgroup` provides
     all affiliations.
     """
     author = namedtuple('Author',
                         'auid indexed_name surname given_name affiliation')
     given_path = ['preferred-name', 'ce:given-name']
     found = []
     for entry in chained_get(self._json, ['authors', 'author'], []):
         listed = [a for a in listify(entry.get('affiliation')) if a] or None
         try:
             joined = ";".join(a.get('@id') for a in listed)
         except TypeError:
             # Nothing listed, or an ID that is not a string
             joined = None
         found.append(author(auid=int(entry['@auid']),
                             surname=entry.get('ce:surname'),
                             indexed_name=entry.get('ce:indexed-name'),
                             affiliation=joined,
                             given_name=chained_get(entry, given_path)))
     return found or None
Ejemplo n.º 10
0
 def contributor_group(self):
     """Contributors compiled by Scopus.

     Returns a list of namedtuples in the form (given_name, initials,
     surname, indexed_name, role), or None if there are no contributors.
     """
     pers = namedtuple('Contributor',
                       'given_name initials surname indexed_name role')

     def _as_contributor(data):
         """Build one Contributor from a dict-like contributor entry."""
         return pers(indexed_name=data.get('ce:indexed-name'),
                     role=data.get('@role'),
                     surname=data.get('ce:surname'),
                     given_name=data.get('ce:given-name'),
                     initials=data.get('ce:initials'))

     groups = listify(chained_get(self._head,
                                  ['source', 'contributor-group'], []))
     out = []
     for group in groups:
         try:
             entry = group.get('contributor', {})
             out.append(_as_contributor(entry))
         except AttributeError:
             # Reached when `entry` (or `group`) has no `.get`, e.g. a list.
             # NOTE(review): if `group.get` itself failed, `entry` is stale
             # from the previous iteration or unbound - confirm whether
             # iterating `group` was intended here.
             for sub in entry:
                 out.append(_as_contributor(sub.get('contributor', {})))
     return out or None
 def isbn(self):
     """ISBNs belonging to publicationName as tuple of varying length
     (e.g. ISBN-10 or ISBN-13), or None if no ISBN is listed.
     """
     entries = listify(chained_get(self._head, ['source', 'isbn'], []))
     return tuple(entry['$'] for entry in entries) if len(entries) else None
Ejemplo n.º 12
0
    def __init__(self, author_id, refresh=False, view="ENHANCED"):
        """Interaction with the Author Retrieval API.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.  Optionally expressed
            as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        view : str (optional, default=ENHANCED)
            The view of the file that should be downloaded.  Allowed values:
            METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD includes all
            information of LIGHT view and ENHANCED includes all information of
            any view.  For details see
            https://dev.elsevier.com/sc_author_retrieval_views.html.
            Note: Neither the BASIC nor the DOCUMENTS view is active,
            although both are documented.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AuthorRetrieval.html.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{author_id}`,
        where `path` is specified in `~/.scopus/config.ini` and `author_id`
        is stripped of a possibly leading `'9-s2.0-'`.

        If the response cannot be indexed by position, the profile is
        treated as merged: the alias IDs found in the response are stored
        and a UserWarning naming them is issued.
        """
        # Checks
        check_parameter_value(
            view, ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED'), "view")

        # Load json; only the part after the last dash is the numeric ID
        bare_id = str(author_id).rsplit('-', 1)[-1]
        self._id = str(int(bare_id))
        Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                           refresh=refresh, view=view)
        self._json = self._json['author-retrieval-response']
        try:
            self._json = self._json[0]
        except KeyError:  # Incomplete forward
            urls = listify(self._json['alias']['prism:url'])
            self._alias = [url['$'].split(':')[-1] for url in urls]
            alias_str = ', '.join(self._alias)
            warn(f'Author profile with ID {author_id} has been merged and '
                 f'the main profile is now one of {alias_str}.  Please update '
                 'your records manually.  Functionality of this object is '
                 'reduced.',
                 UserWarning)
        else:
            self._alias = None
        self._profile = self._json.get("author-profile", {})
 def subject_areas(self):
     """Subject areas of the article.

     Returns a list of namedtuples in the form (area, abbreviation,
     code), or None if no subject area is listed.

     Note: Requires the FULL view of the article.
     """
     area = namedtuple('Area', 'area abbreviation code')
     entries = listify(chained_get(
         self._json, ['subject-areas', 'subject-area'], []))
     found = []
     for entry in entries:
         found.append(area(area=entry['$'], abbreviation=entry['@abbrev'],
                           code=entry['@code']))
     return found or None
Ejemplo n.º 14
0
 def name_variants(self):
     """Variants of the author name.

     Returns a list of namedtuples in the form (indexed_name, initials,
     surname, given_name, doc_count), where doc_count is the number of
     documents published with that variant, or None if no variant is
     listed.
     """
     variant = namedtuple(
         'Variant', 'indexed_name initials surname given_name doc_count')
     found = []
     for entry in listify(self._profile.get('name-variant', [])):
         found.append(variant(indexed_name=entry['indexed-name'],
                              surname=entry['surname'],
                              doc_count=entry.get('@doc-count'),
                              initials=entry['initials'],
                              given_name=entry.get('given-name')))
     return found or None
Ejemplo n.º 15
0
 def journal_history(self):
     """Sources the author has published in.

     Returns a list of namedtuples in the form (sourcetitle,
     abbreviation, type, issn), or None if the history is empty.  issn is
     only given for journals.  abbreviation and issn may be None.
     """
     jour = namedtuple('Journal', 'sourcetitle abbreviation type issn')
     entries = listify(chained_get(
         self._json, ['author-profile', 'journal-history', 'journal'], []))
     hist = []
     for entry in entries:
         hist.append(jour(sourcetitle=entry.get('sourcetitle'),
                          issn=entry.get('issn'),
                          abbreviation=entry.get('sourcetitle-abbrev'),
                          type=entry.get('@type')))
     return hist or None
 def idxterms(self):
     """List of index terms (these are just one category of those
     Scopus provides in the web version), or None if there are none.
     """
     try:
         container = self._json.get("idxterms", {})
         entries = listify(container.get('mainterm', []))
     except AttributeError:  # idxterms is empty
         return None
     try:
         terms = [entry['$'] for entry in entries]
     except AttributeError:
         return None
     return terms or None
 def chemicals(self):
     """Chemical entities of the article.

     Returns a list of namedtuples in the form (source, chemical_name,
     cas_registry_number), or None if there are none.  In case multiple
     numbers are given, they are joined on ";".
     """
     chemical = namedtuple('Chemical',
                           'source chemical_name cas_registry_number')
     groups = listify(chained_get(
         self._head, ['enhancement', 'chemicalgroup', 'chemicals'], []))
     found = []
     for group in groups:
         for chem in listify(group['chemical']):
             number = chem.get('cas-registry-number')
             try:  # Multiple numbers given
                 cas = ";".join(part['$'] for part in number)
             except TypeError:
                 # Anything that cannot be joined is passed on unchanged
                 cas = number
             found.append(chemical(source=group['@source'],
                                   cas_registry_number=cas,
                                   chemical_name=chem['chemical-name']))
     return found or None
Ejemplo n.º 18
0
 def isbn(self):
     """ISBNs belonging to publicationName as tuple of varying length
     (e.g. ISBN-10 or ISBN-13), or None if no ISBN is listed.
     """
     isbns = listify(chained_get(self._head, ['source', 'isbn'], []))
     try:
         if len(isbns) == 0:
             return None
         if isinstance(isbns, str):
             return (isbns,)
         return tuple(entry['$'] for entry in isbns)
     except TypeError:
         # Value without a length, or entries that cannot be read via '$':
         # hand back whatever was found as a one-element tuple
         return (isbns,)
 def affiliation(self):
     """Affiliations listed for the document.

     Returns a list of namedtuples in the form (id, name, city, country),
     or None if no affiliation is listed.
     """
     aff = namedtuple('Affiliation', 'id name city country')
     found = [aff(id=entry.get('@id'),
                  name=entry.get('affilname'),
                  city=entry.get('affiliation-city'),
                  country=entry.get('affiliation-country'))
              for entry in listify(self._json.get('affiliation', []))]
     return found or None
Ejemplo n.º 20
0
 def affiliation(self) -> Optional[List[NamedTuple]]:
     """Affiliations listed for the document.

     Returns a list of namedtuples in the form (id, name, city, country),
     with the id converted to int, or None if no affiliation is listed.
     """
     aff = namedtuple('Affiliation', 'id name city country')
     found = [aff(id=int(entry['@id']),
                  name=entry.get('affilname'),
                  city=entry.get('affiliation-city'),
                  country=entry.get('affiliation-country'))
              for entry in listify(self._json.get('affiliation', []))]
     return found or None
Ejemplo n.º 21
0
    def authors(self) -> Optional[List[NamedTuple]]:
        """Author information, one namedtuple per author.

        Returns a list of namedtuples in the form (eid orcid surname
        initials givenname affiliation documents affiliation_id city
        country areas), or None if there are no results.

        "documents" is converted to int.  "areas" combines the abbreviated
        subject areas, each followed by the number of documents in this
        subject in parentheses, joined on "; ".

        Raises
        ------
        ValueError
            If the elements provided in integrity_fields do not match the
            actual field names (listed above).  NOTE(review): presumably
            raised by `check_field_consistency` - confirm.
        """
        # Initiate namedtuple with ordered list of fields
        field_names = ('eid orcid surname initials givenname affiliation '
                       'documents affiliation_id city country areas')
        auth = namedtuple('Author', field_names)
        check_field_consistency(self._integrity, field_names)
        # Parse elements one-by-one
        out = []
        for item in self._json:
            name = item.get('preferred-name', {})
            aff = item.get('affiliation-current', {})
            # An author without subject areas gets one empty placeholder
            subjects = item.get('subject-area',
                                [{'@abbrev': '', '@frequency': ''}])
            areas = []
            for d in listify(subjects):
                areas.append(
                    f"{d.get('@abbrev', '')} ({d.get('@frequency', '')})")
            out.append(auth(eid=item.get('eid'),
                            orcid=item.get('orcid'),
                            initials=name.get('initials'),
                            surname=name.get('surname'),
                            areas="; ".join(areas),
                            givenname=name.get('given-name'),
                            documents=int(item['document-count']),
                            affiliation=aff.get('affiliation-name'),
                            affiliation_id=aff.get('affiliation-id'),
                            city=aff.get('affiliation-city'),
                            country=aff.get('affiliation-country')))
        # Finalize
        check_integrity(out, self._integrity, self._action)
        return out or None
 def funding(self):
     """Parsed funding information.

     Returns a list of namedtuples in the form (agency string id acronym
     country), or None if there is no funding entry.
     """
     fund = namedtuple('Funding', 'agency string id acronym country')
     entries = listify(chained_get(
         self._json,
         ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding'], []))
     found = [fund(agency=entry.get('xocs:funding-agency'),
                   string=entry.get('xocs:funding-agency-matched-string'),
                   id=entry.get('xocs:funding-agency-id'),
                   acronym=entry.get('xocs:funding-agency-acronym'),
                   country=entry.get('xocs:funding-agency-country'))
              for entry in entries]
     return found or None
Ejemplo n.º 23
0
    def __init__(self, author_id, refresh=False):
        """Interaction with the Author Retrieval API.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.  Optionally expressed
            as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AuthorRetrieval.html

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{author_id}`,
        where `path` is specified in `~/.scopus/config.ini` and `author_id`
        is stripped of a possibly leading `'9-s2.0-'`.

        If the response cannot be indexed by position, the profile is
        treated as merged: the alias IDs found in the response are stored
        and a UserWarning naming them is issued.
        """
        # Only the part after the last dash of an EID is the numeric ID
        bare_id = str(author_id).rsplit('-', 1)[-1]
        self._id = str(int(bare_id))
        # Load json; the ENHANCED view is the only one requested here
        Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                           refresh=refresh, view="ENHANCED")
        self._json = self._json['author-retrieval-response']
        # Checks
        try:
            self._json = self._json[0]
        except KeyError:  # Incomplete forward
            urls = listify(self._json['alias']['prism:url'])
            self._alias = [url['$'].split(':')[-1] for url in urls]
            alias_str = ', '.join(self._alias)
            warn(f'Author profile with ID {author_id} has been merged and '
                 f'the main profile is now one of {alias_str}.  Please update '
                 'your records manually.  Functionality of this object is '
                 'reduced.',
                 UserWarning)
        else:
            self._alias = None
    def references(self):
        """List of namedtuples representing references listed in the document,
        in the form (position, id, doi, title, authors, authors_auid,
        authors_affiliationid, sourcetitle, publicationyear, volume, issue,
        first, last, citedbycount, type, text, fulltext), or None if no
        reference could be parsed.

        `position` is the number at which the reference appears in the
        document, `id` is the Scopus ID of the referenced document (EID
        without the "2-s2.0-"), `authors` is a string of the names of the
        authors in the format "Surname1, Initials1; Surname2, Initials2"
        (the REF view parsing uses the given name instead of the initials),
        `authors_auid` is a string of the author IDs joined on "; ",
        `authors_affiliationid` is a string of the authors' affiliation IDs
        joined on "; ", `sourcetitle` is the name of the source (e.g. the
        journal), `publicationyear` is the year of the publication as a string,
        `volume` and `issue`, are strings referring to the volume and issue,
        `first` and `last` refer to the page range, `citedbycount` is a string
        for the total number of citations of the cited item, `type` describes
        the parsing status of the reference (resolved or not), `text` is
        Scopus-provided information on the publication, `fulltext` is the text
        the authors used for the reference.

        Note: Requires either the FULL view or REF view.
        Might be empty even if refcount is positive.  Specific fields can
        be empty.
        Author lists (authors, authors_auid, authors_affiliationid) may contain
        duplicates but None's have been filtered out.  `authors_auid` and
        `authors_affiliationid` are always None with the FULL view parsing.
        """
        out = []
        fields = 'position id doi title authors authors_auid '\
                 'authors_affiliationid sourcetitle publicationyear volume '\
                 'issue first last citedbycount type text fulltext'
        ref = namedtuple('Reference', fields)
        items = listify(self._ref.get("reference", []))
        for item in items:
            # Without a 'ref-info' entry the item itself carries the details
            info = item.get('ref-info', item)
            # `or {}` also covers an explicit None value
            volisspag = info.get('volisspag', {}) or {}
            # Of several volume/issue/page entries only the first is used
            if isinstance(volisspag, list):
                volisspag = volisspag[0]
            volis = volisspag.get("voliss", {})
            if isinstance(volis, list):
                volis = volis[0]
            # Parse author information
            # Any KeyError in the FULL view parsing (missing 'ref-info',
            # 'ref-authors', 'author', a name part or the item ID list)
            # switches to the REF view parsing, which redoes all assignments
            try:  # FULL view parsing
                auth = listify(item['ref-info']['ref-authors']['author'])
                authors = [', '.join([d['ce:surname'], d['ce:initials']])
                           for d in auth]
                auids = None
                affids = None
                ids = listify(info['refd-itemidlist']['itemid'])
                # presumably picks the ID of the given type - see helper
                doi = _select_by_idtype(ids, id_type='DOI')
                scopus_id = _select_by_idtype(ids, id_type='SGR')
            except KeyError:  # REF view parsing
                # NOTE(review): unlike above, the author entry is not passed
                # through listify here - confirm it is always a list
                auth = (info.get('author-list') or {}).get('author', [])
                authors = [', '.join(filter(None, [d.get('ce:surname'),
                                                   d.get('ce:given-name')]))
                           for d in auth]
                auids = "; ".join(filter(None, [d.get('@auid') for d in auth]))
                affs = filter(None, [d.get('affiliation') for d in auth])
                affids = "; ".join([aff.get('@id') for aff in affs])
                doi = info.get('ce:doi')
                scopus_id = info.get('scopus-id')
            # Combine information
            # Empty ID strings become None; title and sourcetitle fall back
            # to the keys 'title' and 'sourcetitle'
            new = ref(position=item.get('@id'), id=scopus_id, doi=doi,
                authors="; ".join(authors), authors_auid=auids or None,
                authors_affiliationid=affids or None,
                title=info.get('ref-title', {}).get('ref-titletext', info.get('title')),
                sourcetitle=info.get('ref-sourcetitle', info.get('sourcetitle')),
                publicationyear=info.get('ref-publicationyear', {}).get('@first'),
                volume=volis.get('@volume'), issue=volis.get('@issue'),
                first=volisspag.get('pagerange', {}).get('@first'),
                last=volisspag.get('pagerange', {}).get('@last'),
                citedbycount=info.get('citedby-count'), type=info.get('type'),
                text=info.get('ref-text'),
                fulltext=item.get('ref-fulltext'))
            out.append(new)
        return out or None
Ejemplo n.º 25
0
    def results(self) -> Optional[List[NamedTuple]]:
        """Search results as a list of `Document` namedtuples, or None if
        there are none.

        Each namedtuple has the fields (eid doi pii pubmed_id title subtype
        subtypeDescription creator afid affilname affiliation_city
        affiliation_country author_count author_names author_ids author_afids
        coverDate coverDisplayDate publicationName issn source_id eIssn
        aggregationType volume issueIdentifier article_number pageRange
        description authkeywords citedby_count openaccess freetoread
        freetoreadLabel fund_acr fund_no fund_sponsor).

        Field definitions correspond to
        https://dev.elsevier.com/guides/ScopusSearchViews.htm and values are
        returned as-is, with the following exceptions:  afid, affilname,
        affiliation_city, affiliation_country, author_names, author_ids and
        author_afids are joined on ";".  Multiple affiliations of a single
        author are joined on "-" (e.g. Author1Aff;Author2Aff1-Author2Aff2).
        citedby_count and openaccess are converted to int.

        Raises
        ------
        ValueError
            If the elements provided in integrity_fields do not match the
            actual field names (listed above).

        Notes
        -----
        The list of authors and the list of affiliations per author are
        deduplicated.
        """
        # The order of names here fixes the layout of the namedtuple
        fields = ('eid doi pii pubmed_id title subtype subtypeDescription '
                  'creator afid affilname affiliation_city '
                  'affiliation_country author_count author_names author_ids '
                  'author_afids coverDate coverDisplayDate publicationName '
                  'issn source_id eIssn aggregationType volume '
                  'issueIdentifier article_number pageRange description '
                  'authkeywords citedby_count openaccess freetoread '
                  'freetoreadLabel fund_acr fund_no fund_sponsor')
        document = namedtuple('Document', fields)
        check_field_consistency(self._integrity, fields)

        documents = []
        for item in self._json:
            # Affiliation details
            affilname = _join(item, 'affilname')
            afid = _join(item, 'afid')
            aff_city = _join(item, 'affiliation-city')
            aff_country = _join(item, 'affiliation-country')

            # Author details; a missing key aborts the parsing of this entry's
            # authors and leaves the values not yet assigned at None
            auth_names = auth_ids = auth_afid = None
            try:
                authors = deduplicate(item['author'])
                surnames = _replace_none([a['surname'] for a in authors])
                given = _replace_none([a['given-name'] for a in authors])
                auth_names = ";".join(
                    [", ".join(pair) for pair in zip(surnames, given)])
                auth_ids = ";".join([a['authid'] for a in authors])
                per_author = []
                for author in authors:
                    entries = listify(deduplicate(author.get('afid', [])))
                    per_author.append('-'.join([e['$'] for e in entries]))
                # Only report affiliations if at least one author has any
                auth_afid = ';'.join(per_author) if any(per_author) else None
            except KeyError:
                pass

            # The cover date may come as a list of dicts; use the first one
            date = item.get('prism:coverDate')
            if isinstance(date, list):
                date = date[0].get('$')

            default = [None, {"$": None}]
            freetoread = get_freetoread(item, ["freetoread", "value"], default)
            freetoreadLabel = get_freetoread(
                item, ["freetoreadLabel", "value"], default)

            # These lookups may raise on malformed entries, hence they are
            # evaluated before the namedtuple is assembled
            citedby = int(item['citedby-count'])
            is_openaccess = int(item['openaccess'])
            n_authors = item.get('author-count', {}).get('$')

            documents.append(document(
                eid=item.get('eid'),
                doi=item.get('prism:doi'),
                pii=item.get('pii'),
                pubmed_id=item.get('pubmed-id'),
                title=item.get('dc:title'),
                subtype=item.get('subtype'),
                subtypeDescription=item.get('subtypeDescription'),
                creator=item.get('dc:creator'),
                afid=afid,
                affilname=affilname,
                affiliation_city=aff_city,
                affiliation_country=aff_country,
                author_count=n_authors,
                author_names=auth_names,
                author_ids=auth_ids,
                author_afids=auth_afid,
                coverDate=date,
                coverDisplayDate=item.get('prism:coverDisplayDate'),
                publicationName=item.get('prism:publicationName'),
                issn=item.get('prism:issn'),
                source_id=item.get('source-id'),
                eIssn=item.get('prism:eIssn'),
                aggregationType=item.get('prism:aggregationType'),
                volume=item.get('prism:volume'),
                issueIdentifier=item.get('prism:issueIdentifier'),
                article_number=item.get('article-number'),
                pageRange=item.get('prism:pageRange'),
                description=item.get('dc:description'),
                authkeywords=item.get('authkeywords'),
                citedby_count=citedby,
                openaccess=is_openaccess,
                freetoread=freetoread,
                freetoreadLabel=freetoreadLabel,
                fund_acr=item.get('fund-acr'),
                fund_no=item.get('fund-no'),
                fund_sponsor=item.get('fund-sponsor')))

        # Finalize
        check_integrity(documents, self._integrity, self._action)
        return documents or None
Ejemplo n.º 26
0
    def results(self):
        """A list of namedtuples in the form (eid doi pii pubmed_id title
        subtype creator afid affilname affiliation_city affiliation_country
        author_count author_names author_ids author_afids coverDate
        coverDisplayDate publicationName issn source_id eIssn aggregationType
        volume issueIdentifier article_number pageRange description
        authkeywords citedby_count openaccess fund_acr fund_no fund_sponsor),
        or None if there are no results.
        Field definitions correspond to
        https://dev.elsevier.com/guides/ScopusSearchViews.htm, except for
        afid, affilname, affiliation_city, affiliation_country,
        author_names, author_ids and author_afids:  These information are
        joined on ";".  In case an author has multiple affiliations, they are
        joined on "-" (e.g. Author1Aff;Author2Aff1-Author2Aff2).
        Fields that are missing from an entry are None; in particular,
        author_afids is None if none of the authors has an affiliation ID.

        Notes
        -----
        The list of authors and the list of affiliations per author are
        deduplicated.
        """
        out = []
        # The order of names here fixes the layout of the namedtuple
        fields = 'eid doi pii pubmed_id title subtype creator afid affilname '\
                 'affiliation_city affiliation_country author_count '\
                 'author_names author_ids author_afids coverDate '\
                 'coverDisplayDate publicationName issn source_id eIssn '\
                 'aggregationType volume issueIdentifier article_number '\
                 'pageRange description authkeywords citedby_count '\
                 'openaccess fund_acr fund_no fund_sponsor'
        doc = namedtuple('Document', fields)
        for item in self._json:
            info = {}
            # Parse affiliations; entries without an 'affiliation' key keep
            # all affiliation fields at None
            try:
                info["affilname"] = _join(item['affiliation'], 'affilname')
                info["afid"] = _join(item['affiliation'], 'afid')
                info["aff_city"] = _join(item['affiliation'],
                                         'affiliation-city')
                info["aff_country"] = _join(item['affiliation'],
                                            'affiliation-country')
            except KeyError:
                pass
            # Parse authors; a missing key aborts the parsing of this entry's
            # authors and leaves the values not yet assigned at None
            try:
                # Deduplicate list of authors
                authors = _deduplicate(item['author'])
                # Extract information
                surnames = _replace_none([d['surname'] for d in authors])
                firstnames = _replace_none([d['given-name'] for d in authors])
                info["auth_names"] = ";".join([
                    ", ".join([t[0], t[1]]) for t in zip(surnames, firstnames)
                ])
                info["auth_ids"] = ";".join([d['authid'] for d in authors])
                affs = []
                for auth in authors:
                    aff = listify(_deduplicate(auth.get('afid', [])))
                    affs.append('-'.join([d['$'] for d in aff]))
                # Test the individual parts rather than the joined string:
                # with several authors lacking affiliation IDs the joined
                # string consists of bare separators (e.g. ";"), which is
                # truthy and would otherwise be reported instead of None
                info["auth_afid"] = ';'.join(affs) if any(affs) else None
            except KeyError:
                pass
            # The cover date may come as a list of dicts; use the first one
            date = item.get('prism:coverDate')
            if isinstance(date, list):
                date = date[0].get('$')
            new = doc(article_number=item.get('article-number'),
                      title=item.get('dc:title'),
                      fund_sponsor=item.get('fund-sponsor'),
                      subtype=item.get('subtype'),
                      issn=item.get('prism:issn'),
                      creator=item.get('dc:creator'),
                      affilname=info.get("affilname"),
                      author_names=info.get("auth_names"),
                      doi=item.get('prism:doi'),
                      coverDate=date,
                      volume=item.get('prism:volume'),
                      coverDisplayDate=item.get('prism:coverDisplayDate'),
                      publicationName=item.get('prism:publicationName'),
                      source_id=item.get('source-id'),
                      author_ids=info.get("auth_ids"),
                      aggregationType=item.get('prism:aggregationType'),
                      issueIdentifier=item.get('prism:issueIdentifier'),
                      pageRange=item.get('prism:pageRange'),
                      author_afids=info.get("auth_afid"),
                      fund_no=item.get('fund-no'),
                      affiliation_country=info.get("aff_country"),
                      citedby_count=item.get('citedby-count'),
                      openaccess=item.get('openaccess'),
                      eIssn=item.get('prism:eIssn'),
                      author_count=item.get('author-count', {}).get('$'),
                      affiliation_city=info.get("aff_city"),
                      afid=info.get("afid"),
                      description=item.get('dc:description'),
                      pii=item.get('pii'),
                      authkeywords=item.get('authkeywords'),
                      eid=item.get('eid'),
                      fund_acr=item.get('fund-acr'),
                      pubmed_id=item.get('pubmed-id'))
            out.append(new)
        return out or None
Ejemplo n.º 27
0
    def __init__(self,
                 author_id: Union[int, str],
                 refresh: Union[bool, int] = False,
                 view: str = "ENHANCED",
                 **kwds: str) -> None:
        """Interaction with the Author Retrieval API.

        :param author_id: The ID or the EID of the author.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If an int is passed, the cached file will be refreshed
                        if the number of days since its last modification
                        exceeds that value.
        :param view: The view of the file that should be downloaded.  Allowed
                     values: METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD
                     includes all information of the LIGHT view and ENHANCED
                     includes all information of any view.  For details see
                     https://dev.elsevier.com/sc_author_retrieval_views.html.
                     Note: Neither the BASIC nor the DOCUMENTS view are active,
                     although documented.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/AuthorRetrievalAPI.wadl.

        Raises
        ------
        ValueError
            If any of the parameters `refresh` or `view` is not
            one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{author_id}`,
        where `path` is specified in your configuration file, and `author_id`
        is stripped of an eventually leading `'9-s2.0-'`.

        If the profile has been merged into another one, a UserWarning is
        issued and the IDs of the candidate main profiles are kept in
        `self._alias` (which is None otherwise).
        """
        # Validate the requested view
        check_parameter_value(
            view, ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED'), "view")

        # Keep only the part after the last hyphen, i.e. drop an EID prefix
        self._id = str(author_id).split('-')[-1]
        self._view = view
        self._refresh = refresh
        Retrieval.__init__(
            self, identifier=self._id, api='AuthorRetrieval', **kwds)

        # Narrow the json down to the actual response
        response = self._json['author-retrieval-response']
        self._json = response
        try:
            self._json = response[0]
        except KeyError:
            # Incomplete forward: the response only points to other profiles
            urls = listify(response['alias']['prism:url'])
            self._alias = [url['$'].split(':')[-1] for url in urls]
            main_profiles = ', '.join(self._alias)
            message = (
                f'Author profile with ID {author_id} has been merged and '
                f'the main profile is now one of {main_profiles}.  Please update '
                'your records manually.  Functionality of this object is '
                'reduced.')
            warn(message, UserWarning)
        else:
            self._alias = None
        self._profile = self._json.get("author-profile", {})
Ejemplo n.º 28
0
 def classificationgroup(self) -> Optional[List[Tuple[int, int]]]:
     """Subject groups of the profile as a list of (subject group ID,
     number of documents)-tuples, or None if there are none.
     """
     keys = ['classificationgroup', 'classifications', 'classification']
     entries = listify(chained_get(self._profile, keys, []))
     groups = []
     for entry in entries:
         # '$' holds the group ID, '@frequency' the document count
         groups.append((int(entry['$']), int(entry['@frequency'])))
     return groups or None