Exemple #1
0
 def classificationgroup(self):
     """Subject-group classifications of the author profile.

     Returns a list of (subject group ID, number of documents) tuples,
     or None when no classification is listed.
     """
     keys = ['author-profile', 'classificationgroup', 'classifications',
             'classification']
     groups = []
     for entry in listify(chained_get(self._json, keys, [])):
         groups.append((entry['$'], entry['@frequency']))
     return groups or None
 def isbn(self):
     """ISBNs belonging to publicationName as tuple of varying length
     (e.g. ISBN-10 or ISBN-13), or None if no ISBN is given.
     """
     entries = listify(chained_get(self._head, ['source', 'isbn'], []))
     if len(entries) > 0:
         return tuple(entry['$'] for entry in entries)
     return None
 def confsponsor(self):
     """Sponsor(s) of the conference the abstract belongs to.

     Returns None when no sponsor is recorded, a list of the '$' values
     when the record holds a list of sponsors, and the raw record value
     otherwise.
     """
     found = chained_get(self._confevent, ['confsponsors', 'confsponsor'], [])
     if len(found) == 0:
         return None
     if not isinstance(found, list):
         return found
     return [sponsor['$'] for sponsor in found]
 def name_variants(self):
     """A list of namedtuples representing variants of the affiliation name
     with number of documents referring to this variant, in the form
     (name, doc_count).  doc_count is None if the record gives no count.
     The list is empty when no variants are recorded.
     """
     variant = namedtuple('Variant', 'name doc_count')
     variants = chained_get(self._json, ['name-variants', 'name-variant'], [])
     # A lone variant may arrive as a single record rather than a list;
     # wrap it so that it is not iterated key by key.
     if not isinstance(variants, list):
         variants = [variants]
     return [variant(name=var['$'], doc_count=var.get('@doc-count'))
             for var in variants]
Exemple #5
0
 def subject_areas(self):
     """List of named tuples of subject areas in the form
     (area, abbreviation, code) of author's publication, or None if no
     subject area is recorded.
     """
     path = ['subject-areas', 'subject-area']
     area = namedtuple('Subjectarea', 'area abbreviation code')
     # listify guards against a lone subject area stored as a single record
     # instead of a list; without it such a record is iterated key by key.
     items = listify(chained_get(self._json, path, []))
     areas = [area(area=item['$'], code=item['@code'],
                   abbreviation=item['@abbrev'])
              for item in items]
     return areas or None
 def subject_areas(self):
     """List of namedtuples containing subject areas of the article
     in the form (area, abbreviation, code), or None if there are none.
     Note: Requires the FULL view of the article.
     """
     keys = ['subject-areas', 'subject-area']
     area = namedtuple('Area', 'area abbreviation code')
     collected = []
     for entry in listify(chained_get(self._json, keys, [])):
         collected.append(area(area=entry['$'],
                               abbreviation=entry['@abbrev'],
                               code=entry['@code']))
     return collected or None
 def publisher(self):
     """Name of the publisher of the abstract.
     Note: Information provided in the FULL view of the article might be
     more complete.
     """
     name = chained_get(self._head, ['source', 'publisher', 'publishername'])
     if name is not None:
         return name
     # Nothing under the head's source entry: use the core data instead
     return self._json['coredata'].get('dc:publisher')
Exemple #8
0
 def journal_history(self):
     """List of named tuples of authored publications in the form
     (sourcetitle, abbreviation, type, issn), or None if there are none.
     issn is only given for journals.  abbreviation and issn may be None.
     """
     keys = ['author-profile', 'journal-history', 'journal']
     jour = namedtuple('Journal', 'sourcetitle abbreviation type issn')
     history = []
     for entry in listify(chained_get(self._json, keys, [])):
         history.append(jour(sourcetitle=entry['sourcetitle'],
                             issn=entry.get('issn'),
                             abbreviation=entry.get('sourcetitle-abbrev'),
                             type=entry['@type']))
     return history or None
Exemple #9
0
 def name_variants(self):
     """List of named tuples containing variants of the author name with
     number of documents published with that variant, in the form
     (indexed_name, initials, surname, given_name, doc_count), or None if
     there are none.
     """
     variant = namedtuple(
         'Variant', 'indexed_name initials surname given_name doc_count')
     keys = ['author-profile', 'name-variant']
     variants = []
     for entry in listify(chained_get(self._json, keys, [])):
         variants.append(variant(indexed_name=entry['indexed-name'],
                                 surname=entry['surname'],
                                 doc_count=entry.get('@doc-count'),
                                 initials=entry['initials'],
                                 given_name=entry.get('given-name')))
     return variants or None
Exemple #10
0
 def authors(self):
     """A list of namedtuples representing the article's authors, in the
     form (auid, indexed_name, surname, given_name, affiliation), or None
     if no authors are recorded.  `affiliation` is a list of affiliation
     IDs, or None if the author has no affiliation.
     Note: The affiliation referred to here is what Scopus' algorithm
     determined as the main affiliation.  Property `authorgroup` provides
     all affiliations.
     """
     out = []
     fields = 'auid indexed_name surname given_name affiliation'
     auth = namedtuple('Author', fields)
     # listify guards against a lone author stored as a single record
     # instead of a list; without it such a record is iterated key by key.
     for item in listify(chained_get(self._json, ['authors', 'author'], [])):
         # A missing affiliation yields [None], which the filter drops
         affs = [a for a in listify(item.get('affiliation')) if a]
         aff_ids = [a.get('@id') for a in affs] or None
         given = chained_get(item, ['preferred-name', 'ce:given-name'])
         new = auth(auid=item['@auid'], surname=item.get('ce:surname'),
                    indexed_name=item.get('ce:indexed-name'),
                    affiliation=aff_ids, given_name=given)
         out.append(new)
     return out or None
Exemple #11
0
 def sequencebank(self):
     """List of namedtuples representing biological entities defined or
     mentioned in the text, in the form (name, sequence_number, type),
     or None if there are none.  Each sequence number of a bank yields
     its own entry.
     """
     keys = ['enhancement', 'sequencebanks', 'sequencebank']
     banks = listify(chained_get(self._head, keys, []))
     bank = namedtuple('Sequencebank', 'name sequence_number type')
     entries = [bank(name=entry['@name'], sequence_number=num['$'],
                     type=num['@type'])
                for entry in banks
                for num in listify(entry['sequence-number'])]
     return entries or None
Exemple #12
0
 def contributor_group(self):
     """List of namedtuples representing contributors compiled by Scopus,
     in the form (given_name, initials, surname, indexed_name, role), or
     None if there are none.  Missing name parts are None.
     """
     groups = listify(chained_get(self._head, ['source', 'contributor-group'], []))
     pers = namedtuple('Contributor',
                       'given_name initials surname indexed_name role')
     people = (group.get('contributor', {}) for group in groups)
     contributors = [pers(indexed_name=person.get('ce:indexed-name'),
                          role=person.get('@role'),
                          surname=person.get('ce:surname'),
                          given_name=person.get('ce:given-name'),
                          initials=person.get('ce:initials'))
                     for person in people]
     return contributors or None
Exemple #13
0
 def funding(self):
     """List of namedtuples parsed funding information in the form
     (agency string id acronym country), or None if there is none.
     Fields missing from an entry are None.
     """
     keys = ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding']
     fund = namedtuple('Funding', 'agency string id acronym country')
     entries = [fund(agency=entry.get('xocs:funding-agency'),
                     string=entry.get('xocs:funding-agency-matched-string'),
                     id=entry.get('xocs:funding-agency-id'),
                     acronym=entry.get('xocs:funding-agency-acronym'),
                     country=entry.get('xocs:funding-agency-country'))
                for entry in listify(chained_get(self._json, keys, []))]
     return entries or None
Exemple #14
0
 def chemicals(self):
     """List of namedtuples representing chemical entities in the form
     (source, chemical_name, cas_registry_number), or None if there are
     none.  In case multiple numbers given, they are joined on ";".
     """
     keys = ['enhancement', 'chemicalgroup', 'chemicals']
     chemical = namedtuple('Chemical', 'source chemical_name cas_registry_number')
     found = []
     for group in listify(chained_get(self._head, keys, [])):
         for chem in listify(group['chemical']):
             cas = chem.get('cas-registry-number')
             try:
                 # Works when several numbers are given as records
                 cas = ";".join([part['$'] for part in cas])
             except TypeError:
                 pass  # Not joinable: keep the value as retrieved
             found.append(chemical(source=group['@source'],
                                   cas_registry_number=cas,
                                   chemical_name=chem['chemical-name']))
     return found or None
Exemple #15
0
 def authorgroup(self):
     """A list of namedtuples representing the article's authors organized
     by affiliation, in the form (affiliation_id, dptid, organization,
     city, postalcode, addresspart, country, auid, indexed_name,
     surname, given_name), or None if there are none.
     If "given_name" is not present, fall back to initials; if neither is
     present (collaborations), fall back to the entry's text.
     Note: Affiliation information might be missing or mal-assigned even
     when it looks correct in the web view.  In this case please request
     a correction.
     """
     out = []
     fields = 'affiliation_id dptid organization city postalcode '\
              'addresspart country auid indexed_name surname given_name'
     auth = namedtuple('Author', fields)
     items = listify(self._head.get('author-group', []))
     for item in items:
         # Affiliation information
         aff = item.get('affiliation', {})
         try:
             aff_ids = listify(aff['affiliation-id'])
             aff_id = ", ".join([a["@afid"] for a in aff_ids])
         except KeyError:
             aff_id = aff.get("@afid")
         org = _get_org(aff)
         # Author information (might relate to collaborations)
         authors = listify(item.get('author', item.get('collaboration', [])))
         for au in authors:
             # Explicit membership tests: dict.get() would evaluate a
             # au['ce:initials'] default eagerly and discard a present
             # given name whenever the initials are missing.
             if 'ce:given-name' in au:
                 given = au['ce:given-name']
             elif 'ce:initials' in au:
                 given = au['ce:initials']
             else:  # Collaboration
                 given = au.get('ce:text')
             indexed = chained_get(au, ['preferred-name', 'ce:indexed-name'])
             new = auth(affiliation_id=aff_id, organization=org,
                        city=aff.get('city'), dptid=aff.get("@dptid"),
                        postalcode=aff.get('postal-code'),
                        addresspart=aff.get('address-part'),
                        country=aff.get('country'), auid=au.get('@auid'),
                        surname=au.get('ce:surname'), given_name=given,
                        indexed_name=indexed)
             out.append(new)
     return out or None
 def state(self):
     """The state (country's administrative subunit) of the affiliation,
     or None if not given.
     """
     return chained_get(self._json,
                        ['institution-profile', 'address', 'state'])
Exemple #17
0
 def refcount(self):
     """Number of references of an article, read from the '@refcount'
     entry of the bibliography; None if that entry is absent.
     Note: Requires the FULL view of the article.
     """
     return chained_get(
         self._json,
         ['item', 'bibrecord', 'tail', 'bibliography', '@refcount'])
Exemple #18
0
 def publisheraddress(self):
     """Address of the publisher of the abstract, or None if not given."""
     keys = ['source', 'publisher', 'publisheraddress']
     return chained_get(self._head, keys)
Exemple #19
0
 def surname(self):
     """Author's preferred surname, or None if not given."""
     return chained_get(self._json,
                        ['author-profile', 'preferred-name', 'surname'])
Exemple #20
0
 def status(self):
     """The status of the author profile, or None if not given."""
     keys = ["author-profile", "status"]
     return chained_get(self._json, keys)
Exemple #21
0
 def indexed_name(self):
     """Author's name as indexed by Scopus, or None if not given."""
     return chained_get(self._json,
                        ['author-profile', 'preferred-name', 'indexed-name'])
Exemple #22
0
 def given_name(self):
     """Author's preferred given name, or None if not given."""
     return chained_get(self._json,
                        ['author-profile', 'preferred-name', 'given-name'])
Exemple #23
0
 def website(self):
     """Website of publisher, or None if not given."""
     keys = ['source', 'website', 'ce:e-address', '$']
     return chained_get(self._head, keys)
Exemple #24
0
 def funding_text(self):
     """The raw text from which Scopus derives funding information, or
     None if not given.
     """
     return chained_get(
         self._json,
         ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding-text'])
Exemple #25
0
    def references(self):
        """List of namedtuples representing references listed in the abstract,
        in the form (position, id, doi, title, authors, authors_auid,
        authors_affiliationid, sourcetitle, publicationyear, volume, issue, first,
        last, citedbycount, text, fulltext).
        `position` is the number at which the reference appears in the
        document, `id` is the Scopus ID of the referenced abstract (EID
        without the "2-s2.0-"), `authors` is a string of the names of the
        authors in the format "Surname1, Initials1; Surname2, Initials2",
        `authors_auid` is a string of the author IDs joined on "; ",
        `authors_affiliationid` is a string of the authors' affiliation IDs
        joined on "; ", `sourcetitle` is the name of the source (e.g. the
        journal), `publicationyear` is the year of the publication as a string,
        `volume` and `issue`, are strings referring to the volume and issue,
        `first` and `last` refer to the page range, `citedbycount` is a string
        for the total number of citations of the cited item, `text` is
        Scopus-provided information on the publication, `fulltext` is the text
        the authors used for the reference.

        Returns None if no references are found.

        Note: Requires either the FULL view or REF view of the article.  Might
        be empty even if refcount is positive.  Specific fields can be empty.
        Author lists (authors, authors_auid, authors_affiliationid) may contain
        duplicates but have been filtered of None's.
        """
        out = []
        fields = 'position id doi title authors authors_auid '\
                 'authors_affiliationid sourcetitle publicationyear volume '\
                 'issue first last citedbycount text fulltext'
        ref = namedtuple('Reference', fields)
        # Look under the bibliography of the bibrecord first; if that path is
        # absent, use the top-level 'references' container instead
        path = ['item', 'bibrecord', 'tail', 'bibliography', 'reference']
        items = listify(chained_get(self._json, path,
                    self._json.get('references', {}).get('reference', [])))
        for item in items:
            # Details sit under 'ref-info' when present, else on the item
            info = item.get('ref-info', item)
            # `or {}` also covers a key that is present but empty/None
            volisspag = info.get('volisspag', {}) or {}
            if isinstance(volisspag, list):
                # Several entries given: only the first one is used
                volisspag = volisspag[0]
            # Parse author information
            try:  # FULL view parsing
                auth = listify(item['ref-info']['ref-authors']['author'])
                authors = [', '.join([d['ce:surname'], d['ce:initials']])
                           for d in auth]
                # This branch provides no author or affiliation IDs
                auids = None
                affids = None
            # Any key missing in the block above ends up here
            except KeyError:  # REF view parsing
                # NOTE(review): auth is not passed through listify here;
                # confirm a lone author always arrives as a list
                auth = (info.get('author-list') or {}).get('author', [])
                authors = [', '.join(filter(None, [d.get('ce:surname'),
                                                   d.get('ce:given-name')]))
                           for d in auth]
                auids = "; ".join(filter(None, [d.get('@auid') for d in auth]))
                affs = filter(None, [d.get('affiliation') for d in auth])
                affids = "; ".join([aff.get('@id') for aff in affs])
            # Parse IDs
            try:
                ids = listify(info['refd-itemidlist']['itemid'])
            except KeyError:
                ids = []
            # Prefer the DOI from the item ID list, else the 'ce:doi' field
            try:
                doi = _select_by_idtype(ids, 'DOI')[0]
            except IndexError:
                doi = info.get('ce:doi')
            # Prefer the 'SGR' entry of the ID list, else the 'scopus-id' field
            try:
                scopus_id = _select_by_idtype(ids, 'SGR')[0]
            except IndexError:
                scopus_id = info.get('scopus-id')
            # Combine information
            new = ref(position=item.get('@id'),
                      id=scopus_id,
                      doi=doi,
                      authors="; ".join(authors),
                      # Empty strings are reported as None
                      authors_auid=auids or None,
                      authors_affiliationid=affids or None,
                      title=info.get('ref-title', {}).get('ref-titletext', info.get('title')),
                      sourcetitle=info.get('ref-sourcetitle', info.get('sourcetitle')),
                      publicationyear=info.get('ref-publicationyear', {}).get('@first'),
                      volume=volisspag.get('voliss', {}).get('@volume'),
                      issue=volisspag.get('voliss', {}).get('@issue'),
                      first=volisspag.get('pagerange', {}).get('@first'),
                      last=volisspag.get('pagerange', {}).get('@last'),
                      citedbycount=info.get('citedby-count'),
                      text=info.get('ref-text'),
                      fulltext=item.get('ref-fulltext'))
            out.append(new)
        return out or None
Exemple #26
0
 def refcount(self):
     """Number of references of an article, read from the '@refcount'
     entry of the bibliography; None if that entry is absent.
     Note: Requires the FULL view of the article.
     """
     return chained_get(
         self._json,
         ['item', 'bibrecord', 'tail', 'bibliography', '@refcount'])
 def postal_code(self):
     """The postal code of the affiliation, or None if not given."""
     return chained_get(self._json,
                        ['institution-profile', 'address', 'postal-code'])
Exemple #28
0
    def references(self):
        """List of namedtuples representing references listed in the abstract,
        in the form (position, id, doi, title, authors, authors_auid,
        authors_affiliationid, sourcetitle, publicationyear, volume, issue, first,
        last, citedbycount, text, fulltext).
        `position` is the number at which the reference appears in the
        document, `id` is the Scopus ID of the referenced abstract (EID
        without the "2-s2.0-"), `authors` is a string of the names of the
        authors in the format "Surname1, Initials1; Surname2, Initials2",
        `authors_auid` is a string of the author IDs joined on "; ",
        `authors_affiliationid` is a string of the authors' affiliation IDs
        joined on "; ", `sourcetitle` is the name of the source (e.g. the
        journal), `publicationyear` is the year of the publication as a string,
        `volume` and `issue`, are strings referring to the volume and issue,
        `first` and `last` refer to the page range, `citedbycount` is a string
        for the total number of citations of the cited item, `text` is
        Scopus-provided information on the publication, `fulltext` is the text
        the authors used for the reference.

        Returns None if no references are found.

        Note: Requires either the FULL view or REF view of the article.  Might
        be empty even if refcount is positive.  Specific fields can be empty.
        Author lists (authors, authors_auid, authors_affiliationid) may contain
        duplicates but have been filtered of None's.
        """
        out = []
        fields = 'position id doi title authors authors_auid '\
                 'authors_affiliationid sourcetitle publicationyear volume '\
                 'issue first last citedbycount text fulltext'
        ref = namedtuple('Reference', fields)
        # Look under the bibliography of the bibrecord first; if that path is
        # absent, use the top-level 'references' container instead
        path = ['item', 'bibrecord', 'tail', 'bibliography', 'reference']
        items = listify(
            chained_get(self._json, path,
                        self._json.get('references', {}).get('reference', [])))
        for item in items:
            # Details sit under 'ref-info' when present, else on the item
            info = item.get('ref-info', item)
            # `or {}` also covers a key that is present but empty/None
            volisspag = info.get('volisspag', {}) or {}
            if isinstance(volisspag, list):
                # Several entries given: only the first one is used
                volisspag = volisspag[0]
            # Parse author information
            try:  # FULL view parsing
                auth = listify(item['ref-info']['ref-authors']['author'])
                authors = [
                    ', '.join([d['ce:surname'], d['ce:initials']])
                    for d in auth
                ]
                # This branch provides no author or affiliation IDs
                auids = None
                affids = None
            # Any key missing in the block above ends up here
            except KeyError:  # REF view parsing
                # NOTE(review): auth is not passed through listify here;
                # confirm a lone author always arrives as a list
                auth = (info.get('author-list') or {}).get('author', [])
                authors = [
                    ', '.join(
                        filter(None,
                               [d.get('ce:surname'),
                                d.get('ce:given-name')])) for d in auth
                ]
                auids = "; ".join(filter(None, [d.get('@auid') for d in auth]))
                affs = filter(None, [d.get('affiliation') for d in auth])
                affids = "; ".join([aff.get('@id') for aff in affs])
            # Parse IDs
            try:
                ids = listify(info['refd-itemidlist']['itemid'])
            except KeyError:
                ids = []
            # Prefer the first 'DOI' entry of the ID list, else 'ce:doi'
            try:
                doi = [d['$'] for d in ids if d['@idtype'] == 'DOI'][0]
            except IndexError:
                doi = info.get('ce:doi')
            # Prefer the first 'SGR' entry of the ID list, else 'scopus-id'
            try:
                scopus_id = [d['$'] for d in ids if d['@idtype'] == 'SGR'][0]
            except IndexError:
                scopus_id = info.get('scopus-id')
            # Combine information
            new = ref(position=item.get('@id'),
                      id=scopus_id,
                      doi=doi,
                      authors="; ".join(authors),
                      # Empty strings are reported as None
                      authors_auid=auids or None,
                      authors_affiliationid=affids or None,
                      title=info.get('ref-title',
                                     {}).get('ref-titletext',
                                             info.get('title')),
                      sourcetitle=info.get('ref-sourcetitle',
                                           info.get('sourcetitle')),
                      publicationyear=info.get('ref-publicationyear',
                                               {}).get('@first'),
                      volume=volisspag.get('voliss', {}).get('@volume'),
                      issue=volisspag.get('voliss', {}).get('@issue'),
                      first=volisspag.get('pagerange', {}).get('@first'),
                      last=volisspag.get('pagerange', {}).get('@last'),
                      citedbycount=info.get('citedby-count'),
                      text=info.get('ref-text'),
                      fulltext=item.get('ref-fulltext'))
            out.append(new)
        return out or None
Exemple #29
0
 def initials(self):
     """Author's preferred initials, or None if not given."""
     return chained_get(self._json,
                        ['author-profile', 'preferred-name', 'initials'])
Exemple #30
0
 def state(self):
     """The state (country's administrative subunit) of the affiliation,
     or None if not given.
     """
     return chained_get(self._json,
                        ['institution-profile', 'address', 'state'])
Exemple #31
0
 def postal_code(self):
     """The postal code of the affiliation, or None if not given."""
     return chained_get(self._json,
                        ['institution-profile', 'address', 'postal-code'])
Exemple #32
0
 def funding_text(self):
     """The raw text from which Scopus derives funding information, or
     None if not given.
     """
     return chained_get(
         self._json,
         ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding-text'])
Exemple #33
0
 def website(self):
     """Website of publisher, or None if not given."""
     keys = ['source', 'website', 'ce:e-address', '$']
     return chained_get(self._head, keys)
Exemple #34
0
 def publisheraddress(self):
     """Address of the publisher of the abstract, or None if not given."""
     keys = ['source', 'publisher', 'publisheraddress']
     return chained_get(self._head, keys)
Exemple #35
0
    def __init__(self, identifier=None, view='META_ABS', refresh=False,
                 id_type=None, EID=None):
        """Class to represent the results from a Scopus abstract.

        Parameters
        ----------
        identifier : str or int
            The identifier of an abstract.  Can be the Scopus EID, the Scopus
            ID, the PII, the Pubmed-ID or the DOI.

        EID : str (deprecated since 1.2)
            Deprecated in favor of `identifier`, will be removed in a future
            release.  Only used when `identifier` is None.

        id_type: str (optional, default=None)
            The type of used ID. Allowed values: None, 'eid', 'pii',
            'scopus_id', 'pubmed_id', 'doi'.  If the value is None, the
            function tries to infer the ID type itself.

        view : str (optional, default=META_ABS)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files.  Allowed values: META, META_ABS,
            REF, FULL, where FULL includes all information of META_ABS view
            and META_ABS includes all information of the META view.  See
            https://dev.elsevier.com/guides/AbstractRetrievalViews.htm
            for details.

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        Raises
        ------
        ValueError
            If the id_type parameter or the view parameter contains
            invalid entries.

        Notes
        -----
        The files are cached in ~/.scopus/abstract_retrieval/{identifier}.  In
        case a DOI is used as identifier, an underscore replaces the forward
        slash in the filename.
        """
        # Honour the deprecated EID parameter when no identifier is given
        if identifier is None and EID:
            msg = ("Parameter EID is deprecated in favor of parameter "
                   "identifier.  Please update your code.")
            warn(msg, UserWarning)
            identifier = EID
        identifier = str(identifier)

        # Validate the requested view
        views = ('META', 'META_ABS', 'REF', 'FULL')
        if view not in views:
            raise ValueError('view parameter must be one of ' +
                             ', '.join(views))

        # Infer the ID type unless given, in which case validate it
        id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
        if id_type is None:
            id_type = detect_id_type(identifier)
        elif id_type not in id_types:
            raise ValueError('id_type parameter must be one of ' +
                             ', '.join(id_types))

        # Load json and keep shortcuts to frequently used sub-records
        Retrieval.__init__(self, identifier=identifier, id_type=id_type,
                           api='AbstractRetrieval', refresh=refresh, view=view)
        self._json = self._json['abstracts-retrieval-response']
        self._head = chained_get(self._json, ["item", "bibrecord", "head"], {})
        confevent_keys = ['source', 'additional-srcinfo', 'conferenceinfo',
                          'confevent']
        self._confevent = chained_get(self._head, confevent_keys, {})
Exemple #36
0
 def language(self):
     """Language of the article, or None if not given."""
     keys = ['language', '@xml:lang']
     return chained_get(self._json, keys)
Exemple #37
0
 def historical_identifier(self):
     """Scopus IDs of previous profiles now comprising this profile, as a
     list of strings (the part after the last ":" of each identifier), or
     None if there are none.
     """
     path = ["coredata", 'historical-identifier']
     # listify guards against a lone identifier stored as a single record
     # instead of a list; without it such a record is iterated key by key.
     hist = listify(chained_get(self._json, path, []))
     return [d['$'].split(":")[-1] for d in hist] or None
Exemple #38
0
 def language(self):
     """Language of the article, or None if not given."""
     keys = ['language', '@xml:lang']
     return chained_get(self._json, keys)