예제 #1
0
 def references(self):
     """List of namedtuples representing references listed in the abstract,
     in the form (position, id, doi, title, authors, sourcetitle,
     publicationyear, volume, issue, first, last, text, fulltext).
     `position` is the number at which the reference appears in the
     document, `id` is the Scopus ID of the referenced abstract (EID
     without the "2-s2.0-"), `authors` is a list of the names of the
     authors in the format "Surname, Initials", `first` and `last` refer
     to the page range, `text` is Scopus-provided information on the
     publication and `fulltext` is the text the authors used for
     the reference.
     Fields a reference does not provide are None; this includes `id`
     and `doi` when no item ID of the matching type is listed.
     Returns None if no references are found.
     Note: Requires the FULL view of the article.  Might be empty even if
     refcount is positive.
     """
     out = []
     fields = 'position id doi title authors sourcetitle publicationyear '\
              'volume issue first last text fulltext'
     ref = namedtuple('Reference', fields)
     path = ['item', 'bibrecord', 'tail', 'bibliography', 'reference']
     items = listify(chained_get(self._json, path, []))
     for item in items:
         info = item['ref-info']
         volisspag = info.get('ref-volisspag', {})
         voliss = volisspag.get('voliss', {})
         pagerange = volisspag.get('pagerange', {})
         try:
             auth = listify(info['ref-authors']['author'])
             authors = [
                 ', '.join([d['ce:surname'], d['ce:initials']])
                 for d in auth
             ]
         except KeyError:  # No authors given
             authors = None
         try:
             ids = listify(info['refd-itemidlist']['itemid'])
         except KeyError:  # Reference carries no item IDs at all
             ids = []
         # First ID of each type; None (instead of an IndexError) when the
         # reference lists no ID of that type
         doi = next((d['$'] for d in ids if d['@idtype'] == 'DOI'), None)
         scopus_id = next((d['$'] for d in ids if d['@idtype'] == 'SGR'),
                          None)
         new = ref(position=item.get('@id'),
                   id=scopus_id,
                   doi=doi,
                   authors=authors,
                   title=info.get('ref-title', {}).get('ref-titletext'),
                   sourcetitle=info.get('ref-sourcetitle'),
                   publicationyear=info.get('ref-publicationyear',
                                            {}).get('@first'),
                   volume=voliss.get('@volume'),
                   issue=voliss.get('@issue'),
                   first=pagerange.get('@first'),
                   last=pagerange.get('@last'),
                   text=info.get('ref-text'),
                   fulltext=item.get('ref-fulltext'))
         out.append(new)
     return out or None
예제 #2
0
 def sequencebank(self):
     """Biological entities defined or mentioned in the text.

     Returns a list of `Sequencebank` namedtuples in the form
     (name, sequence_number, type), one for every sequence number of
     every sequence bank, or None if nothing is listed.
     """
     bank = namedtuple('Sequencebank', 'name sequence_number type')
     path = ['enhancement', 'sequencebanks', 'sequencebank']
     entries = [
         bank(name=group['@name'], sequence_number=seq['$'],
              type=seq['@type'])
         for group in listify(chained_get(self._head, path, []))
         # A bank may carry one or several sequence numbers
         for seq in listify(group['sequence-number'])
     ]
     return entries or None
예제 #3
0
 def authors(self):
     """A list of namedtuples representing the article's authors, in the
     form (auid, indexed_name, surname, given_name, affiliation).
     `affiliation` is the list of '@id' values of the author's
     affiliation entries, or None if the author has none.
     Returns None if no authors are listed.
     Note: The affiliation referred to here is what Scopus' algorithm
     determined as the main affiliation.  Property `authorgroup` provides
     all affiliations.
     """
     author = namedtuple(
         'Author', 'auid indexed_name surname given_name affiliation')
     records = []
     for entry in chained_get(self._json, ['authors', 'author'], []):
         # Skip empty affiliation entries before reading their IDs
         aff_ids = [a.get('@id')
                    for a in listify(entry.get('affiliation')) if a]
         records.append(author(
             auid=entry['@auid'],
             indexed_name=entry.get('ce:indexed-name'),
             surname=entry.get('ce:surname'),
             given_name=chained_get(
                 entry, ['preferred-name', 'ce:given-name']),
             affiliation=aff_ids or None))
     return records or None
예제 #4
0
 def classificationgroup(self):
     """List with (subject group ID, number of documents)-tuples, or None
     if no classification is listed.
     """
     path = ['author-profile', 'classificationgroup', 'classifications',
             'classification']
     pairs = []
     for entry in listify(chained_get(self._json, path, [])):
         pairs.append((entry['$'], entry['@frequency']))
     return pairs or None
예제 #5
0
    def __init__(self, author_id, refresh=False):
        """Class to represent a Scopus Author query by the scopus-id.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.  Optionally expressed
            as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

        refresh : bool (optional, default=False)
            Whether to refresh the cached file (if it exists) or not.

        Notes
        -----
        The files are cached in ~/.scopus/author_retrieval/{author_id} (without
        eventually leading '9-s2.0-').

        A UserWarning is issued when the response cannot be indexed with 0
        and instead lists alias profiles (merged author profile).
        """
        # Keep only the part after the last '-' and normalize it via int()
        numeric_part = str(author_id).split('-')[-1]
        self._id = str(int(numeric_part))
        # Load json
        Retrieval.__init__(self, self._id, 'AuthorRetrieval', refresh)
        self._json = self._json['author-retrieval-response']
        # Checks
        try:
            self._json = self._json[0]
        except KeyError:  # Incomplete forward
            # Each alias URL ends in ':<id>'; report the IDs only
            urls = listify(self._json['alias']['prism:url'])
            alias = ', '.join([u['$'].split(':')[-1] for u in urls])
            template = ('Author profile with ID {} has been merged and the '
                        'main profile is now one of {}.  Please update your '
                        'records manually.  Functionality of this object is '
                        'reduced.')
            warn(template.format(author_id, alias), UserWarning)
예제 #6
0
 def authors(self):
     """A list of namedtuples representing the article's authors, in the
     form (auid, indexed_name, surname, given_name, affiliation).
     `affiliation` is the list of '@id' values of the author's
     affiliation entries, or None if the author has none.
     Returns None if no authors are listed.
     Note: Affiliations listed here are often incomplete and sometimes
     use the first author's affiliation for all others.  Rather use
     property authorgroup.
     """
     author = namedtuple(
         'Author', 'auid indexed_name surname given_name affiliation')
     records = []
     for entry in chained_get(self._json, ['authors', 'author'], []):
         # Skip empty affiliation entries before reading their IDs
         aff_ids = [a.get('@id')
                    for a in listify(entry.get('affiliation')) if a]
         records.append(author(
             auid=entry['@auid'],
             indexed_name=entry.get('ce:indexed-name'),
             surname=entry.get('ce:surname'),
             given_name=chained_get(
                 entry, ['preferred-name', 'ce:given-name']),
             affiliation=aff_ids or None))
     return records or None
예제 #7
0
    def __init__(self, author_id, refresh=False):
        """Class to represent a Scopus Author query by the scopus-id.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.  Optionally expressed
            as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

        refresh : bool (optional, default=False)
            Whether to refresh the cached file (if it exists) or not.

        Notes
        -----
        The files are cached in ~/.scopus/author_retrieval/{author_id} (without
        eventually leading '9-s2.0-').

        A UserWarning is issued when the response cannot be indexed with 0
        and instead lists alias profiles (merged author profile).
        """
        # Keep only the part after the last '-' and normalize it via int()
        numeric_part = str(author_id).split('-')[-1]
        self._id = str(int(numeric_part))
        # Load json
        Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                           refresh=refresh)
        self._json = self._json['author-retrieval-response']
        # Checks
        try:
            self._json = self._json[0]
        except KeyError:  # Incomplete forward
            # Each alias URL ends in ':<id>'; report the IDs only
            urls = listify(self._json['alias']['prism:url'])
            alias = ', '.join([u['$'].split(':')[-1] for u in urls])
            template = ('Author profile with ID {} has been merged and the '
                        'main profile is now one of {}.  Please update your '
                        'records manually.  Functionality of this object is '
                        'reduced.')
            warn(template.format(author_id, alias), UserWarning)
예제 #8
0
 def sequencebank(self):
     """Biological entities defined or mentioned in the text.

     Returns a list of `Sequencebank` namedtuples in the form
     (name, sequence_number, type), one for every sequence number of
     every sequence bank, or None if nothing is listed.
     """
     bank = namedtuple('Sequencebank', 'name sequence_number type')
     path = ['enhancement', 'sequencebanks', 'sequencebank']
     entries = [
         bank(name=group['@name'], sequence_number=seq['$'],
              type=seq['@type'])
         for group in listify(chained_get(self._head, path, []))
         # A bank may carry one or several sequence numbers
         for seq in listify(group['sequence-number'])
     ]
     return entries or None
예제 #9
0
 def isbn(self):
     """ISBNs belonging to publicationName as tuple of varying length
     (e.g. ISBN-10 or ISBN-13), or None if no ISBN is listed.
     """
     entries = listify(chained_get(self._head, ['source', 'isbn'], []))
     if not entries:
         return None
     return tuple(entry['$'] for entry in entries)
예제 #10
0
 def isbn(self):
     """ISBNs belonging to publicationName as tuple of varying length
     (e.g. ISBN-10 or ISBN-13), or None if no ISBN is listed.
     """
     entries = listify(chained_get(self._head, ['source', 'isbn'], []))
     if not entries:
         return None
     return tuple(entry['$'] for entry in entries)
예제 #11
0
 def classificationgroup(self):
     """List with (subject group ID, number of documents)-tuples, or None
     if no classification is listed.
     """
     path = ['author-profile', 'classificationgroup', 'classifications',
             'classification']
     pairs = []
     for entry in listify(chained_get(self._json, path, [])):
         pairs.append((entry['$'], entry['@frequency']))
     return pairs or None
예제 #12
0
 def idxterms(self):
     """List of index terms, or None if no index terms are available."""
     try:
         terms = listify(self._json.get("idxterms", {}).get('mainterm', []))
     except AttributeError:  # idxterms is present but not a mapping
         return None
     try:
         # Report "no terms" as None rather than as an empty list
         return [d['$'] for d in terms] or None
     except (AttributeError, TypeError):
         # Subscripting an entry that is not a mapping raises TypeError
         return None
예제 #13
0
 def idxterms(self):
     """List of index terms, or None if no index terms are available."""
     try:
         terms = listify(self._json.get("idxterms", {}).get('mainterm', []))
     except AttributeError:  # idxterms is present but not a mapping
         return None
     try:
         # Report "no terms" as None rather than as an empty list
         return [d['$'] for d in terms] or None
     except (AttributeError, TypeError):
         # Subscripting an entry that is not a mapping raises TypeError
         return None
예제 #14
0
 def subject_areas(self):
     """List of namedtuples containing subject areas of the article
     in the form (area, abbreviation, code), or None if no subject area
     is listed.
     Note: Requires the FULL view of the article.
     """
     area = namedtuple('Area', 'area abbreviation code')
     path = ['subject-areas', 'subject-area']
     areas = []
     for entry in listify(chained_get(self._json, path, [])):
         areas.append(area(area=entry['$'],
                           abbreviation=entry['@abbrev'],
                           code=entry['@code']))
     return areas or None
예제 #15
0
 def chemicals(self):
     """List of namedtuples representing chemical entities in the form
     (source, chemical_name, cas_registry_number).  In case multiple
     numbers given, they are joined on ";".  Returns None if no chemical
     is listed.
     """
     chemical = namedtuple('Chemical',
                           'source chemical_name cas_registry_number')
     path = ['enhancement', 'chemicalgroup', 'chemicals']
     found = []
     for group in listify(chained_get(self._head, path, [])):
         for entry in listify(group['chemical']):
             cas = entry.get('cas-registry-number')
             try:  # Multiple numbers given
                 cas = ";".join([n['$'] for n in cas])
             except TypeError:
                 # Not a list of entries (e.g. None): keep the raw value
                 pass
             found.append(chemical(source=group['@source'],
                                   chemical_name=entry['chemical-name'],
                                   cas_registry_number=cas))
     return found or None
예제 #16
0
 def name_variants(self):
     """List of named tuples containing variants of the author name with
     number of documents published with that variant, in the form
     (indexed_name, initials, surname, given_name, doc_count).
     Returns None if no name variant is listed.
     """
     variant = namedtuple(
         'Variant', 'indexed_name initials surname given_name doc_count')
     path = ['author-profile', 'name-variant']
     variants = []
     for entry in listify(chained_get(self._json, path, [])):
         # indexed-name, surname and initials are required keys;
         # given-name and @doc-count default to None
         variants.append(variant(indexed_name=entry['indexed-name'],
                                 surname=entry['surname'],
                                 initials=entry['initials'],
                                 given_name=entry.get('given-name'),
                                 doc_count=entry.get('@doc-count')))
     return variants or None
예제 #17
0
 def journal_history(self):
     """List of named tuples of authored publications in the form
     (sourcetitle, abbreviation, type, issn).  issn is only given
     for journals.  abbreviation and issn may be None.
     Returns None if no journal history is listed.
     """
     jour = namedtuple('Journal', 'sourcetitle abbreviation type issn')
     path = ['author-profile', 'journal-history', 'journal']
     history = []
     for entry in listify(chained_get(self._json, path, [])):
         history.append(jour(sourcetitle=entry['sourcetitle'],
                             abbreviation=entry.get('sourcetitle-abbrev'),
                             type=entry['@type'],
                             issn=entry.get('issn')))
     return history or None
예제 #18
0
 def authorgroup(self):
     """A list of namedtuples representing the article's authors organized
     by affiliation, in the form (affiliation_id, dptid, organization,
     city, postalcode, addresspart, country, auid, indexed_name,
     surname, given_name).
     If "given_name" is not present, fall back to initials; entries with
     neither (e.g. collaborations) use their "ce:text" instead.
     Returns None if no author group is listed.
     Note: Affiliation information might be missing or mal-assigned even
     when it looks correct in the web view.  In this case please request
     a correction.
     """
     out = []
     fields = 'affiliation_id dptid organization city postalcode '\
              'addresspart country auid indexed_name surname given_name'
     auth = namedtuple('Author', fields)
     items = listify(self._head.get('author-group', []))
     for item in items:
         # Affiliation information
         aff = item.get('affiliation', {})
         try:
             aff_ids = listify(aff['affiliation-id'])
             aff_id = ", ".join([a["@afid"] for a in aff_ids])
         except KeyError:
             # No usable list of affiliation IDs: use the affiliation's own
             aff_id = aff.get("@afid")
         org = _get_org(aff)
         # Author information (might relate to collaborations)
         authors = listify(item.get('author',
                                    item.get('collaboration', [])))
         for au in authors:
             # Explicit membership tests instead of
             # au.get('ce:given-name', au['ce:initials']): the default is
             # evaluated eagerly, so a missing 'ce:initials' would discard
             # an existing given name
             if 'ce:given-name' in au:
                 given = au['ce:given-name']
             elif 'ce:initials' in au:
                 given = au['ce:initials']
             else:  # Collaboration
                 given = au.get('ce:text')
             new = auth(affiliation_id=aff_id, organization=org,
                        city=aff.get('city'), dptid=aff.get("@dptid"),
                        postalcode=aff.get('postal-code'),
                        addresspart=aff.get('address-part'),
                        country=aff.get('country'), auid=au.get('@auid'),
                        surname=au.get('ce:surname'), given_name=given,
                        indexed_name=chained_get(
                            au, ['preferred-name', 'ce:indexed-name']))
             out.append(new)
     return out or None
예제 #19
0
 def authorgroup(self):
     """A list of namedtuples representing the article's authors organized
     by affiliation, in the form (affiliation_id, organization, city_group,
     country, auid, indexed_name, surname, given_name).  If "given_name"
     is not present, fall back to initials; entries with neither (e.g.
     collaborations) use their "ce:text" instead.
     Returns None if no author group is listed.
     Note: Affiliation information might be missing or mal-assigned even
     when it looks correct in the web view.  In this case please request
     a correction.
     """
     out = []
     fields = 'affiliation_id organization city_group country '\
              'auid indexed_name surname given_name'
     auth = namedtuple('Author', fields)
     items = listify(self._head.get('author-group', []))
     for item in items:
         # Affiliation information
         aff = item.get('affiliation', {})
         try:
             aff_ids = listify(aff['affiliation-id'])
             aff_id = ", ".join([a["@afid"] for a in aff_ids])
         except KeyError:
             # No usable list of affiliation IDs (e.g. author group without
             # affiliation): use the affiliation's own ID, if any
             aff_id = aff.get("@afid")
         org = _get_org(aff)
         # Author information (might relate to collaborations)
         authors = listify(item.get('author', item.get('collaboration',
                                                       [])))
         for au in authors:
             # Explicit membership tests instead of
             # au.get('ce:given-name', au['ce:initials']): the default is
             # evaluated eagerly, so a missing 'ce:initials' would discard
             # an existing given name
             if 'ce:given-name' in au:
                 given = au['ce:given-name']
             elif 'ce:initials' in au:
                 given = au['ce:initials']
             else:  # Collaboration
                 given = au.get('ce:text')
             new = auth(affiliation_id=aff_id,
                        organization=org,
                        city_group=aff.get('city-group'),
                        country=aff.get('country'),
                        auid=au.get('@auid'),
                        surname=au.get('ce:surname'),
                        given_name=given,
                        indexed_name=chained_get(
                            au, ['preferred-name', 'ce:indexed-name']))
             out.append(new)
     return out or None
예제 #20
0
 def chemicals(self):
     """List of namedtuples representing chemical entities in the form
     (source, chemical_name, cas_registry_number).  In case multiple
     numbers given, they are joined on ";".  Returns None if no chemical
     is listed.
     """
     chemical = namedtuple('Chemical',
                           'source chemical_name cas_registry_number')
     path = ['enhancement', 'chemicalgroup', 'chemicals']
     found = []
     for group in listify(chained_get(self._head, path, [])):
         for entry in listify(group['chemical']):
             cas = entry.get('cas-registry-number')
             try:  # Multiple numbers given
                 cas = ";".join([n['$'] for n in cas])
             except TypeError:
                 # Not a list of entries (e.g. None): keep the raw value
                 pass
             found.append(chemical(source=group['@source'],
                                   chemical_name=entry['chemical-name'],
                                   cas_registry_number=cas))
     return found or None
예제 #21
0
 def subject_areas(self):
     """List of namedtuples containing subject areas of the article
     in the form (area, abbreviation, code), or None if no subject area
     is listed.
     Note: Requires the FULL view of the article.
     """
     area = namedtuple('Area', 'area abbreviation code')
     path = ['subject-areas', 'subject-area']
     areas = []
     for entry in listify(chained_get(self._json, path, [])):
         areas.append(area(area=entry['$'],
                           abbreviation=entry['@abbrev'],
                           code=entry['@code']))
     return areas or None
예제 #22
0
 def affiliation(self):
     """A list of namedtuples representing listed affiliations in
     the form (id, name, city, country), or None if no affiliation is
     listed.  Fields missing from an entry are None.
     """
     aff = namedtuple('Affiliation', 'id name city country')
     listed = [
         aff(id=entry.get('@id'),
             name=entry.get('affilname'),
             city=entry.get('affiliation-city'),
             country=entry.get('affiliation-country'))
         for entry in listify(self._json.get('affiliation', []))
     ]
     return listed or None
예제 #23
0
 def journal_history(self):
     """List of named tuples of authored publications in the form
     (sourcetitle, abbreviation, type, issn).  issn is only given
     for journals.  abbreviation and issn may be None.
     Returns None if no journal history is listed.
     """
     jour = namedtuple('Journal', 'sourcetitle abbreviation type issn')
     path = ['author-profile', 'journal-history', 'journal']
     history = []
     for entry in listify(chained_get(self._json, path, [])):
         history.append(jour(sourcetitle=entry['sourcetitle'],
                             abbreviation=entry.get('sourcetitle-abbrev'),
                             type=entry['@type'],
                             issn=entry.get('issn')))
     return history or None
예제 #24
0
 def affiliation(self):
     """A list of namedtuples representing listed affiliations in
     the form (id, name, city, country), or None if no affiliation is
     listed.  Fields missing from an entry are None.
     """
     aff = namedtuple('Affiliation', 'id name city country')
     listed = [
         aff(id=entry.get('@id'),
             name=entry.get('affilname'),
             city=entry.get('affiliation-city'),
             country=entry.get('affiliation-country'))
         for entry in listify(self._json.get('affiliation', []))
     ]
     return listed or None
예제 #25
0
 def contributor_group(self):
     """List of namedtuples representing contributors compiled by Scopus,
     in the form (given_name, initials, surname, indexed_name, role), or
     None if no contributor group is listed.  Fields missing from an
     entry are None.
     """
     pers = namedtuple('Contributor',
                       'given_name initials surname indexed_name role')
     path = ['source', 'contributor-group']
     # Each group wraps its person record under 'contributor'
     people = [group.get('contributor', {})
               for group in listify(chained_get(self._head, path, []))]
     contributors = [
         pers(given_name=person.get('ce:given-name'),
              initials=person.get('ce:initials'),
              surname=person.get('ce:surname'),
              indexed_name=person.get('ce:indexed-name'),
              role=person.get('@role'))
         for person in people
     ]
     return contributors or None
예제 #26
0
 def funding(self):
     """List of namedtuples of parsed funding information in the form
     (agency, string, id, acronym, country), or None if no funding is
     listed.  Fields missing from an entry are None.
     """
     fund = namedtuple('Funding', 'agency string id acronym country')
     path = ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding']
     grants = [
         fund(agency=entry.get('xocs:funding-agency'),
              string=entry.get('xocs:funding-agency-matched-string'),
              id=entry.get('xocs:funding-agency-id'),
              acronym=entry.get('xocs:funding-agency-acronym'),
              country=entry.get('xocs:funding-agency-country'))
         for entry in listify(chained_get(self._json, path, []))
     ]
     return grants or None
예제 #27
0
 def funding(self):
     """List of namedtuples of parsed funding information in the form
     (agency, string, id, acronym, country), or None if no funding is
     listed.  Fields missing from an entry are None.
     """
     fund = namedtuple('Funding', 'agency string id acronym country')
     path = ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding']
     grants = [
         fund(agency=entry.get('xocs:funding-agency'),
              string=entry.get('xocs:funding-agency-matched-string'),
              id=entry.get('xocs:funding-agency-id'),
              acronym=entry.get('xocs:funding-agency-acronym'),
              country=entry.get('xocs:funding-agency-country'))
         for entry in listify(chained_get(self._json, path, []))
     ]
     return grants or None
예제 #28
0
 def name_variants(self):
     """List of named tuples containing variants of the author name with
     number of documents published with that variant, in the form
     (indexed_name, initials, surname, given_name, doc_count).
     Returns None if no name variant is listed.
     """
     variant = namedtuple(
         'Variant', 'indexed_name initials surname given_name doc_count')
     path = ['author-profile', 'name-variant']
     variants = []
     for entry in listify(chained_get(self._json, path, [])):
         # indexed-name, surname and initials are required keys;
         # given-name and @doc-count default to None
         variants.append(variant(indexed_name=entry['indexed-name'],
                                 surname=entry['surname'],
                                 initials=entry['initials'],
                                 given_name=entry.get('given-name'),
                                 doc_count=entry.get('@doc-count')))
     return variants or None
예제 #29
0
 def contributor_group(self):
     """List of namedtuples representing contributors compiled by Scopus,
     in the form (given_name, initials, surname, indexed_name, role), or
     None if no contributor group is listed.  Fields missing from an
     entry are None.
     """
     pers = namedtuple('Contributor',
                       'given_name initials surname indexed_name role')
     path = ['source', 'contributor-group']
     # Each group wraps its person record under 'contributor'
     people = [group.get('contributor', {})
               for group in listify(chained_get(self._head, path, []))]
     contributors = [
         pers(given_name=person.get('ce:given-name'),
              initials=person.get('ce:initials'),
              surname=person.get('ce:surname'),
              indexed_name=person.get('ce:indexed-name'),
              role=person.get('@role'))
         for person in people
     ]
     return contributors or None
예제 #30
0
 def authors(self):
     """A list of namedtuples representing the article's authors, in the
     form (auid, indexed_name, surname, given_name, affiliation).
     `affiliation` is the list of '@id' values of the author's
     affiliation entries, or None if the author has none.
     Returns None if no authors are listed.
     Note: The affiliation referred to here is what Scopus' algorithm
     determined as the main affiliation.  Property `authorgroup` provides
     all affiliations.
     """
     author = namedtuple(
         'Author', 'auid indexed_name surname given_name affiliation')
     records = []
     for entry in chained_get(self._json, ['authors', 'author'], []):
         # Skip empty affiliation entries before reading their IDs
         aff_ids = [a.get('@id')
                    for a in listify(entry.get('affiliation')) if a]
         records.append(author(
             auid=entry['@auid'],
             indexed_name=entry.get('ce:indexed-name'),
             surname=entry.get('ce:surname'),
             given_name=chained_get(
                 entry, ['preferred-name', 'ce:given-name']),
             affiliation=aff_ids or None))
     return records or None
예제 #31
0
    def references(self):
        """List of namedtuples representing references listed in the abstract,
        in the form (position, id, doi, title, authors, authors_auid,
        authors_affiliationid, sourcetitle, publicationyear, volume, issue, first,
        last, citedbycount, text, fulltext).
        `position` is the number at which the reference appears in the
        document, `id` is the Scopus ID of the referenced abstract (EID
        without the "2-s2.0-"), `authors` is a string of the names of the
        authors in the format "Surname1, Initials1; Surname2, Initials2",
        `authors_auid` is a string of the author IDs joined on "; ",
        `authors_affiliationid` is a string of the authors' affiliation IDs
        joined on "; ", `sourcetitle` is the name of the source (e.g. the
        journal), `publicationyear` is the year of the publication as a string,
        `volume` and `issue`, are strings referring to the volume and issue,
        `first` and `last` refer to the page range, `citedbycount` is a string
        for the total number of citations of the cited item, `text` is
        Scopus-provided information on the publication, `fulltext` is the text
        the authors used for the reference.

        Returns None if no references are found.

        Note: Requires either the FULL view or REF view of the article.  Might
        be empty even if refcount is positive.  Specific fields can be empty.
        Author lists (authors, authors_auid, authors_affiliationid) may contain
        duplicates but have been filtered of None's.
        """
        out = []
        fields = 'position id doi title authors authors_auid '\
                 'authors_affiliationid sourcetitle publicationyear volume '\
                 'issue first last citedbycount text fulltext'
        ref = namedtuple('Reference', fields)
        path = ['item', 'bibrecord', 'tail', 'bibliography', 'reference']
        items = listify(chained_get(self._json, path,
                    self._json.get('references', {}).get('reference', [])))
        for item in items:
            # FULL view nests the details under 'ref-info'; otherwise the
            # item itself holds them
            info = item.get('ref-info', item)
            # Volume/issue/pages: try the 'ref-'-prefixed key first (the
            # other FULL view keys carry that prefix), then 'volisspag'
            volisspag = (info.get('ref-volisspag') or info.get('volisspag')
                         or {})
            if isinstance(volisspag, list):
                volisspag = volisspag[0]
            voliss = volisspag.get('voliss', {})
            pagerange = volisspag.get('pagerange', {})
            # Parse author information
            try:  # FULL view parsing
                auth = listify(item['ref-info']['ref-authors']['author'])
                authors = [', '.join([d['ce:surname'], d['ce:initials']])
                           for d in auth]
                auids = None
                affids = None
            except KeyError:  # REF view parsing
                auth = (info.get('author-list') or {}).get('author', [])
                authors = [', '.join(filter(None, [d.get('ce:surname'),
                                                   d.get('ce:given-name')]))
                           for d in auth]
                auids = "; ".join(filter(None, [d.get('@auid') for d in auth]))
                affs = filter(None, [d.get('affiliation') for d in auth])
                # Drop affiliations without an ID; joining None would raise
                affids = "; ".join(filter(None,
                                          [aff.get('@id') for aff in affs]))
            # Parse IDs
            try:
                ids = listify(info['refd-itemidlist']['itemid'])
            except KeyError:
                ids = []
            try:
                doi = _select_by_idtype(ids, 'DOI')[0]
            except IndexError:
                doi = info.get('ce:doi')
            try:
                scopus_id = _select_by_idtype(ids, 'SGR')[0]
            except IndexError:
                scopus_id = info.get('scopus-id')
            # Combine information
            new = ref(position=item.get('@id'),
                      id=scopus_id,
                      doi=doi,
                      authors="; ".join(authors),
                      authors_auid=auids or None,
                      authors_affiliationid=affids or None,
                      title=info.get('ref-title', {}).get(
                          'ref-titletext', info.get('title')),
                      sourcetitle=info.get('ref-sourcetitle',
                                           info.get('sourcetitle')),
                      publicationyear=info.get('ref-publicationyear',
                                               {}).get('@first'),
                      volume=voliss.get('@volume'),
                      issue=voliss.get('@issue'),
                      first=pagerange.get('@first'),
                      last=pagerange.get('@last'),
                      citedbycount=info.get('citedby-count'),
                      text=info.get('ref-text'),
                      fulltext=item.get('ref-fulltext'))
            out.append(new)
        return out or None
예제 #32
0
    def results(self):
        """Return the parsed search results as a list of namedtuples.

        Every entry of `self._json` becomes one `Document` namedtuple with
        the fields (eid doi pii pubmed_id title subtype creator afid
        affilname affiliation_city affiliation_country author_count
        author_names author_ids author_afids coverDate coverDisplayDate
        publicationName issn source_id eIssn aggregationType volume
        issueIdentifier article_number pageRange description authkeywords
        citedby_count openaccess fund_acr fund_no fund_sponsor).

        Field definitions correspond to
        https://dev.elsevier.com/guides/ScopusSearchViews.htm, with these
        exceptions:  afid, affilname, affiliation_city and
        affiliation_country are assembled by `_join` from the entry's
        affiliations; author_names ("Surname, Given name") and author_ids
        are joined on ";"; author_afids holds one group per author joined on
        ";", where several affiliations of the same author are joined on "-"
        (e.g. Author1Aff;Author2Aff1-Author2Aff2); author_count is the "$"
        value of the entry's 'author-count'.

        Notes
        -----
        The list of authors and each author's list of affiliations are
        passed through `_deduplicate` before use.  Missing values are None
        (only 'eid' must be present), and None is returned instead of an
        empty list.
        """
        field_names = ('eid doi pii pubmed_id title subtype creator afid affilname '
                       'affiliation_city affiliation_country author_count '
                       'author_names author_ids author_afids coverDate '
                       'coverDisplayDate publicationName issn source_id eIssn '
                       'aggregationType volume issueIdentifier article_number '
                       'pageRange description authkeywords citedby_count '
                       'openaccess fund_acr fund_no fund_sponsor')
        document = namedtuple('Document', field_names)
        # Scratch key -> key handed to _join for the affiliation block
        affiliation_keys = (
            ("affilname", 'affilname'),
            ("afid", 'afid'),
            ("aff_city", 'affiliation-city'),
            ("aff_country", 'affiliation-country'),
        )
        # namedtuple field -> JSON key, for values copied over unchanged
        direct_keys = (
            ('article_number', 'article-number'),
            ('title', 'dc:title'),
            ('fund_sponsor', 'fund-sponsor'),
            ('subtype', 'subtype'),
            ('issn', 'prism:issn'),
            ('creator', 'dc:creator'),
            ('doi', 'prism:doi'),
            ('volume', 'prism:volume'),
            ('coverDisplayDate', 'prism:coverDisplayDate'),
            ('publicationName', 'prism:publicationName'),
            ('source_id', 'source-id'),
            ('aggregationType', 'prism:aggregationType'),
            ('issueIdentifier', 'prism:issueIdentifier'),
            ('pageRange', 'prism:pageRange'),
            ('fund_no', 'fund-no'),
            ('citedby_count', 'citedby-count'),
            ('openaccess', 'openaccess'),
            ('eIssn', 'prism:eIssn'),
            ('description', 'dc:description'),
            ('pii', 'pii'),
            ('authkeywords', 'authkeywords'),
            ('fund_acr', 'fund-acr'),
            ('pubmed_id', 'pubmed-id'),
        )
        documents = []
        for entry in self._json:
            parsed = {}
            # Affiliation block: a KeyError stops the parsing but keeps
            # whatever has been stored up to that point
            try:
                for slot, key in affiliation_keys:
                    parsed[slot] = _join(entry['affiliation'], key)
            except KeyError:
                pass
            # Author block: same best-effort handling as above
            try:
                unique_authors = _deduplicate(entry['author'])
                last_names = _replace_none(
                    [person['surname'] for person in unique_authors])
                given_names = _replace_none(
                    [person['given-name'] for person in unique_authors])
                parsed["auth_names"] = ";".join(
                    [", ".join(pair) for pair in zip(last_names, given_names)])
                parsed["auth_ids"] = ";".join(
                    [person['authid'] for person in unique_authors])
                # One "-"-joined group of affiliation IDs per author
                groups = [
                    '-'.join([
                        entry_afid['$'] for entry_afid in
                        listify(_deduplicate(person.get('afid', [])))
                    ])
                    for person in unique_authors
                ]
                parsed["auth_afid"] = ';'.join(groups) or None
            except KeyError:
                pass
            # The cover date is sometimes wrapped in a list of {'$': value}
            cover_date = entry.get('prism:coverDate')
            if isinstance(cover_date, list):
                cover_date = cover_date[0].get('$')
            values = {name: entry.get(key) for name, key in direct_keys}
            values.update(
                affilname=parsed.get("affilname"),
                author_names=parsed.get("auth_names"),
                coverDate=cover_date,
                author_ids=parsed.get("auth_ids"),
                author_afids=parsed.get("auth_afid"),
                affiliation_country=parsed.get("aff_country"),
                author_count=entry.get('author-count', {}).get('$'),
                affiliation_city=parsed.get("aff_city"),
                afid=parsed.get("afid"),
                eid=entry['eid'],
            )
            documents.append(document(**values))
        return documents or None
예제 #33
0
    def results(self):
        """Build one `Document` namedtuple per search result.

        The namedtuples have the form (eid doi pii pubmed_id title subtype
        creator afid affilname affiliation_city affiliation_country
        author_count author_names author_ids author_afids coverDate
        coverDisplayDate publicationName issn source_id eIssn aggregationType
        volume issueIdentifier article_number pageRange description
        authkeywords citedby_count openaccess fund_acr fund_no fund_sponsor).

        Field definitions correspond to
        https://dev.elsevier.com/guides/ScopusSearchViews.htm.  The
        affiliation fields (afid, affilname, affiliation_city,
        affiliation_country) come from `_join`; author_names and author_ids
        are joined on ";"; in author_afids the authors are separated by ";"
        and multiple affiliations of one author by "-"
        (e.g. Author1Aff;Author2Aff1-Author2Aff2); author_count is read from
        the "$" value of 'author-count'.

        Notes
        -----
        Authors and the affiliations of each author go through
        `_deduplicate` first.  Values that are not available are None; the
        'eid' key is mandatory.  Returns None when there is no result.
        """
        columns = ('eid doi pii pubmed_id title subtype creator afid affilname '
                   'affiliation_city affiliation_country author_count '
                   'author_names author_ids author_afids coverDate '
                   'coverDisplayDate publicationName issn source_id eIssn '
                   'aggregationType volume issueIdentifier article_number '
                   'pageRange description authkeywords citedby_count '
                   'openaccess fund_acr fund_no fund_sponsor')
        make_doc = namedtuple('Document', columns)
        parsed_docs = []
        for record in self._json:
            # Affiliations: assigned one by one so that a KeyError leaves
            # the values parsed so far in place
            aff_names = aff_ids = aff_cities = aff_countries = None
            try:
                aff_names = _join(record['affiliation'], 'affilname')
                aff_ids = _join(record['affiliation'], 'afid')
                aff_cities = _join(record['affiliation'], 'affiliation-city')
                aff_countries = _join(record['affiliation'],
                                      'affiliation-country')
            except KeyError:
                pass
            # Authors: same step-by-step, best-effort parsing
            names = author_ids = author_affs = None
            try:
                people = _deduplicate(record['author'])
                family = _replace_none([p['surname'] for p in people])
                given = _replace_none([p['given-name'] for p in people])
                names = ";".join(
                    [", ".join((last, first))
                     for last, first in zip(family, given)])
                author_ids = ";".join([p['authid'] for p in people])
                per_person = []
                for p in people:
                    own_affs = listify(_deduplicate(p.get('afid', [])))
                    per_person.append('-'.join([a['$'] for a in own_affs]))
                author_affs = ';'.join(per_person) or None
            except KeyError:
                pass
            # Unwrap a list-valued cover date
            cover = record.get('prism:coverDate')
            if isinstance(cover, list):
                cover = cover[0].get('$')
            # Looked up in this order on purpose: these are the only two
            # lookups that can raise
            n_authors = record.get('author-count', {}).get('$')
            eid = record['eid']
            parsed_docs.append(make_doc(
                eid=eid,
                doi=record.get('prism:doi'),
                pii=record.get('pii'),
                pubmed_id=record.get('pubmed-id'),
                title=record.get('dc:title'),
                subtype=record.get('subtype'),
                creator=record.get('dc:creator'),
                afid=aff_ids,
                affilname=aff_names,
                affiliation_city=aff_cities,
                affiliation_country=aff_countries,
                author_count=n_authors,
                author_names=names,
                author_ids=author_ids,
                author_afids=author_affs,
                coverDate=cover,
                coverDisplayDate=record.get('prism:coverDisplayDate'),
                publicationName=record.get('prism:publicationName'),
                issn=record.get('prism:issn'),
                source_id=record.get('source-id'),
                eIssn=record.get('prism:eIssn'),
                aggregationType=record.get('prism:aggregationType'),
                volume=record.get('prism:volume'),
                issueIdentifier=record.get('prism:issueIdentifier'),
                article_number=record.get('article-number'),
                pageRange=record.get('prism:pageRange'),
                description=record.get('dc:description'),
                authkeywords=record.get('authkeywords'),
                citedby_count=record.get('citedby-count'),
                openaccess=record.get('openaccess'),
                fund_acr=record.get('fund-acr'),
                fund_no=record.get('fund-no'),
                fund_sponsor=record.get('fund-sponsor'),
            ))
        return parsed_docs or None
예제 #34
0
    def references(self):
        """List of namedtuples representing references listed in the abstract,
        in the form (position, id, doi, title, authors, authors_auid,
        authors_affiliationid, sourcetitle, publicationyear, volume, issue,
        first, last, citedbycount, text, fulltext).
        `position` is the number at which the reference appears in the
        document, `id` is the Scopus ID of the referenced abstract (EID
        without the "2-s2.0-"), `authors` is a string of the names of the
        authors in the format "Surname1, Initials1; Surname2, Initials2",
        `authors_auid` is a string of the author IDs joined on "; ",
        `authors_affiliationid` is a string of the authors' affiliation IDs
        joined on "; ", `sourcetitle` is the name of the source (e.g. the
        journal), `publicationyear` is the year of the publication as a
        string, `volume` and `issue` are strings referring to the volume and
        issue, `first` and `last` refer to the page range, `citedbycount` is
        a string for the total number of citations of the cited item, `text`
        is Scopus-provided information on the publication, `fulltext` is the
        text the authors used for the reference.

        Returns None when no reference could be parsed.

        Note: Requires either the FULL view or REF view of the article.  Might
        be empty even if refcount is positive.  Specific fields can be empty.
        Author lists (authors, authors_auid, authors_affiliationid) may contain
        duplicates but have been filtered of None's.
        """
        def as_dict(value):
            # Nested entries may be missing, null or wrapped in a list;
            # reduce all of these to a dict so chained .get() calls are safe
            if isinstance(value, list):
                value = value[0] if value else None
            return value or {}

        out = []
        fields = 'position id doi title authors authors_auid '\
                 'authors_affiliationid sourcetitle publicationyear volume '\
                 'issue first last citedbycount text fulltext'
        ref = namedtuple('Reference', fields)
        # FULL view location of the references; the REF view keeps them
        # under 'references' -> 'reference' instead
        path = ['item', 'bibrecord', 'tail', 'bibliography', 'reference']
        items = listify(
            chained_get(self._json, path,
                        self._json.get('references', {}).get('reference', [])))
        for item in items:
            # FULL view nests the details in 'ref-info', REF view does not
            info = item.get('ref-info', item)
            volisspag = as_dict(info.get('volisspag'))
            voliss = as_dict(volisspag.get('voliss'))
            pagerange = as_dict(volisspag.get('pagerange'))
            # Parse author information
            try:  # FULL view parsing
                auth = listify(item['ref-info']['ref-authors']['author'])
                authors = [
                    ', '.join([d['ce:surname'], d['ce:initials']])
                    for d in auth
                ]
                auids = None
                affids = None
            except KeyError:  # REF view parsing
                # listify as in the FULL branch: a lone author is presumably
                # delivered as a dict rather than a one-element list
                auth = listify(
                    (info.get('author-list') or {}).get('author') or [])
                authors = [
                    ', '.join(
                        filter(None,
                               [d.get('ce:surname'),
                                d.get('ce:given-name')])) for d in auth
                ]
                auids = "; ".join(filter(None, [d.get('@auid') for d in auth]))
                affs = filter(None, [d.get('affiliation') for d in auth])
                # Drop affiliations without an ID; str.join rejects None
                affids = "; ".join(
                    filter(None, [aff.get('@id') for aff in affs]))
            # Parse IDs
            try:
                ids = listify(info['refd-itemidlist']['itemid'])
            except KeyError:
                ids = []
            try:
                doi = [d['$'] for d in ids if d['@idtype'] == 'DOI'][0]
            except IndexError:
                doi = info.get('ce:doi')
            try:
                scopus_id = [d['$'] for d in ids if d['@idtype'] == 'SGR'][0]
            except IndexError:
                scopus_id = info.get('scopus-id')
            # Combine information
            ref_title = as_dict(info.get('ref-title'))
            pub_year = as_dict(info.get('ref-publicationyear'))
            new = ref(position=item.get('@id'),
                      id=scopus_id,
                      doi=doi,
                      authors="; ".join(authors),
                      authors_auid=auids or None,
                      authors_affiliationid=affids or None,
                      title=ref_title.get('ref-titletext', info.get('title')),
                      sourcetitle=info.get('ref-sourcetitle',
                                           info.get('sourcetitle')),
                      publicationyear=pub_year.get('@first'),
                      volume=voliss.get('@volume'),
                      issue=voliss.get('@issue'),
                      first=pagerange.get('@first'),
                      last=pagerange.get('@last'),
                      citedbycount=info.get('citedby-count'),
                      text=info.get('ref-text'),
                      fulltext=item.get('ref-fulltext'))
            out.append(new)
        return out or None