Python Retrieval Examples, pybliometrics.scopus.superclasses.Retrieval Python Examples

Example #1

0

Show file

    def __init__(self, aff_id, refresh=False):
        """Set up access to the Content Affiliation Retrieval API.

        Parameters
        ----------
        aff_id : str or int
            The Scopus Affiliation ID.  May also be given as an Elsevier
            EID (i.e., in the form 10-s2.0-nnnnnnnn); only the part after
            the last hyphen is used.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        Raises
        ------
        ValueError
            If the part of `aff_id` after the last hyphen cannot be
            converted to an integer.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/ContentAffiliationRetrieval.html.

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{aff_id}`,
        where `path` is specified in `~/.scopus/config.ini`.
        """
        # Drop any EID prefix; the int round-trip normalises the number
        # (and rejects non-numeric identifiers)
        numeric_id = int(str(aff_id).rsplit('-', 1)[-1])

        # Download or load the document, always using the STANDARD view
        Retrieval.__init__(self,
                           identifier=str(numeric_id),
                           api='ContentAffiliationRetrieval',
                           refresh=refresh,
                           view="STANDARD")

        # Keep only the payload below the response envelope
        self._json = self._json['affiliation-retrieval-response']

Example #2

0

Show file

    def __init__(self, author_id, refresh=False, view="ENHANCED"):
        """Set up access to the Author Retrieval API.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.  May also be given as an
            Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn); only the
            part after the last hyphen is used.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        view : str (optional, default=ENHANCED)
            The view of the file that should be downloaded.  Allowed values:
            METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD includes all
            information of LIGHT view and ENHANCED includes all information of
            any view.  For details see
            https://dev.elsevier.com/sc_author_retrieval_views.html.
            Note: Neither the BASIC nor the DOCUMENTS view is active,
            although both are documented.

        Warns
        -----
        UserWarning
            If the response is not a list but carries alias information,
            i.e. the profile has been merged into another one.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AuthorRetrieval.html.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{author_id}`,
        where `path` is specified in `~/.scopus/config.ini` and `author_id`
        is stripped of an eventually leading `'9-s2.0-'`.
        """
        # Validate the requested view via the shared helper
        check_parameter_value(view,
                              ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED'),
                              "view")

        # Drop any EID prefix and normalise the numeric part
        self._id = str(int(str(author_id).rsplit('-', 1)[-1]))
        Retrieval.__init__(self,
                           identifier=self._id,
                           view=view,
                           refresh=refresh,
                           api='AuthorRetrieval')
        self._json = self._json['author-retrieval-response']

        try:
            # A regular response is indexable by position
            first_entry = self._json[0]
        except KeyError:  # Incomplete forward
            # Merged profile: the response only lists the alias profiles
            alias_urls = listify(self._json['alias']['prism:url'])
            self._alias = [entry['$'].rsplit(':', 1)[-1]
                           for entry in alias_urls]
            joined = ', '.join(self._alias)
            message = (f'Author profile with ID {author_id} has been merged and '
                       f'the main profile is now one of {joined}.  Please update '
                       'your records manually.  Functionality of this object is '
                       'reduced.')
            warn(message, UserWarning)
        else:
            self._json = first_entry
            self._alias = None
        self._profile = self._json.get("author-profile", {})

Example #3

0

Show file

    def __init__(self, issn, refresh=False, view="ENHANCED", years=None):
        """Set up access to the Serial Title API.

        Parameters
        ----------
        issn : str or int
            The ISSN or the E-ISSN of the source.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        view : str (optional, default="ENHANCED")
            The view of the file that should be downloaded.  Allowed values:
            BASIC, STANDARD, ENHANCED.  For details see
            https://dev.elsevier.com/guides/SerialTitleViews.htm.

        years : str (optional, default=None)
            A string specifying a year or range of years (combining two
            years with a hyphen) for which yearly metric data (SJR, SNIP,
            yearly-data) should be looked up for.  If None, only the
            most recent metric data values are provided.
            Note: Whenever a non-empty value is given, `refresh` is
            overridden with True.

        Raises
        ------
        ValueError
            If `view` is not one of the allowed values.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/SerialTitle.html.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{source_id}`,
        where `path` is specified in `~/.scopus/config.ini`.
        """
        # Validate the requested view
        valid_views = ('BASIC', 'STANDARD', 'ENHANCED')
        if view not in valid_views:
            options = ', '.join(valid_views)
            raise ValueError(f'view parameter must be one of {options}')

        self._id = str(issn)
        self._years = years
        # A year selection always forces a refresh
        refresh = True if years else refresh

        # Download or load the document
        Retrieval.__init__(self,
                           identifier=self._id,
                           api='SerialTitle',
                           refresh=refresh,
                           view=view,
                           date=years)
        self._json = self._json['serial-metadata-response']
        # Only the first entry of the response is kept for later access
        self._entry = self._json['entry'][0]

Example #4

0

Show file

File: abstract_citations.py Project: sphinxnh/pybliometrics

    def __init__(self, eid, start, end=None, refresh=False):
        """Interaction with the Citation Overview API.

        Parameters
        ----------
        eid : str
            The EID of the abstract.

        start : str or int
            The first year for which the citation count should be loaded

        end : str or int (optional, default=None)
            The last year for which the citation count should be loaded.
            If None, the current year at the time of the call is used.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        Raises
        ------
        ValueError
            If `start` or `end` cannot be converted to an integer.

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{eid}`,
        where `path` is specified in `~/.scopus/config.ini`.

        Your API Key needs to be approved by Elsevier to access this API.
        """
        # Resolve the default at call time: a default of
        # `datetime.now().year` in the signature would be evaluated only
        # once, when the function is defined, and go stale afterwards
        if end is None:
            end = datetime.now().year

        # Variables
        self._start = int(start)
        self._end = int(end)
        view = "STANDARD"  # In case Scopus adds different views in future

        # Get file content
        date = f'{start}-{end}'
        Retrieval.__init__(self,
                           eid,
                           'CitationOverview',
                           refresh,
                           view=view,
                           date=date)
        self._data = self._json['abstract-citations-response']

        # citeInfoMatrix: only the first citeInfo entry is used
        cite_info = self._data['citeInfoMatrix']['citeInfoMatrixXML'][
            'citationMatrix']['citeInfo'][0]
        self._citeInfoMatrix = _parse_dict(cite_info)
        # identifier-legend: only the first identifier entry is used
        legend = self._data['identifier-legend']['identifier'][0]
        self._identifierlegend = _parse_dict(legend)
        # citeColumnTotalXML
        self._citeColumnTotalXML = self._data['citeColumnTotalXML']  # not used

Example #5

0

Show file

File: author_retrieval.py Project: lmcnichols/pybliometrics

    def __init__(self, author_id, refresh=False):
        """Set up access to the Author Retrieval API.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.  May also be given as an
            Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn); only the
            part after the last hyphen is used.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        Warns
        -----
        UserWarning
            If the response is not a list but carries alias information,
            i.e. the profile has been merged into another one.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AuthorRetrieval.html

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{author_id}`,
        where `path` is specified in `~/.scopus/config.ini` and `author_id`
        is stripped of an eventually leading `'9-s2.0-'`.
        """
        # Drop any EID prefix and normalise the numeric part
        self._id = str(int(str(author_id).rsplit('-', 1)[-1]))

        # The view is fixed for now; kept as a variable in case Scopus
        # adds different views in future
        view = "ENHANCED"
        Retrieval.__init__(self,
                           identifier=self._id,
                           view=view,
                           refresh=refresh,
                           api='AuthorRetrieval')
        self._json = self._json['author-retrieval-response']

        try:
            # A regular response is indexable by position
            first_entry = self._json[0]
        except KeyError:  # Incomplete forward
            # Merged profile: the response only lists the alias profiles
            alias_urls = listify(self._json['alias']['prism:url'])
            self._alias = [entry['$'].rsplit(':', 1)[-1]
                           for entry in alias_urls]
            joined = ', '.join(self._alias)
            message = (f'Author profile with ID {author_id} has been merged and '
                       f'the main profile is now one of {joined}.  Please update '
                       'your records manually.  Functionality of this object is '
                       'reduced.')
            warn(message, UserWarning)
        else:
            self._json = first_entry
            self._alias = None

Example #6

0

Show file

    def __init__(self, aff_id, refresh=False, view="STANDARD"):
        """Set up access to the Affiliation Retrieval API.

        Parameters
        ----------
        aff_id : str or int
            The Scopus Affiliation ID.  May also be given as an Elsevier
            EID (i.e., in the form 10-s2.0-nnnnnnnn); only the part after
            the last hyphen is used.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        view : str (optional, default=STANDARD)
            The view of the file that should be downloaded.  Allowed values:
            LIGHT, STANDARD, where STANDARD includes all information of the
            LIGHT view.  For details see
            https://dev.elsevier.com/sc_affil_retrieval_views.html.
            Note: Neither the BASIC view nor DOCUMENTS or AUTHORS views are
            active, although documented.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AffiliationRetrieval.html.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{aff_id}`,
        where `path` is specified in `~/.scopus/config.ini`.
        """
        # Validate the requested view via the shared helper
        valid_views = ('LIGHT', 'STANDARD')
        check_parameter_value(view, valid_views, "view")

        # Drop any EID prefix; the int round-trip normalises the number
        numeric_id = int(str(aff_id).rsplit('-', 1)[-1])

        # Download or load the document
        Retrieval.__init__(self,
                           identifier=str(numeric_id),
                           api='AffiliationRetrieval',
                           refresh=refresh,
                           view=view)
        self._json = self._json['affiliation-retrieval-response']
        # A missing institution profile falls back to an empty dict
        self._profile = self._json.get("institution-profile", {})

Example #7

0

Show file

File: affiliation_retrieval.py Project: raffaem/pybliometrics

    def __init__(self,
                 aff_id: Union[int, str],
                 refresh: Union[bool, int] = False,
                 view: str = "STANDARD",
                 **kwds: str) -> None:
        """Set up access to the Affiliation Retrieval API.

        :param aff_id: Scopus ID or EID of the affiliation profile.  Only
                       the part after the last hyphen is used.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param view: The view of the file that should be downloaded.  Allowed
                     values: LIGHT, STANDARD, where STANDARD includes all
                     information of the LIGHT view.  For details see
                     https://dev.elsevier.com/sc_affil_retrieval_views.html.
                     Note: Neither the BASIC view nor DOCUMENTS or AUTHORS
                     views are active, although documented.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/AffiliationRetrievalAPI.wadl.

        Raises
        ------
        ValueError
            If any of the parameters `refresh` or `view` is not
            one of the allowed values (the checks themselves live in
            helpers that are not part of this method).

        Notes
        -----
        The directory for cached results is `{path}/{view}/{aff_id}`,
        where `path` is specified in your configuration file.
        """
        # Validate the requested view via the shared helper
        valid_views = ('LIGHT', 'STANDARD')
        check_parameter_value(view, valid_views, "view")

        # These attributes are set before the base initialiser runs;
        # they are not passed to it as arguments
        self._refresh = refresh
        self._view = view

        # Drop any EID prefix; the int round-trip normalises the number
        numeric_id = int(str(aff_id).rsplit('-', 1)[-1])
        Retrieval.__init__(self,
                           str(numeric_id),
                           api='AffiliationRetrieval',
                           **kwds)
        self._json = self._json['affiliation-retrieval-response']
        # A missing institution profile falls back to an empty dict
        self._profile = self._json.get("institution-profile", {})

Example #8

0

Show file

File: serial_title.py Project: milensys/pybliometrics

    def __init__(self, issn, refresh=False, view="ENHANCED"):
        """Set up access to the Serial Title API.

        Parameters
        ----------
        issn : str or int
            The ISSN or the E-ISSN of the source.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        view : str (optional, default="ENHANCED")
            The view of the file that should be downloaded.  Allowed values:
            BASIC, STANDARD, ENHANCED.  For details see
            https://dev.elsevier.com/guides/SerialTitleViews.htm.

        Raises
        ------
        ValueError
            If `view` is not one of the allowed values.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/SerialTitle.html.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{source_id}`,
        where `path` is specified in `~/.scopus/config.ini`.
        """
        # Validate the requested view
        valid_views = ('BASIC', 'STANDARD', 'ENHANCED')
        if view not in valid_views:
            options = ', '.join(valid_views)
            raise ValueError(f'view parameter must be one of {options}')

        # Download or load the document
        self._id = str(issn)
        Retrieval.__init__(self,
                           identifier=self._id,
                           api='SerialTitle',
                           refresh=refresh,
                           view=view)
        self._json = self._json['serial-metadata-response']
        # Only the first entry of the response is kept for later access
        self._entry = self._json['entry'][0]

Example #9

0

Show file

File: abstract_retrieval.py Project: thatguy1104/NLP-Data-Mining-Engine

    def __init__(self, identifier=None, refresh=False, view='META_ABS',
                 id_type=None, **kwds):
        """Set up access to the Abstract Retrieval API.

        Parameters
        ----------
        identifier : str or int
            The identifier of a document.  Can be the Scopus EID, the Scopus
            ID, the PII, the Pubmed-ID or the DOI.  It is converted to a
            string before use.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        id_type: str (optional, default=None)
            The type of used ID. Allowed values: None, 'eid', 'pii',
            'scopus_id', 'pubmed_id', 'doi'.  If the value is None, the
            ID type is inferred from `identifier`.

        view : str (optional, default=META_ABS)
            The view of the file that should be downloaded.  Allowed values:
            META, META_ABS, REF, FULL, where FULL includes all information
            of META_ABS view and META_ABS includes all information of the
            META view.  For details see
            https://dev.elsevier.com/guides/AbstractRetrievalViews.htm.

        kwds : key-value pairings, optional
            Keywords passed on as query parameters.  Must contain fields
            and values mentioned in the API specification
            (https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl),
            such as "startref" or "refcount".

        Raises
        ------
        ValueError
            If the id_type parameter or the view parameter contains
            invalid entries.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AbstractRetrieval.html.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{identifier}`,
        where `path` is specified in `~/.scopus/config.ini`.  In case
        `identifier` is a DOI, an underscore replaces the forward slash.
        """
        identifier = str(identifier)

        # Validate the requested view
        valid_views = ('META', 'META_ABS', 'REF', 'FULL')
        if view not in valid_views:
            options = ', '.join(valid_views)
            raise ValueError(f'view parameter must be one of {options}')

        # Validate an explicit ID type, or infer one from the identifier
        if id_type is not None:
            valid_id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
            if id_type not in valid_id_types:
                options = ', '.join(valid_id_types)
                raise ValueError(f'id_type parameter must be one of {options}')
        else:
            id_type = detect_id_type(identifier)

        # Download or load the document
        Retrieval.__init__(self,
                           identifier=identifier,
                           id_type=id_type,
                           view=view,
                           refresh=refresh,
                           api='AbstractRetrieval',
                           **kwds)
        self._json = self._json['abstracts-retrieval-response']

        # Shortcuts into the nested document; each defaults to an empty dict
        head_path = ["item", "bibrecord", "head"]
        self._head = chained_get(self._json, head_path, {})
        confevent_path = ['source', 'additional-srcinfo', 'conferenceinfo',
                          'confevent']
        self._confevent = chained_get(self._head, confevent_path, {})
        # The REF view keeps the references at the top level of the response
        ref_path = (["references"] if self._view == "REF"
                    else ['item', 'bibrecord', 'tail', 'bibliography'])
        self._ref = chained_get(self._json, ref_path, {})

Example #10

0

Show file

File: plumx_metrics.py Project: lmcnichols/pybliometrics

    def __init__(self, identifier, id_type, refresh=False):
        """Set up access to the PlumX Metrics API.

        Parameters
        ----------
        identifier : str
            The identifier of a document.

        id_type: str
            The type of used ID. Allowed values are:
                - 'airitiDocId'
                - 'arxivId'
                - 'cabiAbstractId'
                - 'citeulikeId'
                - 'digitalMeasuresArtifactId'
                - 'doi'
                - 'elsevierId'
                - 'elsevierPii'
                - 'facebookCountUrlId'
                - 'figshareArticleId'
                - 'githubRepoId'
                - 'isbn'
                - 'lccn'
                - 'medwaveId'
                - 'nctId'
                - 'oclc'
                - 'pittEprintDscholarId'
                - 'pmcid'
                - 'pmid'
                - 'redditId'
                - 'repecHandle'
                - 'repoUrl'
                - 'scieloId'
                - 'sdEid'
                - 'slideshareUrlId'
                - 'smithsonianPddrId'
                - 'soundcloudTrackId'
                - 'ssrnId'
                - 'urlId'
                - 'usPatentApplicationId'
                - 'usPatentPublicationId'
                - 'vimeoVideoId'
                - 'youtubeVideoId'

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        Raises
        ------
        ValueError
            If `id_type` is not one of the allowed values.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/PlumXMetrics.html.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{identifier}`,
        where `path` is specified in `~/.scopus/config.ini`.
        """
        # Validate the ID type; the tuple order determines the order in
        # which the options appear in the error message
        valid_id_types = (
            'airitiDocId', 'arxivId', 'cabiAbstractId', 'citeulikeId',
            'digitalMeasuresArtifactId', 'doi', 'elsevierId', 'elsevierPii',
            'facebookCountUrlId', 'figshareArticleId', 'githubRepoId',
            'isbn', 'lccn', 'medwaveId', 'nctId', 'oclc',
            'pittEprintDscholarId', 'pmcid', 'pmid', 'redditId',
            'repecHandle', 'repoUrl', 'scieloId', 'sdEid',
            'slideshareUrlId', 'smithsonianPddrId', 'soundcloudTrackId',
            'ssrnId', 'urlId', 'usPatentApplicationId',
            'usPatentPublicationId', 'vimeoVideoId', 'youtubeVideoId',
        )
        if id_type not in valid_id_types:
            options = ', '.join(valid_id_types)
            raise ValueError(f'Id type must be one of: {options}')

        # Remember what was requested
        self.identifier = identifier
        self.id_type = id_type

        # Download or load the document, always using the ENHANCED view
        Retrieval.__init__(self,
                           identifier=identifier,
                           id_type=id_type,
                           view='ENHANCED',
                           refresh=refresh,
                           api='PlumXMetrics')

Example #11

0

Show file

File: plumx_metrics.py Project: raffaem/pybliometrics

    def __init__(self,
                 identifier: str,
                 id_type: str,
                 refresh: Union[bool, int] = False,
                 **kwds: str
                 ) -> None:
        """Set up access to the PlumX Metrics API.

        :param identifier: The identifier of a document.
        :param id_type: The type of used ID. Allowed values are:
                        - 'airitiDocId'
                        - 'arxivId'
                        - 'cabiAbstractId'
                        - 'citeulikeId'
                        - 'digitalMeasuresArtifactId'
                        - 'doi'
                        - 'elsevierId'
                        - 'elsevierPii'
                        - 'facebookCountUrlId'
                        - 'figshareArticleId'
                        - 'githubRepoId'
                        - 'isbn'
                        - 'lccn'
                        - 'medwaveId'
                        - 'nctId'
                        - 'oclc'
                        - 'pittEprintDscholarId'
                        - 'pmcid'
                        - 'pmid'
                        - 'redditId'
                        - 'repecHandle'
                        - 'repoUrl'
                        - 'scieloId'
                        - 'sdEid'
                        - 'slideshareUrlId'
                        - 'smithsonianPddrId'
                        - 'soundcloudTrackId'
                        - 'ssrnId'
                        - 'urlId'
                        - 'usPatentApplicationId'
                        - 'usPatentPublicationId'
                        - 'vimeoVideoId'
                        - 'youtubeVideoId'
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/PlumXMetricsAPI.wadl.

        Raises
        ------
        ValueError
            If the parameter `refresh` is not one of the allowed values
            (that check lives in helpers that are not part of this method).

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{identifier}`,
        where `path` is specified in your configuration file.
        """
        # Validate the ID type via the shared helper
        valid_id_types = (
            'airitiDocId', 'arxivId', 'cabiAbstractId', 'citeulikeId',
            'digitalMeasuresArtifactId', 'doi', 'elsevierId', 'elsevierPii',
            'facebookCountUrlId', 'figshareArticleId', 'githubRepoId',
            'isbn', 'lccn', 'medwaveId', 'nctId', 'oclc',
            'pittEprintDscholarId', 'pmcid', 'pmid', 'redditId',
            'repecHandle', 'repoUrl', 'scieloId', 'sdEid',
            'slideshareUrlId', 'smithsonianPddrId', 'soundcloudTrackId',
            'ssrnId', 'urlId', 'usPatentApplicationId',
            'usPatentPublicationId', 'vimeoVideoId', 'youtubeVideoId',
        )
        check_parameter_value(id_type, valid_id_types, "id_type")

        # Remember what was requested
        self._identifier = identifier
        self._id_type = id_type

        # These attributes are set before the base initialiser runs;
        # they are not passed to it as arguments
        self._view = 'ENHANCED'
        self._refresh = refresh
        Retrieval.__init__(self,
                           identifier=identifier,
                           id_type=id_type,
                           api='PlumXMetrics',
                           **kwds)

        # Map each category name to its count types; a response without
        # categories yields an empty mapping
        categories = {}
        for category in self._json.get('count_categories', []):
            categories[category["name"]] = category['count_types']
        self._count_categories = categories

Example #12

0

Show file

File: abstract_citation.py Project: raffaem/pybliometrics

    def __init__(self,
                 identifier: List[Union[int, str]],
                 start: Union[int, str],
                 end: Optional[Union[int, str]] = None,
                 id_type: str = "scopus_id",
                 eid: str = None,
                 refresh: Union[bool, int] = False,
                 citation: Optional[str] = None,
                 **kwds: str) -> None:
        """Interaction with the Citation Overview API.

        :param identifier: Up to 25 identifiers for which to look up
                           citations.  Must be Scopus IDs, DOIs, PIIs or
                           Pubmed IDs.
        :param start: The first year for which the citation count should
                      be loaded.
        :param end: The last year for which the citation count should be
                    loaded.  If `None`, the current year at the time of
                    the call is used.
        :param id_type: The type of the IDs provided in `identifier`.  Must be
                        one of "scopus_id", "doi", "pii", "pubmed_id".
        :param eid: (deprecated) The Scopus ID of the abstract - will be
                    removed in a future release.  Passing it only triggers
                    a `FutureWarning` in this method; provide the Scopus ID
                    (the EID stripped of the part until the second hyphen)
                    via `identifier` instead.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param citation: Allows for the exclusion of self-citations or those
                         by books.  If `None`, will count all citations.
                         Allowed values: None, exclude-self, exclude-books
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/AbstractCitationAPI.wadl.

        Raises
        ------
        ValueError
            If parameter `identifier` contains fewer than 1 or more than
            25 elements.

        ValueError
            If any of the parameters `citation`, `id_type` or `refresh` is not
            one of the allowed values (checked by helpers that are not part
            of this method).

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{id}-{citation}`,
        where `path` is specified in your configuration file, and `id` the
        md5-hashed version of a string joining `identifier` on underscore.

        Your API Key needs to be augmented by Elsevier's Scopus
        Integration Team to access this API.
        """
        # Checks
        allowed = ('scopus_id', 'doi', 'pii', 'pubmed_id')
        check_parameter_value(id_type, allowed, "id_type")
        if citation:
            allowed = ('exclude-self', 'exclude-books')
            check_parameter_value(citation, allowed, "citation")
        if eid or not isinstance(identifier, list):
            # NOTE(review): only a warning is issued here; `eid` is not
            # converted into an identifier by this method - confirm intent
            msg = "Parameter `eid` is deprecated and will be removed in a "\
                  "future release.  Instead, provide the corresponding "\
                  "Scopus ID via parameter `identifier` as a list, and set "\
                  "`id_type='scopus_id'`."
            warn(msg, FutureWarning)
        # The lower bound must be 1: `len(...) < 0` can never be true and
        # would let an empty list through
        if not 1 <= len(identifier) <= 25:
            msg = "Provide at least 1 and at most 25 identifiers"
            raise ValueError(msg)

        # Resolve the default at call time: a default of
        # `datetime.now().year` in the signature would be evaluated only
        # once, when the function is defined, and go stale afterwards
        if end is None:
            end = datetime.now().year

        # Variables
        identifier = [str(i) for i in identifier]
        self._start = int(start)
        self._end = int(end)
        self._citation = citation
        self._refresh = refresh
        self._view = "STANDARD"

        # Get file content
        date = f'{start}-{end}'
        kwds.update({id_type: identifier})
        # The md5 hash of the joined identifiers serves as the file stem
        stem = md5("_".join(identifier).encode('utf8')).hexdigest()
        Retrieval.__init__(self,
                           stem,
                           api='CitationOverview',
                           date=date,
                           citation=citation,
                           **kwds)
        self._data = self._json['abstract-citations-response']

        # citeInfoMatrix
        matrix = self._data['citeInfoMatrix']['citeInfoMatrixXML'][
            'citationMatrix']['citeInfo']
        self._citeInfoMatrix = [_parse_dict(e) for e in matrix]
        # identifier-legend
        legend = self._data['identifier-legend']['identifier']
        self._identifierlegend = [_parse_dict(e) for e in legend]
        # citeCountHeader
        self._citeCountHeader = self._data['citeColumnTotalXML'][
            "citeCountHeader"]

Example #13

0

Show file

    def __init__(self, eid, start, end=None, citation=None,
                 refresh=False):
        """Interaction with the Citation Overview API.

        Parameters
        ----------
        eid : str
            The EID of the abstract.

        start : str or int
            The first year for which the citation count should be loaded

        end : str or int (optional, default=None)
            The last year for which the citation count should be loaded.
            If None, the current year at the time of the call is used.

        citation : str (optional, default=None)
            Allows for the exclusion of self-citations or those by books.
            If None, will count all citations.
            Allowed values: None, exclude-self, exclude-books

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        Raises
        ------
        ValueError
            If `start` or `end` cannot be converted to an integer.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/CitationOverview.html.

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{eid}`,
        where `path` is specified in `~/.scopus/config.ini`.

        Your API Key needs to be approved by Elsevier to access this API.
        """
        # Checks
        if citation:
            allowed = ('exclude-self', 'exclude-books')
            check_parameter_value(citation, allowed, "citation")

        # Resolve the default at call time: a default of
        # `datetime.now().year` in the signature would be evaluated only
        # once, when the function is defined, and go stale afterwards
        if end is None:
            end = datetime.now().year

        # Variables
        self._start = int(start)
        self._end = int(end)
        self._citation = citation
        view = "STANDARD"  # In case Scopus adds different views in future

        # Get file content
        date = f'{start}-{end}'
        Retrieval.__init__(self, eid, 'CitationOverview', refresh, view=view,
                           date=date, citation=citation)
        self._data = self._json['abstract-citations-response']

        # citeInfoMatrix: only the first citeInfo entry is used
        cite_info = self._data['citeInfoMatrix']['citeInfoMatrixXML'][
            'citationMatrix']['citeInfo'][0]
        self._citeInfoMatrix = _parse_dict(cite_info)
        # identifier-legend: only the first identifier entry is used
        legend = self._data['identifier-legend']['identifier'][0]
        self._identifierlegend = _parse_dict(legend)
        # citeColumnTotalXML
        self._citeColumnTotalXML = self._data['citeColumnTotalXML']  # not used

Example #14

0

Show file

File: author_retrieval.py Project: raffaem/pybliometrics

    def __init__(self,
                 author_id: Union[int, str],
                 refresh: Union[bool, int] = False,
                 view: str = "ENHANCED",
                 **kwds: str) -> None:
        """Interaction with the Author Retrieval API.

        :param author_id: The ID or the EID of the author.  Only the part
                          after the last hyphen is used as identifier.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param view: The view of the file that should be downloaded.  Allowed
                     values: METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD
                     includes all information of LIGHT view and ENHANCED
                     includes all information of any view.  For details see
                     https://dev.elsevier.com/sc_author_retrieval_views.html.
                     Note: Neither the BASIC nor the DOCUMENTS view are active,
                     although documented.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/AuthorRetrievalAPI.wadl.

        Raises
        ------
        ValueError
            If any of the parameters `refresh` or `view` is not
            one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{author_id}`,
        where `path` is specified in your configuration file, and `author_id`
        is stripped of an eventually leading `'9-s2.0-'`.

        When the response is not a list (a merged profile), a UserWarning is
        issued and the alias IDs are kept in `_alias`; otherwise `_alias`
        is None.
        """
        # Validate the requested view first
        check_parameter_value(view,
                              ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED'),
                              "view")

        # Attributes set ahead of the parent initialiser
        # (presumably read by Retrieval.__init__ -- confirm there)
        self._refresh = refresh
        self._view = view
        self._id = str(author_id).split('-')[-1]
        Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                           **kwds)

        # Unwrap the response
        self._json = self._json['author-retrieval-response']
        try:
            self._json = self._json[0]
        except KeyError:  # Incomplete forward: payload carries aliases instead
            alias_entries = listify(self._json['alias']['prism:url'])
            self._alias = [entry['$'].split(':')[-1]
                           for entry in alias_entries]
            joined = ', '.join(self._alias)
            message = (f'Author profile with ID {author_id} has been merged and '
                       f'the main profile is now one of {joined}.  Please update '
                       'your records manually.  Functionality of this object is '
                       'reduced.')
            warn(message, UserWarning)
        else:
            self._alias = None
        self._profile = self._json.get("author-profile", {})

Example #15

0

Show file

File: abstract_retrieval.py Project: raffaem/pybliometrics

    def __init__(self,
                 identifier: Union[int, str] = None,
                 refresh: Union[bool, int] = False,
                 view: str = 'META_ABS',
                 id_type: str = None,
                 **kwds: str) -> None:
        """Interaction with the Abstract Retrieval API.

        :param identifier: The identifier of a document.  Can be the Scopus EID,
                           the Scopus ID, the PII, the Pubmed-ID or the DOI.
                           It is converted to a string before use.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param id_type: The type of used ID. Allowed values: None, 'eid', 'pii',
                        'scopus_id', 'pubmed_id', 'doi'.  If the value is None,
                        the function tries to infer the ID type itself.
        :param view: The view of the file that should be downloaded.  Allowed
                     values: META, META_ABS, REF, FULL, where FULL includes all
                     information of META_ABS view and META_ABS includes all
                     information of the META view.  For details see
                     https://dev.elsevier.com/sc_abstract_retrieval_views.html.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values listed in the API specification at
                     https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl.

        Raises
        ------
        ValueError
            If any of the parameters `id_type`, `refresh` or `view` is not
            one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{identifier}`,
        where `path` is specified in your configuration file.  In case
        `identifier` is a DOI, an underscore replaces the forward slash.
        """
        # Normalise and validate the inputs
        identifier = str(identifier)
        check_parameter_value(view, ('META', 'META_ABS', 'REF', 'FULL'),
                              "view")
        if id_type is not None:
            check_parameter_value(
                id_type,
                ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi'),
                "id_type")
        else:
            # No explicit type given: infer it from the identifier itself
            id_type = detect_id_type(identifier)

        # Attributes set ahead of the parent initialiser
        # (presumably read by Retrieval.__init__ -- confirm there)
        self._refresh = refresh
        self._view = view
        Retrieval.__init__(self, identifier=identifier, id_type=id_type,
                           api='AbstractRetrieval', **kwds)

        # Shortcuts into the response; each falls back to an empty dict
        self._json = self._json['abstracts-retrieval-response']
        self._head = chained_get(self._json, ["item", "bibrecord", "head"], {})
        self._confevent = chained_get(
            self._head,
            ['source', 'additional-srcinfo', 'conferenceinfo', 'confevent'],
            {})
        # The REF view keeps references at the top level of the response
        ref_path = (["references"] if self._view == "REF"
                    else ['item', 'bibrecord', 'tail', 'bibliography'])
        self._ref = chained_get(self._json, ref_path, {})