예제 #1
0
    def __init__(self, identifier, id_type, refresh=False):
        """Interaction with the PlumX Metrics API.

        Parameters
        ----------
        identifier : str
            The identifier of the document to look up.

        id_type : str
            The kind of identifier given in `identifier`.  Must be one of
            'airitiDocId', 'arxivId', 'cabiAbstractId', 'citeulikeId',
            'digitalMeasuresArtifactId', 'doi', 'elsevierId', 'elsevierPii',
            'facebookCountUrlId', 'figshareArticleId', 'githubRepoId',
            'isbn', 'lccn', 'medwaveId', 'nctId', 'oclc',
            'pittEprintDscholarId', 'pmcid', 'pmid', 'redditId',
            'repecHandle', 'repoUrl', 'scieloId', 'sdEid',
            'slideshareUrlId', 'smithsonianPddrId', 'soundcloudTrackId',
            'ssrnId', 'urlId', 'usPatentApplicationId',
            'usPatentPublicationId', 'vimeoVideoId', 'youtubeVideoId'.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If an
            int is passed, the cached file is refreshed when the number of
            days since its last modification exceeds that value.

        Raises
        ------
        ValueError
            If `id_type` is not one of the allowed values.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/PlumXMetrics.html.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{identifier}`,
        where `path` is specified in `~/.scopus/config.ini`.
        """
        # The order of this tuple is also the order shown in the error message
        valid_id_types = (
            'airitiDocId', 'arxivId', 'cabiAbstractId', 'citeulikeId',
            'digitalMeasuresArtifactId', 'doi', 'elsevierId', 'elsevierPii',
            'facebookCountUrlId', 'figshareArticleId', 'githubRepoId',
            'isbn', 'lccn', 'medwaveId', 'nctId', 'oclc',
            'pittEprintDscholarId', 'pmcid', 'pmid', 'redditId',
            'repecHandle', 'repoUrl', 'scieloId', 'sdEid',
            'slideshareUrlId', 'smithsonianPddrId', 'soundcloudTrackId',
            'ssrnId', 'urlId', 'usPatentApplicationId',
            'usPatentPublicationId', 'vimeoVideoId', 'youtubeVideoId',
        )
        if id_type not in valid_id_types:
            msg = 'Id type must be one of: ' + ', '.join(valid_id_types)
            raise ValueError(msg)

        self.id_type = id_type
        self.identifier = identifier

        # This API is always queried with the ENHANCED view
        retrieval_kwargs = dict(identifier=identifier,
                                id_type=id_type,
                                api='PlumXMetrics',
                                refresh=refresh,
                                view='ENHANCED')
        Retrieval.__init__(self, **retrieval_kwargs)
    def __init__(self,
                 identifier=None,
                 refresh=False,
                 view='META_ABS',
                 id_type=None):
        """Interaction with the Abstract Retrieval API.

        Parameters
        ----------
        identifier : str or int
            The identifier of a document.  Can be the Scopus EID, the Scopus
            ID, the PII, the Pubmed-ID or the DOI.  It is converted to a
            string before use.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If an
            int is passed, the cached file is refreshed when the number of
            days since its last modification exceeds that value.

        id_type : str (optional, default=None)
            The type of used ID.  Allowed values: None, 'eid', 'pii',
            'scopus_id', 'pubmed_id', 'doi'.  If None, the type is inferred
            from `identifier` via `detect_id_type`.

        view : str (optional, default=META_ABS)
            The view of the file that should be downloaded.  Allowed values:
            META, META_ABS, REF, FULL, where FULL includes all information
            of the META_ABS view and META_ABS includes all information of
            the META view.  For details see
            https://dev.elsevier.com/guides/AbstractRetrievalViews.htm.

        Raises
        ------
        ValueError
            If the id_type parameter or the view parameter contains
            invalid entries.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AbstractRetrieval.html.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{identifier}`,
        where `path` is specified in `~/.scopus/config.ini`.  In case
        `identifier` is a DOI, an underscore replaces the forward slash.
        """
        identifier = str(identifier)

        # The view is validated first, before the ID type is looked at
        valid_views = ('META', 'META_ABS', 'REF', 'FULL')
        if view not in valid_views:
            raise ValueError('view parameter must be one of ' +
                             ', '.join(valid_views))

        # Either infer the ID type or validate the one supplied
        valid_id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
        if id_type is None:
            id_type = detect_id_type(identifier)
        elif id_type not in valid_id_types:
            raise ValueError('id_type parameter must be one of ' +
                             ', '.join(valid_id_types))

        # Download (or load from cache) and unwrap the response
        Retrieval.__init__(self,
                           identifier=identifier,
                           id_type=id_type,
                           api='AbstractRetrieval',
                           refresh=refresh,
                           view=view)
        response = self._json['abstracts-retrieval-response']
        self._json = response

        # Frequently used sub-trees; each falls back to an empty dict
        self._head = chained_get(response, ["item", "bibrecord", "head"], {})
        self._confevent = chained_get(
            self._head,
            ['source', 'additional-srcinfo', 'conferenceinfo', 'confevent'],
            {})

        # The REF view keeps the references under a different key
        ref_path = (["references"] if self._view == "REF"
                    else ['item', 'bibrecord', 'tail', 'bibliography'])
        self._ref = chained_get(response, ref_path, {})
예제 #3
0
    def __init__(self,
                 identifier: str,
                 id_type: str,
                 refresh: Union[bool, int] = False,
                 **kwds: str
                 ) -> None:
        """Interaction with the PlumX Metrics API.

        :param identifier: The identifier of a document.
        :param id_type: The type of used ID.  Must be one of
                        'airitiDocId', 'arxivId', 'cabiAbstractId',
                        'citeulikeId', 'digitalMeasuresArtifactId', 'doi',
                        'elsevierId', 'elsevierPii', 'facebookCountUrlId',
                        'figshareArticleId', 'githubRepoId', 'isbn', 'lccn',
                        'medwaveId', 'nctId', 'oclc', 'pittEprintDscholarId',
                        'pmcid', 'pmid', 'redditId', 'repecHandle',
                        'repoUrl', 'scieloId', 'sdEid', 'slideshareUrlId',
                        'smithsonianPddrId', 'soundcloudTrackId', 'ssrnId',
                        'urlId', 'usPatentApplicationId',
                        'usPatentPublicationId', 'vimeoVideoId',
                        'youtubeVideoId'.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/PlumXMetricsAPI.wadl.

        Raises
        ------
        ValueError
            If the parameter `refresh` is not one of the allowed values.
            NOTE(review): the only check visible in this method is the one
            on `id_type` via `check_parameter_value`; `refresh` is presumably
            validated further down in `Retrieval.__init__` - confirm.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{identifier}`,
        where `path` is specified in your configuration file.
        """
        # Validate the ID type
        valid_id_types = (
            'airitiDocId', 'arxivId', 'cabiAbstractId', 'citeulikeId',
            'digitalMeasuresArtifactId', 'doi', 'elsevierId', 'elsevierPii',
            'facebookCountUrlId', 'figshareArticleId', 'githubRepoId',
            'isbn', 'lccn', 'medwaveId', 'nctId', 'oclc',
            'pittEprintDscholarId', 'pmcid', 'pmid', 'redditId',
            'repecHandle', 'repoUrl', 'scieloId', 'sdEid',
            'slideshareUrlId', 'smithsonianPddrId', 'soundcloudTrackId',
            'ssrnId', 'urlId', 'usPatentApplicationId',
            'usPatentPublicationId', 'vimeoVideoId', 'youtubeVideoId',
        )
        check_parameter_value(id_type, valid_id_types, "id_type")
        self._id_type = id_type
        self._identifier = identifier

        # These attributes are set before the parent initialiser runs;
        # the view is fixed to ENHANCED for this API
        self._refresh = refresh
        self._view = 'ENHANCED'
        Retrieval.__init__(self,
                           identifier=identifier,
                           id_type=id_type,
                           api='PlumXMetrics',
                           **kwds)

        # Index the count types by the name of their category
        categories_by_name = {}
        for category in self._json.get('count_categories', []):
            categories_by_name[category["name"]] = category['count_types']
        self._count_categories = categories_by_name
예제 #4
0
    def __init__(self,
                 identifier: List[Union[int, str]],
                 start: Union[int, str],
                 end: Union[int, str] = datetime.now().year,
                 id_type: str = "scopus_id",
                 eid: str = None,
                 refresh: Union[bool, int] = False,
                 citation: Optional[str] = None,
                 **kwds: str) -> None:
        """Interaction with the Citation Overview API.

        :param identifier: Up to 25 identifiers for which to look up
                           citations.  Must be Scopus IDs, DOIs, PIIs or
                           Pubmed IDs.  A single str or int (deprecated
                           usage) is treated as a one-element list.
        :param start: The first year for which the citation count should
                      be loaded.
        :param end: The last year for which the citation count should be
                    loaded.  Defaults to the current year; note that the
                    default is evaluated once, when the method is defined.
        :param id_type: The type of the IDs provided in `identifier`.  Must be
                        one of "scopus_id", "doi", "pii", "pubmed_id".
        :param eid: (deprecated) The Scopus ID of the abstract - will be
                    removed in a future release.  Instead provide the
                    Scopus ID (the EID stripped of the part until the
                    second hyphen) via `identifier` and set
                    `id_type='scopus_id'`.
                    NOTE(review): within this method `eid` only triggers
                    the deprecation warning; it is neither converted nor
                    forwarded here.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param citation: Allows for the exclusion of self-citations or those
                         by books.  If `None`, will count all citations.
                         Allowed values: None, exclude-self, exclude-books
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/AbstractCitationAPI.wadl.

        Raises
        ------
        ValueError
            If parameter `identifier` contains fewer than 1 or more than
            25 elements.

        ValueError
            If any of the parameters `citation`, `id_type` or `refresh` is not
            one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{id}-{citation}`,
        where `path` is specified in your configuration file, and `id` the
        md5-hashed version of a string joining `identifier` on underscore.

        Your API Key needs to be augmented by Elsevier's Scopus
        Integration Team to access this API.
        """
        # Checks
        allowed = ('scopus_id', 'doi', 'pii', 'pubmed_id')
        check_parameter_value(id_type, allowed, "id_type")
        if citation:
            allowed = ('exclude-self', 'exclude-books')
            check_parameter_value(citation, allowed, "citation")
        if eid or not isinstance(identifier, list):
            msg = "Parameter `eid` is deprecated and will be removed in a "\
                  "future release.  Instead, provide the corresponding "\
                  "Scopus ID via parameter `identifier` as a list, and set "\
                  "`id_type='scopus_id'`."
            warn(msg, FutureWarning)
        # A bare str would otherwise be measured and iterated character by
        # character, and a bare int would fail on len(); treat either as a
        # single identifier instead
        if isinstance(identifier, (str, int)):
            identifier = [identifier]
        # The lower bound must be 1: a length can never be negative, so
        # comparing against 0 let empty input slip through
        if not 1 <= len(identifier) <= 25:
            msg = "Provide at least 1 and at most than 25 identifiers"
            raise ValueError(msg)

        # Variables
        identifier = [str(i) for i in identifier]
        self._start = int(start)
        self._end = int(end)
        self._citation = citation
        self._refresh = refresh
        self._view = "STANDARD"

        # Get file content; the identifiers travel as a query parameter
        # named after their type, the cache file stem is their md5 hash
        date = f'{start}-{end}'
        kwds.update({id_type: identifier})
        stem = md5("_".join(identifier).encode('utf8')).hexdigest()
        Retrieval.__init__(self,
                           stem,
                           api='CitationOverview',
                           date=date,
                           citation=citation,
                           **kwds)
        self._data = self._json['abstract-citations-response']

        # citeInfoMatrix
        matrix = self._data['citeInfoMatrix']['citeInfoMatrixXML'][
            'citationMatrix']['citeInfo']
        self._citeInfoMatrix = [_parse_dict(e) for e in matrix]
        # identifier-legend
        legend = self._data['identifier-legend']['identifier']
        self._identifierlegend = [_parse_dict(e) for e in legend]
        # citeCountHeader
        self._citeCountHeader = self._data['citeColumnTotalXML'][
            "citeCountHeader"]
예제 #5
0
    def __init__(self, eid, start, end=datetime.now().year, citation=None,
                 refresh=False):
        """Interaction with the Citation Overview API.

        Parameters
        ----------
        eid : str
            The EID of the abstract.

        start : str or int
            The first year for which the citation count should be loaded.

        end : str or int (optional, default=datetime.now().year)
            The last year for which the citation count should be loaded.
            Default is the current year; the default is evaluated once,
            when the method is defined.

        citation : str (optional, default=None)
            Allows for the exclusion of self-citations or those by books.
            If None, will count all citations.
            Allowed values: None, exclude-self, exclude-books

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If an
            int is passed, the cached file is refreshed when the number of
            days since its last modification exceeds that value.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/CitationOverview.html.

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{eid}`,
        where `path` is specified in `~/.scopus/config.ini`.

        Your API Key needs to be approved by Elsevier to access this API.
        """
        # Validate the citation filter only when one was requested
        if citation:
            check_parameter_value(
                citation, ('exclude-self', 'exclude-books'), "citation")

        # Store the query window and the filter
        self._start = int(start)
        self._end = int(end)
        self._citation = citation

        # Download (or load from cache); STANDARD is the only view used here
        Retrieval.__init__(self, eid, 'CitationOverview', refresh,
                           view="STANDARD", date=f'{start}-{end}',
                           citation=citation)
        response = self._json['abstract-citations-response']
        self._data = response

        # Only the first entry of the citation matrix is parsed
        matrix_xml = response['citeInfoMatrix']['citeInfoMatrixXML']
        first_cite_info = matrix_xml['citationMatrix']['citeInfo'][0]
        self._citeInfoMatrix = _parse_dict(first_cite_info)

        # Likewise only the first entry of the identifier legend
        first_legend = response['identifier-legend']['identifier'][0]
        self._identifierlegend = _parse_dict(first_legend)

        # Column totals are stored as received (marked "not used" upstream)
        self._citeColumnTotalXML = response['citeColumnTotalXML']
예제 #6
0
    def __init__(self,
                 author_id: Union[int, str],
                 refresh: Union[bool, int] = False,
                 view: str = "ENHANCED",
                 **kwds: str) -> None:
        """Interaction with the Author Retrieval API.

        :param author_id: The ID or the EID of the author.  Only the part
                          after the last hyphen is used.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param view: The view of the file that should be downloaded.  Allowed
                     values: METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD
                     includes all information of LIGHT view and ENHANCED
                     includes all information of any view.  For details see
                     https://dev.elsevier.com/sc_author_retrieval_views.html.
                     Note: Neither the BASIC nor the DOCUMENTS view are active,
                     although documented.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/AuthorRetrievalAPI.wadl.

        Raises
        ------
        ValueError
            If any of the parameters `refresh` or `view` is not
            one of the allowed values.
            NOTE(review): only `view` is checked in this method; `refresh`
            is presumably validated in `Retrieval.__init__` - confirm.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{author_id}`,
        where `path` is specified in your configuration file, and `author_id`
        is stripped of a possibly leading `'9-s2.0-'`.
        NOTE(review): the directory name mentions ENHANCED although `view`
        is configurable - confirm against the caching code.
        """
        # Validate the requested view
        check_parameter_value(
            view, ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED'), "view")

        # Keep only the numeric tail of an EID such as '9-s2.0-<id>'
        self._id = str(author_id).split('-')[-1]
        self._view = view
        self._refresh = refresh
        Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                           **kwds)

        # Unwrap the response
        self._json = self._json['author-retrieval-response']
        try:
            first_entry = self._json[0]
        except KeyError:
            # Incomplete forward: the response is a mapping rather than a
            # list, so the integer lookup raises KeyError.  Collect the IDs
            # of the profiles this one was merged into and warn the user.
            alias_entries = listify(self._json['alias']['prism:url'])
            self._alias = [entry['$'].split(':')[-1]
                           for entry in alias_entries]
            alias_str = ', '.join(self._alias)
            text = f'Author profile with ID {author_id} has been merged and '\
                   f'the main profile is now one of {alias_str}.  Please update '\
                   'your records manually.  Functionality of this object is '\
                   'reduced.'
            warn(text, UserWarning)
        else:
            self._json = first_entry
            self._alias = None
        self._profile = self._json.get("author-profile", {})
예제 #7
0
    def __init__(self,
                 identifier: Union[int, str] = None,
                 refresh: Union[bool, int] = False,
                 view: str = 'META_ABS',
                 id_type: str = None,
                 **kwds: str) -> None:
        """Interaction with the Abstract Retrieval API.

        :param identifier: The identifier of a document.  Can be the Scopus
                           EID, the Scopus ID, the PII, the Pubmed-ID or the
                           DOI.  It is converted to a string before use.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param id_type: The type of used ID. Allowed values: None, 'eid', 'pii',
                        'scopus_id', 'pubmed_id', 'doi'.  If the value is None,
                        the type is inferred via `detect_id_type`.
        :param view: The view of the file that should be downloaded.  Allowed
                     values: META, META_ABS, REF, FULL, where FULL includes all
                     information of META_ABS view and META_ABS includes all
                     information of the META view.  For details see
                     https://dev.elsevier.com/sc_abstract_retrieval_views.html.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values listed in the API specification at
                     https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl.

        Raises
        ------
        ValueError
            If any of the parameters `id_type`, `refresh` or `view` is not
            one of the allowed values.
            NOTE(review): `refresh` is not checked in this method; it is
            presumably validated in `Retrieval.__init__` - confirm.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{identifier}`,
        where `path` is specified in your configuration file.  In case
        `identifier` is a DOI, an underscore replaces the forward slash.
        """
        identifier = str(identifier)

        # The view is validated first, then the ID type
        valid_views = ('META', 'META_ABS', 'REF', 'FULL')
        check_parameter_value(view, valid_views, "view")
        if id_type is not None:
            valid_id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
            check_parameter_value(id_type, valid_id_types, "id_type")
        else:
            id_type = detect_id_type(identifier)

        # These attributes are set before the parent initialiser runs
        self._view = view
        self._refresh = refresh
        Retrieval.__init__(self, identifier=identifier, id_type=id_type,
                           api='AbstractRetrieval', **kwds)
        response = self._json['abstracts-retrieval-response']
        self._json = response

        # Frequently used sub-trees; each falls back to an empty dict
        self._head = chained_get(response, ["item", "bibrecord", "head"], {})
        self._confevent = chained_get(
            self._head,
            ['source', 'additional-srcinfo', 'conferenceinfo', 'confevent'],
            {})

        # The REF view keeps the references under a different key
        ref_path = (["references"] if self._view == "REF"
                    else ['item', 'bibrecord', 'tail', 'bibliography'])
        self._ref = chained_get(response, ref_path, {})