Ejemplo n.º 1
0
    def __init__(self, author_id, refresh=False, view="ENHANCED"):
        """Interaction with the Author Retrieval API.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.  Optionally expressed
            as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        view : str (optional, default=META_ABS)
            The view of the file that should be downloaded.  Allowed values:
            METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD includes all
            information of LIGHT view and ENHANCED includes all information of
            any view.  For details see
            https://dev.elsevier.com/sc_author_retrieval_views.html.
            Note: Neither the BASIC nor the DOCUMENTS view are not active,
            although documented.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AuthorRetrieval.html.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{author_id}`,
        where `path` is specified in `~/.scopus/config.ini` and `author_id`
        is stripped of an eventually leading `'9-s2.0-'`.
        """
        # Checks
        allowed_views = ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED')
        check_parameter_value(view, allowed_views, "view")

        # Load json
        self._id = str(int(str(author_id).split('-')[-1]))
        Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                           refresh=refresh, view=view)
        self._json = self._json['author-retrieval-response']
        try:
            self._json = self._json[0]
        except KeyError:  # Incomplete forward
            alias_json = listify(self._json['alias']['prism:url'])
            self._alias = [d['$'].split(':')[-1] for d in alias_json]
            alias_str = ', '.join(self._alias)
            text = f'Author profile with ID {author_id} has been merged and '\
                   f'the main profile is now one of {alias_str}.  Please update '\
                   'your records manually.  Functionality of this object is '\
                   'reduced.'
            warn(text, UserWarning)
        else:
            self._alias = None
        self._profile = self._json.get("author-profile", {})
Ejemplo n.º 2
0
    def __init__(self, query, refresh=False, view='ENHANCED'):
        """Interaction with the Serial Title API.

        Parameters
        ----------
        query: dict
            Query parameters and corresponding fields. Allowed keys 'title',
            'issn', 'pub', 'subj', 'subjCode', 'content', 'oa'.  For
            examples on possible values, please refer to
            https://dev.elsevier.com/documentation/SerialTitleAPI.wadl#d1e22.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        view : str (optional, default="ENHANCED")
            The view of the file that should be downloaded.  Allowed values:
            STANDARD, ENHANCED, CITESCORE.  For details see
            https://dev.elsevier.com/sc_serial_title_views.html.

        Raises
        ------
        Scopus400Error
            If provided value for a query key is invalid or if for
            non-subscribers the number of search results exceeds 5000.

        ValueError
            If view parameter is not one of allowed ones or if query contains
            invalid fields.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/SerialSearch.html.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{fname}`,
        where `path` is specified in `~/.scopus/config.ini` and fname is
        the md5-hashed version of `query` dict turned into string in format
        of 'key=value' delimited by '&'.
        """
        # Checks
        allowed_query_keys = ('title', 'issn', 'date', 'pub', 'subj',
                              'subjCode', 'content', 'oa')
        invalid = [k for k in query.keys() if k not in allowed_query_keys]
        if invalid:
            raise ValueError(f'Query key(s) "{", ".join(invalid)}" invalid')
        check_parameter_value(view, ('STANDARD', 'ENHANCED', 'CITESCORE'), "view")

        # Query
        self.query = str(query)
        Search.__init__(self, query=query, api='SerialSearch',
                        refresh=refresh, view=view)
        self._n = len(self._json['serial-metadata-response'].get('entry', []))
Ejemplo n.º 3
0
    def __init__(self,
                 query: Dict,
                 refresh: Union[bool, int] = False,
                 view: str = 'ENHANCED',
                 **kwds: str) -> None:
        """Interaction with the Serial Title API.

        :param query:  Query parameters and corresponding fields. Allowed keys
                      'title', 'issn', 'pub', 'subj', 'subjCode', 'content',
                      'oa'.  For examples on possible values, please refer to
                      https://dev.elsevier.com/documentation/SerialTitleAPI.wadl#d1e22.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param view: The view of the file that should be downloaded.  Allowed
                     values: STANDARD, ENHANCED, CITESCORE.  For details see
                     https://dev.elsevier.com/sc_serial_title_views.html.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values listed in the API specification at
                     https://dev.elsevier.com/documentation/SerialTitleAPI.wadl.

        Raises
        ------
        Scopus400Error
            If provided value for a query key is invalid or if for
            non-subscribers the number of search results exceeds 5000.

        ValueError
            If any of the parameters `refresh` or `view` is not
            one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{fname}`,
        where `path` is specified in your configuration file, and `fname` is
        the md5-hashed version of `query` dict turned into string in format
        of 'key=value' delimited by '&'.
        """
        # Checks
        allowed_query_keys = ('title', 'issn', 'date', 'pub', 'subj',
                              'subjCode', 'content', 'oa')
        invalid = [k for k in query.keys() if k not in allowed_query_keys]
        if invalid:
            raise ValueError(f'Query key(s) "{", ".join(invalid)}" invalid')
        check_parameter_value(view, ('STANDARD', 'ENHANCED', 'CITESCORE'),
                              "view")

        # Query
        self._query = str(query)
        self._refresh = refresh
        self._view = view
        Search.__init__(self, query=query, api='SerialSearch', **kwds)
        self._n = len(self._json['serial-metadata-response'].get('entry', []))
Ejemplo n.º 4
0
    def __init__(self, issn, refresh=False, view="ENHANCED", years=None):
        """Interaction with the Serial Title API.

        Parameters
        ----------
        issn : str or int
            The ISSN or the E-ISSN of the source.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        view : str (optional, default="ENHANCED")
            The view of the file that should be downloaded.  Allowed values:
            BASIC, STANDARD, ENHANCED.  For details see
            https://dev.elsevier.com/sc_serial_title_views.html.

        years : str (optional, default=None)
            A string specifying a year or range of years (combining two
            years with a hyphen) for which yearly metric data (SJR, SNIP,
            yearly-data) should be looked up for.  If None, only the
            most recent metric data values are provided.
            Note: If not None, refresh will always be True.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/SerialTitle.html.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{source_id}`,
        where `path` is specified in `~/.scopus/config.ini`.
        """
        # Checks
        check_parameter_value(view, ('BASIC', 'STANDARD', 'ENHANCED'), "view")
        # Load json
        self._id = str(issn)
        self._years = years
        # Force refresh when years is specified
        if years:
            refresh = True
        Retrieval.__init__(self,
                           identifier=self._id,
                           view=view,
                           date=years,
                           api='SerialTitle',
                           refresh=refresh)
        self._json = self._json['serial-metadata-response']
        self._entry = self._json['entry'][0]
Ejemplo n.º 5
0
    def __init__(self, aff_id, refresh=False, view="STANDARD"):
        """Interaction with the Affiliation Retrieval API.

        Parameters
        ----------
        aff_id : str or int
            The Scopus Affiliation ID.  Optionally expressed
            as an Elsevier EID (i.e., in the form 10-s2.0-nnnnnnnn).

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        view : str (optional, default=STANDARD)
            The view of the file that should be downloaded.  Allowed values:
            LIGHT, STANDARD, where STANDARD includes all information of the
            LIGHT view.  For details see
            https://dev.elsevier.com/sc_affil_retrieval_views.html.
            Note: Neither the BASIC view nor DOCUMENTS or AUTHORS views are
            active, although documented.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AffiliationRetrieval.html.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{aff_id}`,
        where `path` is specified in `~/.scopus/config.ini`.
        """
        # Checks
        check_parameter_value(view, ('LIGHT', 'STANDARD'), "view")

        # Load json
        aff_id = str(int(str(aff_id).split('-')[-1]))
        Retrieval.__init__(self,
                           identifier=aff_id,
                           view=view,
                           refresh=refresh,
                           api='AffiliationRetrieval')
        self._json = self._json['affiliation-retrieval-response']
        self._profile = self._json.get("institution-profile", {})
Ejemplo n.º 6
0
    def __init__(self,
                 aff_id: Union[int, str],
                 refresh: Union[bool, int] = False,
                 view: str = "STANDARD",
                 **kwds: str) -> None:
        """Interaction with the Affiliation Retrieval API.

        :param aff_id: Scopus ID or EID of the affiliation profile.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param view: The view of the file that should be downloaded.  Allowed
                     values: LIGHT, STANDARD, where STANDARD includes all
                     information of the LIGHT view.  For details see
                     https://dev.elsevier.com/sc_affil_retrieval_views.html.
                     Note: Neither the BASIC view nor DOCUMENTS or AUTHORS
                     views are active, although documented.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/AffiliationRetrievalAPI.wadl.

        Raises
        ------
        ValueError
            If any of the parameters `refresh` or `view` is not
            one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{aff_id}`,
        where `path` is specified in your configuration file.
        """
        # Checks
        check_parameter_value(view, ('LIGHT', 'STANDARD'), "view")

        # Load json
        self._view = view
        self._refresh = refresh
        aff_id = str(int(str(aff_id).split('-')[-1]))
        Retrieval.__init__(self, aff_id, api='AffiliationRetrieval', **kwds)
        self._json = self._json['affiliation-retrieval-response']
        self._profile = self._json.get("institution-profile", {})
Ejemplo n.º 7
0
    def __init__(self,
                 identifier: List[Union[int, str]],
                 start: Union[int, str],
                 end: Union[int, str] = datetime.now().year,
                 id_type: str = "scopus_id",
                 eid: str = None,
                 refresh: Union[bool, int] = False,
                 citation: Optional[str] = None,
                 **kwds: str) -> None:
        """Interaction witht the Citation Overview API.

        :param identifier: Up to 25 identifiers for which  to look up
                           citations.  Must be Scopus IDs, DOIs, PIIs or
                           Pubmed IDs.
        :param start: The first year for which the citation count should
                      be loaded.
        :param end: The last year for which the citation count should be
                    loaded. Defaults to the current year.
        :param id_type: The type of the IDs provided in `identifier`.  Must be
                        one of "scopus_id", "doi", "pii", "pubmed_id".
        :param eid: (deprecated) The Scopus ID of the abstract - will be
                    removed in a future release: Instead use param `scopus_id`
                    after stripping the part until the second hyphen.  If you
                    use this parameter, it will be converted to `scopus_id`
                    instead.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param citation: Allows for the exclusion of self-citations or those
                         by books.  If `None`, will count all citations.
                         Allowed values: None, exclude-self, exclude-books
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/AbstractCitationAPI.wadl.

        Raises
        -----
        ValueError
            If parameter `identifier` contains fewer than 1 or more than
            25 elements.

        ValueError
            If any of the parameters `citation`, `id_type` or `refresh` is not
            one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{id}-{citation}`,
        where `path` is specified in your configuration file, and `id` the
        md5-hashed version of a string joining `identifier` on underscore.

        Your API Key needs to be augmented by Elsevier's Scopus
        Integration Team to access this API.
        """
        # Checks
        allowed = ('scopus_id', 'doi', 'pii', 'pubmed_id')
        check_parameter_value(id_type, allowed, "id_type")
        if citation:
            allowed = ('exclude-self', 'exclude-books')
            check_parameter_value(citation, allowed, "citation")
        if eid or not isinstance(identifier, list):
            msg = "Parameter `eid` is deprecated and will be removed in a "\
                  "future release.  Instead, provide the corresponding "\
                  "Scopus ID via parameter `identifier` as a list, and set "\
                  "`id_type='scopus_id'`."
            warn(msg, FutureWarning)
        if len(identifier) < 0 or len(identifier) > 25:
            msg = "Provide at least 1 and at most than 25 identifiers"
            raise ValueError(msg)

        # Variables
        identifier = [str(i) for i in identifier]
        self._start = int(start)
        self._end = int(end)
        self._citation = citation
        self._refresh = refresh
        self._view = "STANDARD"

        # Get file content
        date = f'{start}-{end}'
        kwds.update({id_type: identifier})
        stem = md5("_".join(identifier).encode('utf8')).hexdigest()
        Retrieval.__init__(self,
                           stem,
                           api='CitationOverview',
                           date=date,
                           citation=citation,
                           **kwds)
        self._data = self._json['abstract-citations-response']

        # citeInfoMatrix
        matrix = self._data['citeInfoMatrix']['citeInfoMatrixXML'][
            'citationMatrix']['citeInfo']
        self._citeInfoMatrix = [_parse_dict(e) for e in matrix]
        # identifier-legend
        identifier = self._data['identifier-legend']['identifier']
        self._identifierlegend = [_parse_dict(e) for e in identifier]
        # citeCountHeader
        self._citeCountHeader = self._data['citeColumnTotalXML'][
            "citeCountHeader"]
Ejemplo n.º 8
0
    def __init__(self,
                 query,
                 refresh=False,
                 download=True,
                 count=200,
                 integrity_fields=None,
                 integrity_action="raise",
                 verbose=False):
        """Interaction with the Affiliation Search API.

        Parameters
        ----------
        query : str
            A string of the query, e.g. "af-id(60021784)".

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        count : int (optional, default=200)
            The number of entries to be displayed at once.  A smaller number
            means more queries with each query having less results.

        download : bool (optional, default=True)
            Whether to download results (if they have not been cached).

        integrity_fields : None or iterable (default=None)
            Iterable of field names whose completeness should be checked.
            ScopusSearch will perform the action specified in
            `integrity_action` if elements in these fields are missing.  This
            helps avoiding idiosynchratically missing elements that should
            always be present, such as the EID or the name.

        integrity_action : str (optional, default="raise")
            What to do in case integrity of provided fields cannot be
            verified.  Possible actions:
            - "raise": Raise an AttributeError
            - "warn": Raise a UserWarning

        verbose : bool (optional, default=False)
            Whether to print a downloading progress bar to terminal. Has no
            effect for download=False.

        Raises
        ------
        ScopusQueryError
            If the number of search results exceeds 5000.

        ValueError
            If the integrity_action parameter is not one of the allowed ones.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AffiliationSearch.html.

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{fname}`,
        where  `path` is specified in `~/.scopus/config.ini` and fname is
        the md5-hashed version of `query`.
        """
        # Check
        allowed = ("warn", "raise")
        check_parameter_value(integrity_action, allowed, "integrity_action")

        # Query
        self.query = query
        Search.__init__(self,
                        query=query,
                        api="AffiliationSearch",
                        refresh=refresh,
                        count=count,
                        download=download,
                        verbose=verbose,
                        view="STANDARD")
        self.integrity = integrity_fields or []
        self.action = integrity_action
Ejemplo n.º 9
0
    def __init__(self,
                 identifier: str,
                 id_type: str,
                 refresh: Union[bool, int] = False,
                 **kwds: str
                 ) -> None:
        """Interaction with the PlumX Metrics API.

        :param identifier: The identifier of a document.
        :param id_type: The type of used ID. Allowed values are:
                        - 'airitiDocId'
                        - 'arxivId'
                        - 'cabiAbstractId'
                        - 'citeulikeId'
                        - 'digitalMeasuresArtifactId'
                        - 'doi'
                        - 'elsevierId'
                        - 'elsevierPii'
                        - 'facebookCountUrlId'
                        - 'figshareArticleId'
                        - 'githubRepoId'
                        - 'isbn'
                        - 'lccn'
                        - 'medwaveId'
                        - 'nctId'
                        - 'oclc'
                        - 'pittEprintDscholarId'
                        - 'pmcid'
                        - 'pmid'
                        - 'redditId'
                        - 'repecHandle'
                        - 'repoUrl'
                        - 'scieloId'
                        - 'sdEid'
                        - 'slideshareUrlId'
                        - 'smithsonianPddrId'
                        - 'soundcloudTrackId'
                        - 'ssrnId'
                        - 'urlId'
                        - 'usPatentApplicationId'
                        - 'usPatentPublicationId'
                        - 'vimeoVideoId'
                        - 'youtubeVideoId'
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/PlumXMetricsAPI.wadl.

        Raises
        ------
        ValueError
            If the parameter `refresh` is not one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{identifier}`,
        where `path` is specified in your configuration file.
        """
        # Checks
        allowed = ('airitiDocId', 'arxivId', 'cabiAbstractId',
                   'citeulikeId', 'digitalMeasuresArtifactId', 'doi',
                   'elsevierId', 'elsevierPii', 'facebookCountUrlId',
                   'figshareArticleId', 'githubRepoId', 'isbn',
                   'lccn', 'medwaveId', 'nctId', 'oclc',
                   'pittEprintDscholarId', 'pmcid', 'pmid', 'redditId',
                   'repecHandle', 'repoUrl', 'scieloId', 'sdEid',
                   'slideshareUrlId', 'smithsonianPddrId', 'soundcloudTrackId',
                   'ssrnId', 'urlId', 'usPatentApplicationId',
                   'usPatentPublicationId', 'vimeoVideoId', 'youtubeVideoId')
        check_parameter_value(id_type, allowed, "id_type")
        self._id_type = id_type
        self._identifier = identifier

        # Load json
        self._refresh = refresh
        self._view = 'ENHANCED'
        Retrieval.__init__(self, identifier=identifier, id_type=id_type,
                           api='PlumXMetrics', **kwds)
        cats = self._json.get('count_categories', [])
        self._count_categories = {d["name"]: d['count_types'] for d in cats}
Ejemplo n.º 10
0
    def __init__(self,
                 query: str,
                 refresh: Union[bool, int] = False,
                 verbose: bool = False,
                 download: bool = True,
                 integrity_fields: Union[List[str], Tuple[str, ...]] = None,
                 integrity_action: str = "raise",
                 count: int = 200,
                 **kwds: str) -> None:
        """Interaction with the Author Search API.

        :param query: A string of the query.  For allowed fields and values see
                      https://dev.elsevier.com/sc_author_search_tips.html.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param download: Whether to download results (if they have not been
                         cached).
        :param verbose: Whether to print a download progress bar.
        :param integrity_fields: Names of fields whose completeness should
                                 be checked.  ScopusSearch will perform the
                                 action specified in `integrity_action` if
                                 elements in these fields are missing.  This
                                 helps avoiding idiosynchratically missing
                                 elements that should always be present
                                 (e.g., EID or source ID).
        :param integrity_action: What to do in case integrity of provided fields
                                 cannot be verified.  Possible actions:
                                 - "raise": Raise an AttributeError
                                 - "warn": Raise a UserWarning
        :param count: (deprecated) The number of entries to be displayed at
                      once.  A smaller number means more queries with each
                      query having fewer results.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/AuthorSearchAPI.wadl.

        Raises
        ------
        ScopusQueryError
            If the number of search results exceeds 5000, which is the API's
            maximum number of results returned.  The error prevents the
            download attempt and avoids making use of your API key.

        ValueError
            If any of the parameters `integrity_action` or `refresh` is not
            one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{fname}`,
        where  `path` is specified in your configuration file, and `fname` is
        the md5-hashed version of `query`.
        """
        # Checks
        allowed = ("warn", "raise")
        check_parameter_value(integrity_action, allowed, "integrity_action")
        if count != 200:
            msg = "Parameter `count` is deprecated and will be removed in a "\
                  "future release.  There will be no substitute."
            warn(msg, FutureWarning)

        # Query
        self._action = integrity_action
        self._integrity = integrity_fields or []
        self._query = query
        self._refresh = refresh
        self._view = "STANDARD"
        Search.__init__(self,
                        query=query,
                        api='AuthorSearch',
                        count=count,
                        download=download,
                        verbose=verbose,
                        **kwds)
Ejemplo n.º 11
0
    def __init__(self,
                 identifier: Union[int, str] = None,
                 refresh: Union[bool, int] = False,
                 view: str = 'META_ABS',
                 id_type: str = None,
                 **kwds: str) -> None:
        """Interaction with the Abstract Retrieval API.

        :param identifier: The identifier of a document.  Can be the Scopus EID
                           , the Scopus ID, the PII, the Pubmed-ID or the DOI.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param id_type: The type of used ID. Allowed values: None, 'eid', 'pii',
                        'scopus_id', 'pubmed_id', 'doi'.  If the value is None,
                        the function tries to infer the ID type itself.
        :param view: The view of the file that should be downloaded.  Allowed
                     values: META, META_ABS, REF, FULL, where FULL includes all
                     information of META_ABS view and META_ABS includes all
                     information of the META view.  For details see
                     https://dev.elsevier.com/sc_abstract_retrieval_views.html.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values listed in the API specification at
                     https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl.

        Raises
        ------
        ValueError
            If any of the parameters `id_type`, `refresh` or `view` is not
            one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{identifier}`,
        where `path` is specified in your configuration file.  In case
        `identifier` is a DOI, an underscore replaces the forward slash.
        """
        # Checks
        identifier = str(identifier)
        check_parameter_value(view, ('META', 'META_ABS', 'REF', 'FULL'),
                              "view")
        if id_type is None:
            id_type = detect_id_type(identifier)
        else:
            allowed_id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
            check_parameter_value(id_type, allowed_id_types, "id_type")

        # Load json
        self._view = view
        self._refresh = refresh
        Retrieval.__init__(self,
                           identifier=identifier,
                           id_type=id_type,
                           api='AbstractRetrieval',
                           **kwds)
        self._json = self._json['abstracts-retrieval-response']
        self._head = chained_get(self._json, ["item", "bibrecord", "head"], {})
        conf_path = [
            'source', 'additional-srcinfo', 'conferenceinfo', 'confevent'
        ]
        self._confevent = chained_get(self._head, conf_path, {})
        if self._view == "REF":
            ref_path = ["references"]
        else:
            ref_path = ['item', 'bibrecord', 'tail', 'bibliography']
        self._ref = chained_get(self._json, ref_path, {})
Ejemplo n.º 12
0
    def __init__(self,
                 query,
                 refresh=False,
                 subscriber=True,
                 view=None,
                 download=True,
                 integrity_fields=None,
                 integrity_action="raise",
                 verbose=False,
                 **kwds):
        """Interaction with the Scopus Search API.

        Parameters
        ----------
        query : str
            A string of the query.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        subscriber : bool (optional, default=True)
            Whether the user accesses Scopus with a subscription or not.
            For subscribers, Scopus's cursor navigation will be used.
            Sets the number of entries in each query iteration to the maximum
            number allowed by the corresponding view.

        view : str (optional, default=None)
            Which view to use for the query, see
            https://dev.elsevier.com/sc_search_views.html.
            Allowed values: STANDARD, COMPLETE.  If None, defaults to
            COMPLETE if subscriber=True and to STANDARD if subscriber=False.

        download : bool (optional, default=True)
            Whether to download results (if they have not been cached).

        integrity_fields : None or iterable (default=None)
            Iterable of field names whose completeness should be checked.
            ScopusSearch will perform the action specified in
            `integrity_action` if elements in these fields are missing.  This
            helps avoiding idiosynchratically missing elements that should
            always be present, such as the EID or the source ID.

        integrity_action : str (optional, default="raise")
            What to do in case integrity of provided fields cannot be
            verified.  Possible actions:
            - "raise": Raise an AttributeError
            - "warn": Raise a UserWarning

        verbose : bool (optional, default=False)
            Whether to print a downloading progress bar to terminal.
            Has no effect for download=False or when query file is
            in cache.

        kwds : key-value parings, optional
            Keywords passed on as query parameters.  Must contain fields
            and values listed mentioned in the API specification
            (https://dev.elsevier.com/documentation/SCOPUSSearchAPI.wadl),
            such as "field" or "date".

        Raises
        ------
        ScopusQueryError
            For non-subscribers, if the number of search results exceeds 5000.

        ValueError
            If the view or the integrity_action parameter is not one of
            the allowed ones.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/ScopusSearch.html.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{fname}`,
        where `path` is specified in `~/.scopus/config.ini` and fname is
        the md5-hashed version of `query`.
        """
        # Checks
        if view:
            check_parameter_value(view, ('STANDARD', 'COMPLETE'), "view")
        allowed = ("warn", "raise")
        check_parameter_value(integrity_action, allowed, "integrity_action")

        # Parameters
        if not view:
            if subscriber:
                view = "COMPLETE"
            else:
                view = "STANDARD"
        count = 25
        if view == "STANDARD" and subscriber:
            count = 200
        if "cursor" in kwds:
            subscriber = kwds["cursor"]
            kwds.pop("cursor")
        if "count" in kwds:
            count = kwds["count"]
            kwds.pop("count")

        # Query
        self.query = query
        Search.__init__(self,
                        query=query,
                        api='ScopusSearch',
                        refresh=refresh,
                        count=count,
                        cursor=subscriber,
                        view=view,
                        download=download,
                        verbose=verbose,
                        **kwds)
        self.integrity = integrity_fields or []
        self.action = integrity_action
Ejemplo n.º 13
0
    def __init__(self,
                 query: str,
                 refresh: Union[bool, int] = False,
                 view: str = None,
                 verbose: bool = False,
                 download: bool = True,
                 integrity_fields: Union[List[str], Tuple[str, ...]] = None,
                 integrity_action: str = "raise",
                 subscriber: bool = True,
                 **kwds: str
                 ) -> None:
        """Interaction with the Scopus Search API.

        :param query: A string of the query as used in the Advanced Search
                     on scopus.com.  All fields except "INDEXTERMS()" and
                     "LIMIT-TO()" work.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param view: Which view to use for the query, see
                     https://dev.elsevier.com/sc_search_views.html.
                     Allowed values: STANDARD, COMPLETE.  If None, defaults to
                     COMPLETE if `subscriber=True` and to STANDARD if
                     `subscriber=False`.
        :param verbose: Whether to print a download progress bar.
        :param download: Whether to download results (if they have not been
                         cached).
        :param integrity_fields: Names of fields whose completeness should
                                 be checked.  ScopusSearch will perform the
                                 action specified in `integrity_action` if
                                 elements in these fields are missing.  This
                                 helps avoiding idiosynchratically missing
                                 elements that should always be present
                                 (e.g., EID or source ID).
        :param integrity_action: What to do in case integrity of provided fields
                                 cannot be verified.  Possible actions:
                                 - "raise": Raise an AttributeError
                                 - "warn": Raise a UserWarning
        :param subscriber: Whether you access Scopus with a subscription or not.
                           For subscribers, Scopus's cursor navigation will be
                           used.  Sets the number of entries in each query
                           iteration to the maximum number allowed by the
                           corresponding view.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/ScopusSearchAPI.wadl.

        Raises
        ------
        ScopusQueryError
            For non-subscribers, if the number of search results exceeds 5000.

        ValueError
            If any of the parameters `integrity_action`, `refresh` or `view`
            is not one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{fname}`,
        where `path` is specified in your configuration file and `fname` is
        the md5-hashed version of `query`.
        """
        # Checks
        if view:
            check_parameter_value(view, ('STANDARD', 'COMPLETE'), "view")
        allowed = ("warn", "raise")
        check_parameter_value(integrity_action, allowed, "integrity_action")

        # Parameters
        if not view:
            if subscriber:
                view = "COMPLETE"
            else:
                view = "STANDARD"
        count = 25
        if view == "STANDARD" and subscriber:
            count = 200
        if "cursor" in kwds:
            subscriber = kwds["cursor"]
            kwds.pop("cursor")
        if "count" in kwds:
            count = kwds["count"]
            kwds.pop("count")

        # Query
        self._action = integrity_action
        self._integrity = integrity_fields or []
        self._refresh = refresh
        self._query = query
        self._view = view
        Search.__init__(self, query=query, api='ScopusSearch', count=count,
                        cursor=subscriber, download=download,
                        verbose=verbose, **kwds)
Ejemplo n.º 14
0
    def __init__(self,
                 identifier=None,
                 refresh=False,
                 view='META_ABS',
                 id_type=None,
                 **kwds):
        """Interaction with the Abstract Retrieval API.

        Parameters
        ----------
        identifier : str or int
            The identifier of a document.  Can be the Scopus EID, the Scopus
            ID, the PII, the Pubmed-ID or the DOI.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        id_type: str (optional, default=None)
            The type of used ID. Allowed values: None, 'eid', 'pii',
            'scopus_id', 'pubmed_id', 'doi'.  If the value is None, the
            function tries to infer the ID type itself.

        view : str (optional, default=META_ABS)
            The view of the file that should be downloaded.  Allowed values:
            META, META_ABS, REF, FULL, where FULL includes all information
            of META_ABS view and META_ABS includes all information of the
            META view.  For details see
            https://dev.elsevier.com/sc_abstract_retrieval_views.html.

        kwds : key-value parings, optional
            Keywords passed on as query parameters.  Must contain fields
            and values listed mentioned in the API specification
            (https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl),
            such as "startref" or "refcount".

        Raises
        ------
        ValueError
            If the id_type parameter or the view parameter contains
            invalid entries.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AbstractRetrieval.html.

        Notes
        -----
        The directory for cached results is `{path}/{view}/{identifier}`,
        where `path` is specified in `~/.scopus/config.ini`.  In case
        `identifier` is a DOI, an underscore replaces the forward slash.
        """
        # Checks
        identifier = str(identifier)
        check_parameter_value(view, ('META', 'META_ABS', 'REF', 'FULL'),
                              "view")
        if id_type is None:
            id_type = detect_id_type(identifier)
        else:
            allowed_id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
            check_parameter_value(id_type, allowed_id_types, "id_type")

        # Load json
        Retrieval.__init__(self,
                           identifier=identifier,
                           id_type=id_type,
                           api='AbstractRetrieval',
                           refresh=refresh,
                           view=view,
                           **kwds)
        self._json = self._json['abstracts-retrieval-response']
        self._head = chained_get(self._json, ["item", "bibrecord", "head"], {})
        conf_path = [
            'source', 'additional-srcinfo', 'conferenceinfo', 'confevent'
        ]
        self._confevent = chained_get(self._head, conf_path, {})
        if self._view == "REF":
            ref_path = ["references"]
        else:
            ref_path = ['item', 'bibrecord', 'tail', 'bibliography']
        self._ref = chained_get(self._json, ref_path, {})
Ejemplo n.º 15
0
    def __init__(self, eid, start, end=datetime.now().year, citation=None,
                 refresh=False):
        """Interaction witht the Citation Overview API.

        Parameters
        ----------
        eid : str
            The EID of the abstract.

        start : str or int
            The first year for which the citation count should be loaded

        end : str or int (optional, default=datetime.now().year)
            The last year for which the citation count should be loaded.
            Default is the current year.

        citation : str (optional, default=None)
            Allows for the exclusion of self-citations or those by books.
            If None, will count all citations.
            Allowed values: None, exclude-self, exclude-books

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/CitationOverview.html.

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{eid}`,
        where `path` is specified in `~/.scopus/config.ini`.

        Your API Key needs to be approved by Elsevier to access this API.
        """
        # Checks
        if citation:
            allowed = ('exclude-self', 'exclude-books')
            check_parameter_value(citation, allowed, "citation")

        # Variables
        self._start = int(start)
        self._end = int(end)
        self._citation = citation
        view = "STANDARD"  # In case Scopus adds different views in future

        # Get file content
        date = f'{start}-{end}'
        Retrieval.__init__(self, eid, 'CitationOverview', refresh, view=view,
                           date=date, citation=citation)
        self._data = self._json['abstract-citations-response']

        # citeInfoMatrix
        m = self._data['citeInfoMatrix']['citeInfoMatrixXML']['citationMatrix']['citeInfo'][0]
        self._citeInfoMatrix = _parse_dict(m)
        # identifier-legend
        l = self._data['identifier-legend']['identifier'][0]
        self._identifierlegend = _parse_dict(l)
        # citeColumnTotalXML
        self._citeColumnTotalXML = self._data['citeColumnTotalXML']  # not used
Ejemplo n.º 16
0
    def __init__(self,
                 author_id: Union[int, str],
                 refresh: Union[bool, int] = False,
                 view: str = "ENHANCED",
                 **kwds: str) -> None:
        """Interaction with the Author Retrieval API.

        :param author_id: The ID or the EID of the author.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param view: The view of the file that should be downloaded.  Allowed
                     values: METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD
                     includes all information of LIGHT view and ENHANCED
                     includes all information of any view.  For details see
                     https://dev.elsevier.com/sc_author_retrieval_views.html.
                     Note: Neither the BASIC nor the DOCUMENTS view are active,
                     although documented.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/AuthorRetrievalAPI.wadl.

        Raises
        ------
        ValueError
            If any of the parameters `refresh` or `view` is not
            one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{author_id}`,
        where `path` is specified in your configuration file, and `author_id`
        is stripped of an eventually leading `'9-s2.0-'`.
        """
        # Checks
        allowed_views = ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED')
        check_parameter_value(view, allowed_views, "view")

        # Load json
        self._id = str(author_id).split('-')[-1]
        self._view = view
        self._refresh = refresh
        Retrieval.__init__(self,
                           identifier=self._id,
                           api='AuthorRetrieval',
                           **kwds)

        # Parse json
        self._json = self._json['author-retrieval-response']
        try:
            self._json = self._json[0]
        except KeyError:  # Incomplete forward
            alias_json = listify(self._json['alias']['prism:url'])
            self._alias = [d['$'].split(':')[-1] for d in alias_json]
            alias_str = ', '.join(self._alias)
            text = f'Author profile with ID {author_id} has been merged and '\
                   f'the main profile is now one of {alias_str}.  Please update '\
                   'your records manually.  Functionality of this object is '\
                   'reduced.'
            warn(text, UserWarning)
        else:
            self._alias = None
        self._profile = self._json.get("author-profile", {})
Ejemplo n.º 17
0
    def __init__(self, identifier, id_type, refresh=False):
        """Interaction with the PlumX Metrics API.

        Parameters
        ----------
        identifier : str
            The identifier of a document.

        id_type: str
            The type of used ID. Allowed values are:
                - 'airitiDocId'
                - 'arxivId'
                - 'cabiAbstractId'
                - 'citeulikeId'
                - 'digitalMeasuresArtifactId'
                - 'doi'
                - 'elsevierId'
                - 'elsevierPii'
                - 'facebookCountUrlId'
                - 'figshareArticleId'
                - 'githubRepoId'
                - 'isbn'
                - 'lccn'
                - 'medwaveId'
                - 'nctId'
                - 'oclc'
                - 'pittEprintDscholarId'
                - 'pmcid'
                - 'pmid'
                - 'redditId'
                - 'repecHandle'
                - 'repoUrl'
                - 'scieloId'
                - 'sdEid'
                - 'slideshareUrlId'
                - 'smithsonianPddrId'
                - 'soundcloudTrackId'
                - 'ssrnId'
                - 'urlId'
                - 'usPatentApplicationId'
                - 'usPatentPublicationId'
                - 'vimeoVideoId'
                - 'youtubeVideoId'

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/PlumXMetrics.html.

        Notes
        -----
        The directory for cached results is `{path}/ENHANCED/{identifier}`,
        where `path` is specified in `~/.scopus/config.ini`.
        """
        allowed = ('airitiDocId', 'arxivId', 'cabiAbstractId', 'citeulikeId',
                   'digitalMeasuresArtifactId', 'doi', 'elsevierId',
                   'elsevierPii', 'facebookCountUrlId', 'figshareArticleId',
                   'githubRepoId', 'isbn', 'lccn', 'medwaveId', 'nctId',
                   'oclc', 'pittEprintDscholarId', 'pmcid', 'pmid', 'redditId',
                   'repecHandle', 'repoUrl', 'scieloId', 'sdEid',
                   'slideshareUrlId', 'smithsonianPddrId', 'soundcloudTrackId',
                   'ssrnId', 'urlId', 'usPatentApplicationId',
                   'usPatentPublicationId', 'vimeoVideoId', 'youtubeVideoId')
        check_parameter_value(id_type, allowed, "id_type")
        self.id_type = id_type
        self.identifier = identifier
        Retrieval.__init__(self,
                           identifier=identifier,
                           id_type=id_type,
                           api='PlumXMetrics',
                           refresh=refresh,
                           view='ENHANCED')