def __init__(self, author_id, refresh=False, view="ENHANCED"):
    """Interaction with the Author Retrieval API.

    Parameters
    ----------
    author_id : str or int
        The ID of the author to look up.  May also be given as an
        Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn); only the part
        after the last hyphen is used.

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    view : str (optional, default="ENHANCED")
        The view of the file that should be downloaded.  Allowed values:
        METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD includes all
        information of LIGHT view and ENHANCED includes all information
        of any view.  For details see
        https://dev.elsevier.com/sc_author_retrieval_views.html.
        Note: Neither the BASIC nor the DOCUMENTS view is active,
        although documented.

    Warns
    -----
    UserWarning
        If the response cannot be indexed with ``0`` (KeyError), in which
        case the aliases found under ``alias -> prism:url`` are reported
        as the profiles this author has been merged into.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/AuthorRetrieval.html.

    Notes
    -----
    The directory for cached results is `{path}/ENHANCED/{author_id}`,
    where `path` is specified in `~/.scopus/config.ini` and `author_id`
    is stripped of an eventually leading `'9-s2.0-'`.
    """
    # Validate the requested view before anything else happens
    check_parameter_value(
        view, ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED'), "view")

    # Keep only the trailing numeric part; the int() round trip rejects
    # non-numeric input with a ValueError and normalises the digits
    numeric_part = str(author_id).rsplit('-', 1)[-1]
    self._id = str(int(numeric_part))
    Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                       refresh=refresh, view=view)

    self._json = self._json['author-retrieval-response']
    try:
        first_entry = self._json[0]
    except KeyError:
        # Incomplete forward: collect the IDs the profile now points to
        forwards = listify(self._json['alias']['prism:url'])
        self._alias = [fwd['$'].rsplit(':', 1)[-1] for fwd in forwards]
        merged_into = ', '.join(self._alias)
        warn(f'Author profile with ID {author_id} has been merged and '
             f'the main profile is now one of {merged_into}. Please update '
             'your records manually. Functionality of this object is '
             'reduced.',
             UserWarning)
    else:
        self._json = first_entry
        self._alias = None
    self._profile = self._json.get("author-profile", {})
def __init__(self, query, refresh=False, view='ENHANCED'):
    """Interaction with the Serial Title API.

    Parameters
    ----------
    query : dict
        Query parameters and corresponding fields.  Accepted keys are
        'title', 'issn', 'date', 'pub', 'subj', 'subjCode', 'content'
        and 'oa'.  For examples on possible values, please refer to
        https://dev.elsevier.com/documentation/SerialTitleAPI.wadl#d1e22.

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    view : str (optional, default="ENHANCED")
        The view of the file that should be downloaded.  Allowed values:
        STANDARD, ENHANCED, CITESCORE.  For details see
        https://dev.elsevier.com/sc_serial_title_views.html.

    Raises
    ------
    Scopus400Error
        If provided value for a query key is invalid or if for
        non-subscribers the number of search results exceeds 5000.

    ValueError
        If `query` contains a key that is not accepted.  The view is
        additionally validated through `check_parameter_value`.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/SerialSearch.html.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{fname}`,
    where `path` is specified in `~/.scopus/config.ini` and fname is
    the md5-hashed version of `query` dict turned into string in format
    of 'key=value' delimited by '&'.
    """
    # Reject unknown query keys, reporting them in the order given
    accepted_keys = ('title', 'issn', 'date', 'pub', 'subj', 'subjCode',
                     'content', 'oa')
    unknown = [key for key in query if key not in accepted_keys]
    if unknown:
        listing = ", ".join(unknown)
        raise ValueError(f'Query key(s) "{listing}" invalid')
    check_parameter_value(
        view, ('STANDARD', 'ENHANCED', 'CITESCORE'), "view")

    # Keep a string rendering of the query, then run the search
    self.query = str(query)
    Search.__init__(self, query=query, api='SerialSearch',
                    refresh=refresh, view=view)
    response = self._json['serial-metadata-response']
    self._n = len(response.get('entry', []))
def __init__(self,
             query: Dict,
             refresh: Union[bool, int] = False,
             view: str = 'ENHANCED',
             **kwds: str
             ) -> None:
    """Interaction with the Serial Title API.

    :param query: Query parameters and corresponding fields.  Accepted
                  keys are 'title', 'issn', 'date', 'pub', 'subj',
                  'subjCode', 'content' and 'oa'.  For examples on
                  possible values, please refer to
                  https://dev.elsevier.com/documentation/SerialTitleAPI.wadl#d1e22.
    :param refresh: Whether to refresh the cached file if it exists or
                    not.  If int is passed, cached file will be refreshed
                    if the number of days since last modification exceeds
                    that value.
    :param view: The view of the file that should be downloaded.
                 Allowed values: STANDARD, ENHANCED, CITESCORE.  For
                 details see
                 https://dev.elsevier.com/sc_serial_title_views.html.
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values listed in the API specification at
                 https://dev.elsevier.com/documentation/SerialTitleAPI.wadl.

    Raises
    ------
    Scopus400Error
        If provided value for a query key is invalid or if for
        non-subscribers the number of search results exceeds 5000.

    ValueError
        If `query` contains a key that is not accepted.  The view is
        additionally validated through `check_parameter_value`.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{fname}`,
    where `path` is specified in your configuration file, and `fname` is
    the md5-hashed version of `query` dict turned into string in format
    of 'key=value' delimited by '&'.
    """
    # Reject unknown query keys, reporting them in the order given
    accepted_keys = ('title', 'issn', 'date', 'pub', 'subj', 'subjCode',
                     'content', 'oa')
    unknown = [key for key in query if key not in accepted_keys]
    if unknown:
        listing = ", ".join(unknown)
        raise ValueError(f'Query key(s) "{listing}" invalid')
    check_parameter_value(
        view, ('STANDARD', 'ENHANCED', 'CITESCORE'), "view")

    # State stored on the instance before the base initialiser runs;
    # refresh and view are not forwarded as arguments
    # (presumably the base class reads them from self - confirm)
    self._view = view
    self._refresh = refresh
    self._query = str(query)
    Search.__init__(self, query=query, api='SerialSearch', **kwds)
    response = self._json['serial-metadata-response']
    self._n = len(response.get('entry', []))
def __init__(self, issn, refresh=False, view="ENHANCED", years=None):
    """Interaction with the Serial Title API.

    Parameters
    ----------
    issn : str or int
        The ISSN or the E-ISSN of the source.

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    view : str (optional, default="ENHANCED")
        The view of the file that should be downloaded.  Allowed values:
        BASIC, STANDARD, ENHANCED.  For details see
        https://dev.elsevier.com/sc_serial_title_views.html.

    years : str (optional, default=None)
        A string specifying a year or range of years (combining two
        years with a hyphen) for which yearly metric data (SJR, SNIP,
        yearly-data) should be looked up for.  If None, only the
        most recent metric data values are provided.
        Note: Any truthy value here forces `refresh` to True.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/SerialTitle.html.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{source_id}`,
    where `path` is specified in `~/.scopus/config.ini`.
    """
    # Validate the requested view
    check_parameter_value(view, ('BASIC', 'STANDARD', 'ENHANCED'), "view")

    self._id = str(issn)
    self._years = years
    # A year selection always triggers a refresh; otherwise the
    # caller's choice is kept
    refresh = True if years else refresh

    Retrieval.__init__(self, identifier=self._id, view=view, date=years,
                       api='SerialTitle', refresh=refresh)
    self._json = self._json['serial-metadata-response']
    # Only the first entry of the response is kept for later access
    self._entry = self._json['entry'][0]
def __init__(self, aff_id, refresh=False, view="STANDARD"):
    """Interaction with the Affiliation Retrieval API.

    Parameters
    ----------
    aff_id : str or int
        The Scopus Affiliation ID.  May also be given as an Elsevier
        EID (i.e., in the form 10-s2.0-nnnnnnnn); only the part after
        the last hyphen is used.

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    view : str (optional, default="STANDARD")
        The view of the file that should be downloaded.  Allowed values:
        LIGHT, STANDARD, where STANDARD includes all information of the
        LIGHT view.  For details see
        https://dev.elsevier.com/sc_affil_retrieval_views.html.
        Note: Neither the BASIC view nor DOCUMENTS or AUTHORS views are
        active, although documented.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/AffiliationRetrieval.html.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{aff_id}`,
    where `path` is specified in `~/.scopus/config.ini`.
    """
    # Validate the requested view
    check_parameter_value(view, ('LIGHT', 'STANDARD'), "view")

    # Keep only the trailing numeric part; int() rejects anything else
    trailing = str(aff_id).rsplit('-', 1)[-1]
    aff_id = str(int(trailing))
    Retrieval.__init__(self, identifier=aff_id, view=view,
                       refresh=refresh, api='AffiliationRetrieval')

    self._json = self._json['affiliation-retrieval-response']
    self._profile = self._json.get("institution-profile", {})
def __init__(self,
             aff_id: Union[int, str],
             refresh: Union[bool, int] = False,
             view: str = "STANDARD",
             **kwds: str
             ) -> None:
    """Interaction with the Affiliation Retrieval API.

    :param aff_id: Scopus ID or EID of the affiliation profile.  Only
                   the part after the last hyphen is used.
    :param refresh: Whether to refresh the cached file if it exists or
                    not.  If int is passed, cached file will be refreshed
                    if the number of days since last modification exceeds
                    that value.
    :param view: The view of the file that should be downloaded.
                 Allowed values: LIGHT, STANDARD, where STANDARD includes
                 all information of the LIGHT view.  For details see
                 https://dev.elsevier.com/sc_affil_retrieval_views.html.
                 Note: Neither the BASIC view nor DOCUMENTS or AUTHORS
                 views are active, although documented.
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values mentioned in the API specification at
                 https://dev.elsevier.com/documentation/AffiliationRetrievalAPI.wadl.

    Raises
    ------
    ValueError
        If any of the parameters `refresh` or `view` is not
        one of the allowed values.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{aff_id}`,
    where `path` is specified in your configuration file.
    """
    # Validate the requested view
    check_parameter_value(view, ('LIGHT', 'STANDARD'), "view")

    # Stored on the instance rather than forwarded as arguments
    # (presumably read by the base initialiser - confirm)
    self._refresh = refresh
    self._view = view

    # Keep only the trailing numeric part; int() rejects anything else
    trailing = str(aff_id).rsplit('-', 1)[-1]
    aff_id = str(int(trailing))
    Retrieval.__init__(self, aff_id, api='AffiliationRetrieval', **kwds)

    self._json = self._json['affiliation-retrieval-response']
    self._profile = self._json.get("institution-profile", {})
def __init__(self,
             identifier: List[Union[int, str]],
             start: Union[int, str],
             end: Union[int, str] = datetime.now().year,
             id_type: str = "scopus_id",
             eid: Optional[str] = None,
             refresh: Union[bool, int] = False,
             citation: Optional[str] = None,
             **kwds: str
             ) -> None:
    """Interaction with the Citation Overview API.

    :param identifier: Up to 25 identifiers for which to look up
                       citations.  Must be Scopus IDs, DOIs, PIIs or
                       Pubmed IDs.  A single `str` or `int` is wrapped
                       into a one-element list.
    :param start: The first year for which the citation count should
                  be loaded.
    :param end: The last year for which the citation count should be
                loaded.  Defaults to the current year.  Note that the
                default is evaluated once, when the function is defined.
    :param id_type: The type of the IDs provided in `identifier`.  Must
                    be one of "scopus_id", "doi", "pii", "pubmed_id".
    :param eid: (deprecated) The Scopus ID of the abstract - will be
                removed in a future release: Instead use param
                `identifier` with `id_type='scopus_id'` after stripping
                the part until the second hyphen.  Passing a value here
                only triggers a FutureWarning; the value itself is not
                used by this initialiser.
    :param refresh: Whether to refresh the cached file if it exists or
                    not.  If int is passed, cached file will be refreshed
                    if the number of days since last modification exceeds
                    that value.
    :param citation: Allows for the exclusion of self-citations or those
                     by books.  If `None`, will count all citations.
                     Allowed values: None, exclude-self, exclude-books
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values mentioned in the API specification at
                 https://dev.elsevier.com/documentation/AbstractCitationAPI.wadl.

    Raises
    ------
    ValueError
        If parameter `identifier` contains fewer than 1 or more than
        25 elements.

    ValueError
        If any of the parameters `citation`, `id_type` or `refresh`
        is not one of the allowed values.

    Notes
    -----
    The directory for cached results is `{path}/STANDARD/{id}-{citation}`,
    where `path` is specified in your configuration file, and `id` the
    md5-hashed version of a string joining `identifier` on underscore.

    Your API Key needs to be augmented by Elsevier's Scopus Integration
    Team to access this API.
    """
    # Checks
    allowed = ('scopus_id', 'doi', 'pii', 'pubmed_id')
    check_parameter_value(id_type, allowed, "id_type")
    if citation:
        allowed = ('exclude-self', 'exclude-books')
        check_parameter_value(citation, allowed, "citation")
    if eid or not isinstance(identifier, list):
        msg = "Parameter `eid` is deprecated and will be removed in a "\
              "future release. Instead, provide the corresponding "\
              "Scopus ID via parameter `identifier` as a list, and set "\
              "`id_type='scopus_id'`."
        warn(msg, FutureWarning)
    # A bare string would otherwise be split into single characters below
    # (and a bare int would fail on len()), so treat a scalar as one ID
    if isinstance(identifier, (str, int)):
        identifier = [identifier]
    # The lower bound must be 1: a length can never be negative, so the
    # former `< 0` test let an empty list through
    if len(identifier) < 1 or len(identifier) > 25:
        msg = "Provide at least 1 and at most 25 identifiers"
        raise ValueError(msg)

    # Variables
    identifier = [str(i) for i in identifier]
    self._start = int(start)
    self._end = int(end)
    self._citation = citation
    self._refresh = refresh
    self._view = "STANDARD"

    # Get file content; the IDs travel as a query parameter named after
    # their type, while the md5 digest serves as the identifier stem
    date = f'{start}-{end}'
    kwds[id_type] = identifier
    stem = md5("_".join(identifier).encode('utf8')).hexdigest()
    Retrieval.__init__(self, stem, api='CitationOverview', date=date,
                       citation=citation, **kwds)
    self._data = self._json['abstract-citations-response']

    # citeInfoMatrix
    matrix = self._data['citeInfoMatrix']['citeInfoMatrixXML'][
        'citationMatrix']['citeInfo']
    self._citeInfoMatrix = [_parse_dict(e) for e in matrix]
    # identifier-legend
    legend = self._data['identifier-legend']['identifier']
    self._identifierlegend = [_parse_dict(e) for e in legend]
    # citeCountHeader
    self._citeCountHeader = self._data['citeColumnTotalXML'][
        "citeCountHeader"]
def __init__(self, query, refresh=False, download=True, count=200,
             integrity_fields=None, integrity_action="raise",
             verbose=False):
    """Interaction with the Affiliation Search API.

    Parameters
    ----------
    query : str
        A string of the query, e.g. "af-id(60021784)".

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    download : bool (optional, default=True)
        Whether to download results (if they have not been cached).

    count : int (optional, default=200)
        The number of entries to be displayed at once.  A smaller number
        means more queries with each query having less results.

    integrity_fields : None or iterable (default=None)
        Iterable of field names whose completeness should be checked.
        ScopusSearch will perform the action specified in
        `integrity_action` if elements in these fields are missing.
        This helps avoiding idiosyncratically missing elements that
        should always be present, such as the EID or the name.
        A falsy value is stored as an empty list.

    integrity_action : str (optional, default="raise")
        What to do in case integrity of provided fields cannot be
        verified.  Possible actions:
        - "raise": Raise an AttributeError
        - "warn": Raise a UserWarning

    verbose : bool (optional, default=False)
        Whether to print a downloading progress bar to terminal.
        Has no effect for download=False.

    Raises
    ------
    ScopusQueryError
        If the number of search results exceeds 5000.

    ValueError
        If the integrity_action parameter is not one of the allowed
        ones.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/AffiliationSearch.html.

    Notes
    -----
    The directory for cached results is `{path}/STANDARD/{fname}`,
    where `path` is specified in `~/.scopus/config.ini` and fname is
    the md5-hashed version of `query`.
    """
    # Only these two reactions to failed integrity checks are accepted
    check_parameter_value(
        integrity_action, ("warn", "raise"), "integrity_action")

    # The view is fixed to STANDARD for this search
    self.query = query
    Search.__init__(self, query=query, api="AffiliationSearch",
                    refresh=refresh, count=count, download=download,
                    verbose=verbose, view="STANDARD")
    self.action = integrity_action
    self.integrity = integrity_fields or []
def __init__(self,
             identifier: str,
             id_type: str,
             refresh: Union[bool, int] = False,
             **kwds: str
             ) -> None:
    """Interaction with the PlumX Metrics API.

    :param identifier: The identifier of a document.
    :param id_type: The type of used ID.  Allowed values are:
                    - 'airitiDocId'
                    - 'arxivId'
                    - 'cabiAbstractId'
                    - 'citeulikeId'
                    - 'digitalMeasuresArtifactId'
                    - 'doi'
                    - 'elsevierId'
                    - 'elsevierPii'
                    - 'facebookCountUrlId'
                    - 'figshareArticleId'
                    - 'githubRepoId'
                    - 'isbn'
                    - 'lccn'
                    - 'medwaveId'
                    - 'nctId'
                    - 'oclc'
                    - 'pittEprintDscholarId'
                    - 'pmcid'
                    - 'pmid'
                    - 'redditId'
                    - 'repecHandle'
                    - 'repoUrl'
                    - 'scieloId'
                    - 'sdEid'
                    - 'slideshareUrlId'
                    - 'smithsonianPddrId'
                    - 'soundcloudTrackId'
                    - 'ssrnId'
                    - 'urlId'
                    - 'usPatentApplicationId'
                    - 'usPatentPublicationId'
                    - 'vimeoVideoId'
                    - 'youtubeVideoId'
    :param refresh: Whether to refresh the cached file if it exists or
                    not.  If int is passed, cached file will be refreshed
                    if the number of days since last modification exceeds
                    that value.
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values mentioned in the API specification at
                 https://dev.elsevier.com/documentation/PlumXMetricsAPI.wadl.

    Raises
    ------
    ValueError
        If any of the parameters `id_type` or `refresh` is not one of
        the allowed values.

    Notes
    -----
    The directory for cached results is `{path}/ENHANCED/{identifier}`,
    where `path` is specified in your configuration file.
    """
    # Validate the ID type against the accepted names
    known_id_types = (
        'airitiDocId', 'arxivId', 'cabiAbstractId', 'citeulikeId',
        'digitalMeasuresArtifactId', 'doi', 'elsevierId', 'elsevierPii',
        'facebookCountUrlId', 'figshareArticleId', 'githubRepoId',
        'isbn', 'lccn', 'medwaveId', 'nctId', 'oclc',
        'pittEprintDscholarId', 'pmcid', 'pmid', 'redditId',
        'repecHandle', 'repoUrl', 'scieloId', 'sdEid',
        'slideshareUrlId', 'smithsonianPddrId', 'soundcloudTrackId',
        'ssrnId', 'urlId', 'usPatentApplicationId',
        'usPatentPublicationId', 'vimeoVideoId', 'youtubeVideoId',
    )
    check_parameter_value(id_type, known_id_types, "id_type")

    # Instance state; the view is fixed to ENHANCED for this API
    self._identifier = identifier
    self._id_type = id_type
    self._view = 'ENHANCED'
    self._refresh = refresh

    Retrieval.__init__(self, identifier=identifier, id_type=id_type,
                       api='PlumXMetrics', **kwds)

    # Map each category name to its list of count types
    categories = self._json.get('count_categories', [])
    self._count_categories = {
        cat["name"]: cat['count_types'] for cat in categories
    }
def __init__(self,
             query: str,
             refresh: Union[bool, int] = False,
             verbose: bool = False,
             download: bool = True,
             integrity_fields: Union[List[str], Tuple[str, ...]] = None,
             integrity_action: str = "raise",
             count: int = 200,
             **kwds: str
             ) -> None:
    """Interaction with the Author Search API.

    :param query: A string of the query.  For allowed fields and values
                  see https://dev.elsevier.com/sc_author_search_tips.html.
    :param refresh: Whether to refresh the cached file if it exists or
                    not.  If int is passed, cached file will be refreshed
                    if the number of days since last modification exceeds
                    that value.
    :param verbose: Whether to print a download progress bar.
    :param download: Whether to download results (if they have not been
                     cached).
    :param integrity_fields: Names of fields whose completeness should
                             be checked.  ScopusSearch will perform the
                             action specified in `integrity_action` if
                             elements in these fields are missing.  This
                             helps avoiding idiosyncratically missing
                             elements that should always be present
                             (e.g., EID or source ID).  A falsy value is
                             stored as an empty list.
    :param integrity_action: What to do in case integrity of provided
                             fields cannot be verified.
                             Possible actions:
                             - "raise": Raise an AttributeError
                             - "warn": Raise a UserWarning
    :param count: (deprecated) The number of entries to be displayed at
                  once.  A smaller number means more queries with each
                  query having fewer results.  Any value other than 200
                  triggers a FutureWarning.
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values mentioned in the API specification at
                 https://dev.elsevier.com/documentation/AuthorSearchAPI.wadl.

    Raises
    ------
    ScopusQueryError
        If the number of search results exceeds 5000, which is the
        API's maximum number of results returned.  The error prevents
        the download attempt and avoids making use of your API key.

    ValueError
        If any of the parameters `integrity_action` or `refresh` is
        not one of the allowed values.

    Notes
    -----
    The directory for cached results is `{path}/STANDARD/{fname}`,
    where `path` is specified in your configuration file, and `fname` is
    the md5-hashed version of `query`.
    """
    # Only these two reactions to failed integrity checks are accepted
    check_parameter_value(
        integrity_action, ("warn", "raise"), "integrity_action")
    # Deviating from the default count signals use of the deprecated
    # parameter
    if count != 200:
        warn("Parameter `count` is deprecated and will be removed in a "
             "future release. There will be no substitute.",
             FutureWarning)

    # Instance state; the view is fixed to STANDARD for this search
    self._query = query
    self._refresh = refresh
    self._view = "STANDARD"
    self._integrity = integrity_fields or []
    self._action = integrity_action

    Search.__init__(self, query=query, api='AuthorSearch', count=count,
                    download=download, verbose=verbose, **kwds)
def __init__(self,
             identifier: Union[int, str] = None,
             refresh: Union[bool, int] = False,
             view: str = 'META_ABS',
             id_type: str = None,
             **kwds: str
             ) -> None:
    """Interaction with the Abstract Retrieval API.

    :param identifier: The identifier of a document.  Can be the Scopus
                       EID, the Scopus ID, the PII, the Pubmed-ID or the
                       DOI.  The value is converted to `str` first.
    :param refresh: Whether to refresh the cached file if it exists or
                    not.  If int is passed, cached file will be refreshed
                    if the number of days since last modification exceeds
                    that value.
    :param view: The view of the file that should be downloaded.
                 Allowed values: META, META_ABS, REF, FULL, where FULL
                 includes all information of META_ABS view and META_ABS
                 includes all information of the META view.  For details
                 see
                 https://dev.elsevier.com/sc_abstract_retrieval_views.html.
    :param id_type: The type of used ID.  Allowed values: None, 'eid',
                    'pii', 'scopus_id', 'pubmed_id', 'doi'.  If the value
                    is None, the function tries to infer the ID type
                    itself.
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values listed in the API specification at
                 https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl.

    Raises
    ------
    ValueError
        If any of the parameters `id_type`, `refresh` or `view` is not
        one of the allowed values.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{identifier}`,
    where `path` is specified in your configuration file.  In case
    `identifier` is a DOI, an underscore replaces the forward slash.
    """
    # Normalise and validate the inputs
    identifier = str(identifier)
    check_parameter_value(
        view, ('META', 'META_ABS', 'REF', 'FULL'), "view")
    if id_type is not None:
        check_parameter_value(
            id_type, ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi'),
            "id_type")
    else:
        # No type given: derive it from the identifier itself
        id_type = detect_id_type(identifier)

    # Stored on the instance rather than forwarded as arguments
    # (presumably read by the base initialiser - confirm)
    self._refresh = refresh
    self._view = view
    Retrieval.__init__(self, identifier=identifier, id_type=id_type,
                       api='AbstractRetrieval', **kwds)

    # Shortcuts into frequently used parts of the response
    self._json = self._json['abstracts-retrieval-response']
    self._head = chained_get(self._json, ["item", "bibrecord", "head"], {})
    self._confevent = chained_get(
        self._head,
        ['source', 'additional-srcinfo', 'conferenceinfo', 'confevent'],
        {})
    # The REF view keeps references at the top level of the response
    if self._view == "REF":
        reference_path = ["references"]
    else:
        reference_path = ['item', 'bibrecord', 'tail', 'bibliography']
    self._ref = chained_get(self._json, reference_path, {})
def __init__(self, query, refresh=False, subscriber=True, view=None,
             download=True, integrity_fields=None,
             integrity_action="raise", verbose=False, **kwds):
    """Interaction with the Scopus Search API.

    Parameters
    ----------
    query : str
        A string of the query.

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    subscriber : bool (optional, default=True)
        Whether the user accesses Scopus with a subscription or not.
        For subscribers, Scopus's cursor navigation will be used.
        Sets the number of entries in each query iteration to the
        maximum number allowed by the corresponding view.

    view : str (optional, default=None)
        Which view to use for the query, see
        https://dev.elsevier.com/sc_search_views.html.
        Allowed values: STANDARD, COMPLETE.  If None, defaults to
        COMPLETE if subscriber=True and to STANDARD if subscriber=False.

    download : bool (optional, default=True)
        Whether to download results (if they have not been cached).

    integrity_fields : None or iterable (default=None)
        Iterable of field names whose completeness should be checked.
        ScopusSearch will perform the action specified in
        `integrity_action` if elements in these fields are missing.
        This helps avoiding idiosyncratically missing elements that
        should always be present, such as the EID or the source ID.
        A falsy value is stored as an empty list.

    integrity_action : str (optional, default="raise")
        What to do in case integrity of provided fields cannot be
        verified.  Possible actions:
        - "raise": Raise an AttributeError
        - "warn": Raise a UserWarning

    verbose : bool (optional, default=False)
        Whether to print a downloading progress bar to terminal.  Has
        no effect for download=False or when query file is in cache.

    kwds : key-value pairings, optional
        Keywords passed on as query parameters.  Must contain fields
        and values mentioned in the API specification
        (https://dev.elsevier.com/documentation/SCOPUSSearchAPI.wadl),
        such as "field" or "date".  The keys "cursor" and "count" are
        taken out and override the values derived from `subscriber`
        and `view`.

    Raises
    ------
    ScopusQueryError
        For non-subscribers, if the number of search results exceeds
        5000.

    ValueError
        If the view or the integrity_action parameter is not one
        of the allowed ones.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/ScopusSearch.html.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{fname}`,
    where `path` is specified in `~/.scopus/config.ini` and fname is
    the md5-hashed version of `query`.
    """
    # Validate; an unset view is resolved further below
    if view:
        check_parameter_value(view, ('STANDARD', 'COMPLETE'), "view")
    check_parameter_value(
        integrity_action, ("warn", "raise"), "integrity_action")

    # Derive the view and the page size from the subscription status
    if not view:
        view = "COMPLETE" if subscriber else "STANDARD"
    count = 200 if (view == "STANDARD" and subscriber) else 25
    # Explicit keywords take precedence over the derived values
    subscriber = kwds.pop("cursor", subscriber)
    count = kwds.pop("count", count)

    # Run the search
    self.query = query
    Search.__init__(self, query=query, api='ScopusSearch',
                    refresh=refresh, count=count, cursor=subscriber,
                    view=view, download=download, verbose=verbose,
                    **kwds)
    self.action = integrity_action
    self.integrity = integrity_fields or []
def __init__(self,
             query: str,
             refresh: Union[bool, int] = False,
             view: str = None,
             verbose: bool = False,
             download: bool = True,
             integrity_fields: Union[List[str], Tuple[str, ...]] = None,
             integrity_action: str = "raise",
             subscriber: bool = True,
             **kwds: str
             ) -> None:
    """Interaction with the Scopus Search API.

    :param query: A string of the query as used in the Advanced Search
                  on scopus.com.  All fields except "INDEXTERMS()" and
                  "LIMIT-TO()" work.
    :param refresh: Whether to refresh the cached file if it exists or
                    not.  If int is passed, cached file will be refreshed
                    if the number of days since last modification exceeds
                    that value.
    :param view: Which view to use for the query, see
                 https://dev.elsevier.com/sc_search_views.html.
                 Allowed values: STANDARD, COMPLETE.  If None, defaults
                 to COMPLETE if `subscriber=True` and to STANDARD if
                 `subscriber=False`.
    :param verbose: Whether to print a download progress bar.
    :param download: Whether to download results (if they have not been
                     cached).
    :param integrity_fields: Names of fields whose completeness should
                             be checked.  ScopusSearch will perform the
                             action specified in `integrity_action` if
                             elements in these fields are missing.  This
                             helps avoiding idiosyncratically missing
                             elements that should always be present
                             (e.g., EID or source ID).  A falsy value is
                             stored as an empty list.
    :param integrity_action: What to do in case integrity of provided
                             fields cannot be verified.
                             Possible actions:
                             - "raise": Raise an AttributeError
                             - "warn": Raise a UserWarning
    :param subscriber: Whether you access Scopus with a subscription or
                       not.  For subscribers, Scopus's cursor navigation
                       will be used.  Sets the number of entries in each
                       query iteration to the maximum number allowed by
                       the corresponding view.
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values mentioned in the API specification at
                 https://dev.elsevier.com/documentation/ScopusSearchAPI.wadl.
                 The keys "cursor" and "count" are taken out and override
                 the values derived from `subscriber` and `view`.

    Raises
    ------
    ScopusQueryError
        For non-subscribers, if the number of search results exceeds
        5000.

    ValueError
        If any of the parameters `integrity_action`, `refresh` or `view`
        is not one of the allowed values.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{fname}`,
    where `path` is specified in your configuration file and `fname` is
    the md5-hashed version of `query`.
    """
    # Validate; an unset view is resolved further below
    if view:
        check_parameter_value(view, ('STANDARD', 'COMPLETE'), "view")
    check_parameter_value(
        integrity_action, ("warn", "raise"), "integrity_action")

    # Derive the view and the page size from the subscription status
    if not view:
        view = "COMPLETE" if subscriber else "STANDARD"
    count = 200 if (view == "STANDARD" and subscriber) else 25
    # Explicit keywords take precedence over the derived values
    subscriber = kwds.pop("cursor", subscriber)
    count = kwds.pop("count", count)

    # Instance state is stored before the base initialiser runs; refresh
    # and view are not forwarded as arguments
    # (presumably the base class reads them from self - confirm)
    self._query = query
    self._view = view
    self._refresh = refresh
    self._integrity = integrity_fields or []
    self._action = integrity_action
    Search.__init__(self, query=query, api='ScopusSearch', count=count,
                    cursor=subscriber, download=download,
                    verbose=verbose, **kwds)
def __init__(self, identifier=None, refresh=False, view='META_ABS',
             id_type=None, **kwds):
    """Interaction with the Abstract Retrieval API.

    Parameters
    ----------
    identifier : str or int
        The identifier of a document.  Can be the Scopus EID, the Scopus
        ID, the PII, the Pubmed-ID or the DOI.  The value is converted
        to `str` first.

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    view : str (optional, default="META_ABS")
        The view of the file that should be downloaded.  Allowed values:
        META, META_ABS, REF, FULL, where FULL includes all information
        of META_ABS view and META_ABS includes all information of the
        META view.  For details see
        https://dev.elsevier.com/sc_abstract_retrieval_views.html.

    id_type : str (optional, default=None)
        The type of used ID.  Allowed values: None, 'eid', 'pii',
        'scopus_id', 'pubmed_id', 'doi'.  If the value is None, the
        function tries to infer the ID type itself.

    kwds : key-value pairings, optional
        Keywords passed on as query parameters.  Must contain fields
        and values mentioned in the API specification
        (https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl),
        such as "startref" or "refcount".

    Raises
    ------
    ValueError
        If the id_type parameter or the view parameter contains
        invalid entries.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/AbstractRetrieval.html.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{identifier}`,
    where `path` is specified in `~/.scopus/config.ini`.  In case
    `identifier` is a DOI, an underscore replaces the forward slash.
    """
    # Normalise and validate the inputs
    identifier = str(identifier)
    check_parameter_value(
        view, ('META', 'META_ABS', 'REF', 'FULL'), "view")
    if id_type is not None:
        check_parameter_value(
            id_type, ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi'),
            "id_type")
    else:
        # No type given: derive it from the identifier itself
        id_type = detect_id_type(identifier)

    Retrieval.__init__(self, identifier=identifier, id_type=id_type,
                       api='AbstractRetrieval', refresh=refresh,
                       view=view, **kwds)

    # Shortcuts into frequently used parts of the response
    self._json = self._json['abstracts-retrieval-response']
    self._head = chained_get(self._json, ["item", "bibrecord", "head"], {})
    self._confevent = chained_get(
        self._head,
        ['source', 'additional-srcinfo', 'conferenceinfo', 'confevent'],
        {})
    # self._view is not assigned in this method (presumably set by the
    # base initialiser - confirm); the REF view keeps references at the
    # top level of the response
    if self._view == "REF":
        reference_path = ["references"]
    else:
        reference_path = ['item', 'bibrecord', 'tail', 'bibliography']
    self._ref = chained_get(self._json, reference_path, {})
def __init__(self, eid, start, end=datetime.now().year, citation=None,
             refresh=False):
    """Interaction with the Citation Overview API.

    Parameters
    ----------
    eid : str
        The EID of the abstract.

    start : str or int
        The first year for which the citation count should be loaded.

    end : str or int (optional, default=datetime.now().year)
        The last year for which the citation count should be loaded.
        Default is the current year.

    citation : str (optional, default=None)
        Allows for the exclusion of self-citations or those by books.
        If None, will count all citations.
        Allowed values: None, exclude-self, exclude-books

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/CitationOverview.html.

    Notes
    -----
    The directory for cached results is `{path}/STANDARD/{eid}`,
    where `path` is specified in `~/.scopus/config.ini`.

    Your API Key needs to be approved by Elsevier to access this API.
    """
    # Validate the exclusion mode, if one was requested
    if citation:
        check_parameter_value(
            citation, ('exclude-self', 'exclude-books'), "citation")

    # Instance state
    self._start = int(start)
    self._end = int(end)
    self._citation = citation

    # Fetch the data; the year range is built from the raw arguments
    year_range = "{}-{}".format(start, end)
    # In case Scopus adds different views in future
    fixed_view = "STANDARD"
    Retrieval.__init__(self, eid, 'CitationOverview', refresh,
                       view=fixed_view, date=year_range,
                       citation=citation)
    self._data = self._json['abstract-citations-response']

    # citeInfoMatrix: only the first citeInfo entry is parsed
    cite_info_xml = self._data['citeInfoMatrix']['citeInfoMatrixXML']
    first_cite_info = cite_info_xml['citationMatrix']['citeInfo'][0]
    self._citeInfoMatrix = _parse_dict(first_cite_info)
    # identifier-legend: only the first identifier entry is parsed
    first_legend = self._data['identifier-legend']['identifier'][0]
    self._identifierlegend = _parse_dict(first_legend)
    # citeColumnTotalXML
    self._citeColumnTotalXML = self._data['citeColumnTotalXML']  # not used
def __init__(self,
             author_id: Union[int, str],
             refresh: Union[bool, int] = False,
             view: str = "ENHANCED",
             **kwds: str
             ) -> None:
    """Interaction with the Author Retrieval API.

    :param author_id: The author's ID, or the author's EID.
    :param refresh: Whether an existing cached file should be refreshed.
                    If an int is given, the cached file is refreshed once
                    the number of days since its last modification
                    exceeds that value.
    :param view: Which view of the file to download.  Allowed values:
                 METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD
                 includes all information of the LIGHT view and ENHANCED
                 includes all information of any view.  For details see
                 https://dev.elsevier.com/sc_author_retrieval_views.html.
                 Note: Neither the BASIC nor the DOCUMENTS view are
                 active, although documented.
    :param kwds: Keywords forwarded as query parameters.  Must contain
                 fields and values mentioned in the API specification at
                 https://dev.elsevier.com/documentation/AuthorRetrievalAPI.wadl.

    Raises
    ------
    ValueError
        If any of the parameters `refresh` or `view` is not
        one of the allowed values.

    Notes
    -----
    The directory for cached results is `{path}/ENHANCED/{author_id}`,
    where `path` is specified in your configuration file, and `author_id`
    is stripped of an eventually leading `'9-s2.0-'`.
    """
    # Validate the requested view before doing anything else
    check_parameter_value(view,
                          ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED'),
                          "view")

    # Keep only the part after the last dash (drops an EID prefix), store
    # the settings on the instance, then let the base class load the json
    self._id = str(author_id).split('-')[-1]
    self._refresh = refresh
    self._view = view
    Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                       **kwds)

    # Unwrap the response
    response = self._json['author-retrieval-response']
    self._json = response
    try:
        self._json = response[0]
    except KeyError:
        # Incomplete forward: only alias information is available, so
        # collect the IDs the profile now points to and warn the user
        alias_entries = listify(response['alias']['prism:url'])
        self._alias = [entry['$'].rsplit(':', 1)[-1]
                       for entry in alias_entries]
        alias_str = ', '.join(self._alias)
        message = (
            f'Author profile with ID {author_id} has been merged and '
            f'the main profile is now one of {alias_str}. Please update '
            'your records manually. Functionality of this object is '
            'reduced.'
        )
        warn(message, UserWarning)
    else:
        self._alias = None
    self._profile = self._json.get("author-profile", {})
def __init__(self, identifier, id_type, refresh=False):
    """Interaction with the PlumX Metrics API.

    Parameters
    ----------
    identifier : str
        The identifier of a document.

    id_type: str
        The type of used ID. Allowed values are:
            - 'airitiDocId'
            - 'arxivId'
            - 'cabiAbstractId'
            - 'citeulikeId'
            - 'digitalMeasuresArtifactId'
            - 'doi'
            - 'elsevierId'
            - 'elsevierPii'
            - 'facebookCountUrlId'
            - 'figshareArticleId'
            - 'githubRepoId'
            - 'isbn'
            - 'lccn'
            - 'medwaveId'
            - 'nctId'
            - 'oclc'
            - 'pittEprintDscholarId'
            - 'pmcid'
            - 'pmid'
            - 'redditId'
            - 'repecHandle'
            - 'repoUrl'
            - 'scieloId'
            - 'sdEid'
            - 'slideshareUrlId'
            - 'smithsonianPddrId'
            - 'soundcloudTrackId'
            - 'ssrnId'
            - 'urlId'
            - 'usPatentApplicationId'
            - 'usPatentPublicationId'
            - 'vimeoVideoId'
            - 'youtubeVideoId'

    refresh : bool or int (optional, default=False)
        Whether an existing cached file should be refreshed.  If int
        is passed, the cached file is refreshed once the number of days
        since its last modification exceeds that value.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/PlumXMetrics.html.

    Notes
    -----
    The directory for cached results is `{path}/ENHANCED/{identifier}`,
    where `path` is specified in `~/.scopus/config.ini`.
    """
    # Reject unknown ID types before storing anything or loading data
    allowed_id_types = (
        'airitiDocId',
        'arxivId',
        'cabiAbstractId',
        'citeulikeId',
        'digitalMeasuresArtifactId',
        'doi',
        'elsevierId',
        'elsevierPii',
        'facebookCountUrlId',
        'figshareArticleId',
        'githubRepoId',
        'isbn',
        'lccn',
        'medwaveId',
        'nctId',
        'oclc',
        'pittEprintDscholarId',
        'pmcid',
        'pmid',
        'redditId',
        'repecHandle',
        'repoUrl',
        'scieloId',
        'sdEid',
        'slideshareUrlId',
        'smithsonianPddrId',
        'soundcloudTrackId',
        'ssrnId',
        'urlId',
        'usPatentApplicationId',
        'usPatentPublicationId',
        'vimeoVideoId',
        'youtubeVideoId',
    )
    check_parameter_value(id_type, allowed_id_types, "id_type")

    # Expose what was requested as public attributes
    self.identifier = identifier
    self.id_type = id_type

    # Load the data through the base class; this API always uses the
    # ENHANCED view
    Retrieval.__init__(
        self,
        identifier=identifier,
        id_type=id_type,
        api='PlumXMetrics',
        refresh=refresh,
        view='ENHANCED',
    )