def test_set_apikey_insttoken(self):
    """Test case: API key and insttoken assigned through the setters can be
        read back unchanged"""
    client_under_test = ElsClient("dummy")
    expected_key = config['apikey']
    client_under_test.api_key = expected_key
    expected_token = config['insttoken']
    client_under_test.inst_token = expected_token
    assert client_under_test.api_key == expected_key
    assert client_under_test.inst_token == expected_token
class TestElsAffil:
    """Test affiliation functionality"""

    ## Test data
    aff_uri = "https://api.elsevier.com/content/affiliation/affiliation_id/60101411"
    aff_id_int = 60101411
    aff_id_str = "60101411"

    ## Test initialization
    def test_init_uri(self):
        """ Test case: uri is set correctly during initialization with uri"""
        myAff = ElsAffil(uri = self.aff_uri)
        assert myAff.uri == self.aff_uri

    def test_init_aff_id_int(self):
        """ Test case: uri is set correctly during initialization with
            affiliation id as integer"""
        myAff = ElsAffil(affil_id = self.aff_id_int)
        assert myAff.uri == self.aff_uri

    def test_init_aff_id_str(self):
        """ Test case: uri is set correctly during initialization with
            affiliation id as string"""
        myAff = ElsAffil(affil_id = self.aff_id_str)
        assert myAff.uri == self.aff_uri

    ## Test reading/writing affiliation profile data
    ## NOTE(review): the tests below share the class-level myAff object and
    ## presumably rely on test_read_good_bad_client running first so that
    ## myAff.data is populated - confirm test ordering is deterministic.
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token = config['insttoken'])
    good_client.local_dir = str(test_path)

    myAff = ElsAffil(uri = aff_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
            and using a badly-configured client does not."""
        assert not self.myAff.read(self.bad_client)
        assert self.myAff.read(self.good_client)

    def test_json_to_dict(self):
        """Test case: the JSON read by the affiliation object from the API is
            parsed into a Python dictionary"""
        assert isinstance(self.myAff.data, dict)

    def test_name_getter(self):
        """Test case: the name attribute is returned as a non-empty string"""
        assert isinstance(self.myAff.name, str) and self.myAff.name != ''

    def test_write(self):
        """Test case: the affiliation object's data is written to a file with
            the affiliation ID in the filename"""
        self.myAff.write()
        assert util.file_exist_with_id(
            self.myAff.data['coredata']['dc:identifier'].split(':')[1])

    def test_read_docs(self):
        """Test case: the number of documents read for the affiliation equals
            the document count reported in its core data"""
        self.myAff.read_docs()
        assert len(self.myAff.doc_list) == int(
            self.myAff.data['coredata']['document-count'])
class TestFullDoc:
    """Test ScienceDirect article functionality"""

    ## Test data
    full_pii_uri = "https://api.elsevier.com/content/article/pii/S1674927814000082"
    sd_pii = 'S1674927814000082'
    full_doi_uri = "https://api.elsevier.com/content/article/doi/10.1016/S1525-1578(10)60571-5"
    doi = '10.1016/S1525-1578(10)60571-5'

    ## Test initialization
    def test_init_uri(self):
        """ Test case: uri is set correctly during initialization with uri"""
        myFullDoc = FullDoc(uri = self.full_pii_uri)
        assert myFullDoc.uri == self.full_pii_uri

    def test_init_sd_pii(self):
        """ Test case: uri is set correctly during initialization with
            ScienceDirect PII"""
        myFullDoc = FullDoc(sd_pii = self.sd_pii)
        assert myFullDoc.uri == self.full_pii_uri

    def test_init_doi(self):
        """ Test case: uri is set correctly during initialization with DOI"""
        myFullDoc = FullDoc(doi = self.doi)
        assert myFullDoc.uri == self.full_doi_uri

    ## Test reading/writing full article data
    ## NOTE(review): the tests below share the class-level myFullDoc object
    ## and presumably rely on test_read_good_bad_client running first so that
    ## myFullDoc.data is populated - confirm test ordering is deterministic.
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token = config['insttoken'])
    good_client.local_dir = str(test_path)

    myFullDoc = FullDoc(uri = full_pii_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
            and using a badly-configured client does not."""
        assert not self.myFullDoc.read(self.bad_client)
        assert self.myFullDoc.read(self.good_client)

    def test_json_to_dict(self):
        """Test case: the JSON read by the full article object from the
            API is parsed into a Python dictionary"""
        assert isinstance(self.myFullDoc.data, dict)

    def test_title_getter(self):
        """Test case: the title attribute is returned as a non-empty string"""
        assert isinstance(self.myFullDoc.title, str) and self.myFullDoc.title != ''

    def test_write(self):
        """Test case: the full article object's data is written to a file with
            the ID in the filename"""
        self.myFullDoc.write()
        ## Strip '-', '(' and ')' from the PII in a single pass
        stripped_pii = self.myFullDoc.data['coredata']['pii'].translate(
            str.maketrans('', '', '-()'))
        assert util.file_exist_with_id(stripped_pii)
class TestAbsDoc:
    """Test Scopus document functionality"""

    ## Test data
    abs_uri = "https://api.elsevier.com/content/abstract/scopus_id/84872135457"
    scp_id_int = 84872135457
    scp_id_str = "84872135457"

    ## Test initialization
    def test_init_uri(self):
        """ Test case: uri is set correctly during initialization with uri"""
        myAbsDoc = AbsDoc(uri = self.abs_uri)
        assert myAbsDoc.uri == self.abs_uri

    def test_init_scp_id_int(self):
        """ Test case: uri is set correctly during initialization with
            Scopus id as integer"""
        myAbsDoc = AbsDoc(scp_id = self.scp_id_int)
        assert myAbsDoc.uri == self.abs_uri

    def test_init_scp_id_str(self):
        """ Test case: uri is set correctly during initialization with
            Scopus id as string"""
        myAbsDoc = AbsDoc(scp_id = self.scp_id_str)
        assert myAbsDoc.uri == self.abs_uri

    ## Test reading/writing abstract document data
    ## NOTE(review): the tests below share the class-level myAbsDoc object
    ## and presumably rely on test_read_good_bad_client running first so that
    ## myAbsDoc.data is populated - confirm test ordering is deterministic.
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token = config['insttoken'])
    good_client.local_dir = str(test_path)

    myAbsDoc = AbsDoc(uri = abs_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
            and using a badly-configured client does not."""
        assert not self.myAbsDoc.read(self.bad_client)
        assert self.myAbsDoc.read(self.good_client)

    def test_json_to_dict(self):
        """Test case: the JSON read by the abstract document object from the
            API is parsed into a Python dictionary"""
        assert isinstance(self.myAbsDoc.data, dict)

    def test_title_getter(self):
        """Test case: the title attribute is returned as a non-empty string"""
        assert isinstance(self.myAbsDoc.title, str) and self.myAbsDoc.title != ''

    def test_write(self):
        """Test case: the abstract document object's data is written to a file
            with the Scopus ID in the filename"""
        self.myAbsDoc.write()
        assert util.file_exist_with_id(
            self.myAbsDoc.data['coredata']['dc:identifier'].split(':')[1])
def main():
    """Read the author IDs and API credentials from disk, then fetch metrics.

    The IDs come from the 'ids' entry of authors.json and the credentials
    ('apikey', 'insttoken') from config.json; both are handed to get_metrics.
    """
    # Author IDs to process
    with open('authors.json', 'r', encoding='utf-8') as authors_file:
        author_list = json.load(authors_file)['ids']

    ## API credentials
    with open("config.json") as credentials_file:
        config = json.load(credentials_file)

    ## Client carrying both the API key and the institutional token
    client = ElsClient(config['apikey'])
    client.inst_token = config['insttoken']

    get_metrics(client, author_list)
def test_init_apikey_insttoken_path(self):
    """Test case: the API key, insttoken and local directory passed to the
        constructor are all exposed unchanged by the client"""
    expected_dir = '\\TEMP'
    client_under_test = ElsClient(
        config['apikey'],
        inst_token = config['insttoken'],
        local_dir = expected_dir)
    assert client_under_test.api_key == config['apikey']
    assert client_under_test.inst_token == config['insttoken']
    assert str(client_under_test.local_dir) == expected_dir
def search(self):
    """Execute every query in self.queries and write each result set to disk.

    Results are written through self.write_json into
    result/<subject>_<YYYYMMDD>/, one file per query, named after the
    matching entry pulled from self.names. Failures are recorded in
    error_log rather than raised; a failure to obtain a name stops the
    whole run, while a failing search only skips that query.
    """
    # initialize the keys
    keygen = self.key_generator()
    init_key = next(keygen)

    # Initialize the elsapy client
    client = ElsClient(init_key, view=self.view)
    count = 0

    folder = Path('result') / f'{self.subject}_{time.strftime("%Y%m%d")}'
    # exist_ok avoids the check-then-create race of a separate exists() test
    folder.mkdir(parents=True, exist_ok=True)

    for query in self.queries:
        try:
            name = next(self.names)
            name = '_'.join(name)
        except Exception:
            # this could happen if your file name contains unexpected
            # characters (or self.names is exhausted); Exception rather than
            # a bare except so Ctrl-C / SystemExit still propagate
            error_log.info(f'Name error at {query}.')
            break

        try:
            srch = ElsSearch(query, index=self.subject, keygen=keygen)
            srch.execute(client, get_all=True)
            count += 1
            print(f'Progress: {count}/{self.length}, {query}')
            if srch.status_code == 400:
                error_log.info(f'Bad query: {name}')
            else:
                search_log.info(f'Results found: {name}, # of results: {len(srch.results)}')
                self.write_json(srch.results, name, folder)
        except Exception as e:
            error_log.info(f'Search error: {name}, {str(e)}')
def get_pubs_org_from_api(org_id: str, api_key=None) -> Optional[list]:
    """
    Loads and returns data on publications of organization from Scopus via API.

    Args:
        org_id: Scopus affiliation ID used in the AF-ID(...) query.
        api_key: API key passed to ElsClient and to get_header.

    Returns:
        The search results with an extra 'authors' entry on each publication
        that exposes an 'author-affiliation' link, or None when the client
        reports a status code other than 200.
    """
    client = ElsClient(api_key)
    search = ElsSearch(f"(AF-ID({org_id}))", 'scopus')  # AND PUBYEAR > 2019
    # TODO: rewrite in asynchronous mode
    search.execute(client, get_all=True)  # load the organization's publication data
    if client.req_status['status_code'] != 200:
        return None
    pubs = search.results
    logging.info('%d publications received', len(pubs))

    # build the list of tasks for loading author data:
    # publication index -> author-affiliation URL
    tasks = {}
    for i, res in enumerate(pubs):
        for authors_link in res['link']:
            if authors_link['@ref'] == 'author-affiliation':
                tasks[i] = authors_link['@href']
                break

    header = get_header(api_key)
    result = async_fetch_urls(tasks.values(), header)

    # dicts preserve insertion order, so keys line up with the fetched values
    for i, j in zip(tasks, result):
        pubs[i]['authors'] = j

    return pubs
def initClient(self):
    """Build self.client from the 'apikey' and 'insttoken' entries of self.config.

    This stays best-effort: any failure leaves self.client as it was and
    never raises, but the reason is now logged instead of being discarded.
    """
    import logging

    ## Initialize client
    try:
        self.client = ElsClient(self.config['apikey'])
        self.client.inst_token = self.config['insttoken']
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
        logging.getLogger(__name__).warning(
            "Could not initialize ElsClient: %r", exc)
def auth(self):
    """Create self.client from the credentials stored at self.config_path.

    The JSON file must provide 'apikey' and 'insttoken'.
    """
    # The context manager closes the file even when json.load raises
    with open(self.config_path) as con_file:
        config = json.load(con_file)

    self.client = ElsClient(config['apikey'])
    self.client.inst_token = config['insttoken']
def search_my_query(my_query):
    '''
    Function to search a query in scopus
    :param my_query: string of query desired to be searched in scopus
    :return: resultant dataframe with query from scopus, or None when
        my_query is not a string (no search is run in that case)
    '''
    if not isinstance(my_query, str):
        print('the query must be a string. no searches run...')
        return None

    ## Load configuration
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    client = ElsClient(config['APIKey'])

    ## Initialize doc search object using Scopus and execute search, retrieving
    #   all results
    print('......Searching Scopus......')
    print('......for..... ' + my_query + ' ....')
    doc_srch = ElsSearch(my_query, 'scopus')
    doc_srch.execute(client, get_all=True)
    print("doc_srch has", len(doc_srch.results), "results.")

    return doc_srch.results_df
def __init__(self, con_path):
    """Read the JSON configuration at con_path and create the API client.

    The parsed configuration is kept on self.config; its 'apikey' entry is
    used to construct self.client.
    """
    from elsapy.elsclient import ElsClient
    # NOTE(review): ElsSearch is imported but not used in this method
    from elsapy.elssearch import ElsSearch

    with open(con_path) as config_handle:
        loaded_config = json.load(config_handle)
    self.config = loaded_config
    self.client = ElsClient(loaded_config['apikey'])
def __init__(self):
    """Create self.client from the 'apikey' and 'insttoken' entries of
    config.json in the current working directory."""
    ## Load configuration
    # The context manager closes the file even when json.load raises
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    self.client = ElsClient(config['apikey'])
    self.client.inst_token = config['insttoken']
def initialiseScopus():
    """Return an ElsClient built from the 'apikey' entry of config.json in
    the current working directory."""
    ## Load configuration
    # The context manager closes the file even when json.load raises
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    client = ElsClient(config['apikey'])
    return client
def __init__(self, path):
    """Store path and create self.client from config.json.

    Args:
        path: Stored unchanged on self.path.
    """
    self.path = path

    # The context manager closes the file even when json.load raises
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    self.client = ElsClient(config['apikey'])
    self.client.inst_token = config['insttoken']
def getInfoAboutTeacher(person):
    """Fetch the Scopus author profile core data for person.

    Args:
        person: Object whose scopusId attribute identifies the author.

    Returns:
        The 'coredata' entry of the author profile, or None when the read
        fails (a message is printed in that case).
    """
    # Load configuration; the context manager closes the file even when
    # json.load raises
    with open(SCOPUS_CREDENTIAL_FILE) as con_file:
        config = json.load(con_file)

    # Initialize client
    client = ElsClient(config['apikey'])
    client.inst_token = config['insttoken']

    # Initialize author with uri
    my_auth = ElsAuthor(
        uri='https://api.elsevier.com/content/author/author_id/' + str(person.scopusId))

    # Read author data and hand back its core data
    if my_auth.read(client):
        return my_auth.data['coredata']

    print("Read author failed.")
    return None
def __init__(self):
    """Create the API client from config.json and set up the parsing state.

    Besides self.client this creates self.psa (a ParseSpecialAuthors
    instance) and self.countries_by_num_authors, a list of max_num_authors
    independent empty sets.
    """
    ## Load configuration
    # The context manager closes the file even when json.load raises
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    self.client = ElsClient(config['apikey'])
    self.client.inst_token = config['insttoken']

    self.psa = ParseSpecialAuthors()
    self.countries_by_num_authors = [set() for _ in range(max_num_authors)]
def search(self, query_name):
    """Run the stored query called query_name against the Scopus index.

    Args:
        query_name (str): identifies the query to load; it is resolved by
            self._load_query (presumably to a file in the ./queries/
            directory - confirm against _load_query).

    Raises:
        FileNotFoundError if the query file can not be found (as reported
        by the original documentation; raised by _load_query, not here).

    Returns:
        list: The results.
    """
    scopus_search = ElsSearch(self._load_query(query_name), "scopus")
    scopus_search.execute(ElsClient(self.api_key))
    return scopus_search.results
class TestSearch:
    """Test search functionality"""

    ## Test data
    base_url = u'https://api.elsevier.com/content/search/'
    search_types = [
     {"query" : "authlast(keuskamp)", "index" : "author"},
     {"query" : "affil(amsterdam)", "index" : "affiliation"},
     {"query" : "AFFIL(dartmouth) AND AUTHOR-NAME(lewis) AND PUBYEAR > 2011",
      "index" : "scopus"},
     {"query" : "star trek vs star wars", "index" : "sciencedirect"}
    ]

    searches = [ ElsSearch(search_type["query"], search_type["index"])
                for search_type in search_types]

    good_client = ElsClient(config['apikey'], inst_token = config['insttoken'])

    ## Test initialization
    def test_init_uri(self):
        """Test case: query, index and uri are set correctly during
        initialization"""
        # One assertion per attribute so a failure names the offending search
        for search, search_type in zip(self.searches, self.search_types):
            expected_uri = (self.base_url +
                            search_type['index'] +
                            '?query=' +
                            url_encode(search_type['query']))
            assert search.query == search_type['query']
            assert search.index == search_type['index']
            assert search.uri == expected_uri

    def test_execution(self):
        '''Test case: all searches are executed without raising an exception.'''
        # An exception from execute() fails the test; nothing else is checked
        for search in self.searches:
            search.execute(self.good_client)
def find_articles(year=None, issn=None, get_all=True, id_type="doi", apikey=None):
    """
    Returns a list of the DOI's for all articles published in the specified year and journal.

    Args:
        year (str): year of publication
        issn (str): ISSN (or EISSN) of journal
        get_all (bool): Whether all results should be returned or just the 1st result. Default is True.
        id_type: (str) Return document eids or dois. Default is doi.
        apikey (str): Optional API key. When given, a dedicated client is
            created for this call; otherwise the module-level CLIENT is used.

    Returns:
        ids (list): The eids/dois for all articles published in corresponding
            journal in the specified year. Results lacking the requested
            field are skipped.
    """
    query = build_scopus_query(year=year, issn=issn)

    # Bind to a local name: assigning to CLIENT here would make it local to
    # the function and raise UnboundLocalError whenever apikey is not given.
    # NOTE(review): assumes a module-level CLIENT exists - confirm.
    client = ElsClient(apikey, num_res=10000) if apikey else CLIENT

    search = ElsSearch(query, index='scopus')
    search.execute(els_client=client, get_all=get_all)

    key = 'prism:doi' if id_type == "doi" else id_type

    ids = []
    for r in search.results:
        try:
            ids.append(r[key])
        except (LookupError, TypeError):
            # result has no such field (or is not subscriptable by key)
            continue

    return ids
def get_doc(self, dtype, identity):
    """
    This method retrieves a 'Doc' object from the Elsevier API. The doc object contains metadata and full-text information
    about a publication associated with a given PII.

    Parameters:
    -----------
    dtype(str,required): The type of identification string being used to access the document; 'pii' or 'doi'.
        (Almost always PII in our case.)

    identity: The actual identification string/ PII that will be used to query.

    Returns:
    --------
    The FullDoc object, whether or not reading it succeeded (a message is printed on failure).

    Raises:
    -------
    ValueError: If dtype is neither 'pii' nor 'doi'.
    """
    if dtype == 'pii':
        doc = FullDoc(sd_pii = identity)
    elif dtype == 'doi':
        doc = FullDoc(doi = identity)
    else:
        # Previously this fell through and failed on the unbound name `doc`
        raise ValueError(f"Unsupported dtype {dtype!r}; expected 'pii' or 'doi'")

    if doc.read(ElsClient(self.API_list[0])):
        doc.write()
    else:
        print ("Read document failed.")

    return doc
class TestElsAuthor:
    """Test author object functionality"""

    ## Test data
    auth_uri = "https://api.elsevier.com/content/author/author_id/55070335500"
    auth_id_int = 55070335500
    auth_id_str = "55070335500"

    ## Test initialization
    def test_init_uri(self):
        """ Test case: uri is set correctly during initialization with uri"""
        myAuth = ElsAuthor(uri = self.auth_uri)
        assert myAuth.uri == self.auth_uri

    def test_init_auth_id_int(self):
        """ Test case: uri is set correctly during initialization with
            author id as integer"""
        myAuth = ElsAuthor(author_id = self.auth_id_int)
        assert myAuth.uri == self.auth_uri

    def test_init_auth_id_str(self):
        """ Test case: uri is set correctly during initialization with
            author id as string"""
        myAuth = ElsAuthor(author_id = self.auth_id_str)
        assert myAuth.uri == self.auth_uri

    ## Test reading/writing author profile data
    ## NOTE(review): the tests below share the class-level myAuth object and
    ## presumably rely on test_read_good_bad_client running first so that
    ## myAuth.data is populated - confirm test ordering is deterministic.
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token = config['insttoken'])
    good_client.local_dir = str(test_path)

    myAuth = ElsAuthor(uri = auth_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
            and using a badly-configured client does not."""
        assert not self.myAuth.read(self.bad_client)
        assert self.myAuth.read(self.good_client)

    def test_json_to_dict(self):
        """Test case: the JSON read by the author object from the API is
            parsed into a Python dictionary"""
        assert isinstance(self.myAuth.data, dict)

    def test_name_getter(self):
        """Test case: the full name attribute is returned as a non-empty
            string"""
        assert isinstance(self.myAuth.full_name, str) and self.myAuth.full_name != ''

    def test_write(self):
        """Test case: the author object's data is written to a file with the
            author ID in the filename"""
        self.myAuth.write()
        assert util.file_exist_with_id(
            self.myAuth.data['coredata']['dc:identifier'].split(':')[1])

    def test_read_docs(self):
        """Test case: reading the author's documents yields a non-empty
            document list"""
        self.myAuth.read_docs()
        assert len(self.myAuth.doc_list) > 0
        ## TODO: once author metrics inconsistency is resolved, change to:
        # assert len(self.myAuth.doc_list) == int(self.myAuth.data['coredata']['document-count'])

    def test_read_metrics_new_author(self):
        """Test case: reading metrics on a freshly created author object
            yields truthy citation, cited-by and document counts and h-index"""
        myAuth = ElsAuthor(uri = self.auth_uri)
        myAuth.read_metrics(self.good_client)
        assert (
            myAuth.data['coredata']['citation-count'] and
            myAuth.data['coredata']['cited-by-count'] and
            myAuth.data['coredata']['document-count'] and
            myAuth.data['h-index'])

    def test_read_metrics_existing_author(self):
        """Test case: reading metrics on the shared author object yields
            truthy citation, cited-by and document counts and h-index"""
        self.myAuth.read_metrics(self.good_client)
        assert (
            self.myAuth.data['coredata']['citation-count'] and
            self.myAuth.data['coredata']['cited-by-count'] and
            self.myAuth.data['coredata']['document-count'] and
            self.myAuth.data['h-index'])
import pandas as pd from elsapy.elsclient import ElsClient from elsapy.elssearch import ElsSearch import json fefu_id = '60103811' con_file = open("config.json") config = json.load(con_file) con_file.close() # view = 'COMPLETE' -- to access more fields client = ElsClient(config['apikey'], num_res = 25) search = ElsSearch('AF-ID( ' + fefu_id + ' )', 'scopus') search.execute(client) sr = search.results result = [] res = {} authorname = '' authid = '' special_fields = ['authname', 'authid', 'prism:coverDisplayDate', 'prism:pageRange', 'openaccessFlag', 'link', 'prism:coverDate'] fields = { 'authname' : 'Authors', 'authid' : 'Author(s) ID', 'dc:title' : 'Title', 'prism:coverDate' : 'Year', 'prism:publicationName' : 'Source title', 'prism:volume' : 'Volume', 'prism:doi' : 'DOI',
from elsapy.elsprofile import ElsAuthor, ElsAffil
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch
import json
import pprint
from rdflib import Graph, RDF, Namespace, Literal, URIRef
from rdflib import URIRef, BNode, Literal

# Load configuration; the context manager closes the file even when
# json.load raises
with open("config.json") as con_file:
    config = json.load(con_file)

## Initialize client
client = ElsClient(config['apikey'])
# client.inst_token = config['insttoken']

## Author example
# Initialize author with uri
URI = "https://krr.cs.vu.nl/"

data = {"data": "24.3"}
data_json = json.dumps(data)
headers = {'Accept': 'application/json'}

my_auth = ElsAuthor(
    uri='https://api.elsevier.com/content/author/author_id/7004322609')  # data from Elsevier
"""An example program that uses the elsapy module"""

from elsapy.elsclient import ElsClient
from elsapy.elsprofile import ElsAuthor, ElsAffil
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch
import json

## Load configuration; the context manager closes the file even when
## json.load raises
with open("config.json") as con_file:
    config = json.load(con_file)

## Initialize client
client = ElsClient(config['apikey'])
# client.inst_token = config['insttoken']

## Author example
# Initialize author with uri
data = {"data" : "24.3"}
data_json = json.dumps(data)
headers = {'Accept': 'application/json'}

author_uri = 'https://api.elsevier.com/content/author/author_id/7004322609'
my_auth = ElsAuthor(uri = author_uri)

# No API key is embedded in the URL: credentials belong in config.json only.
# NOTE(review): relies on the client sending its configured key with each
# request - confirm against the elsapy ElsClient.exec_request documentation.
resp = client.exec_request(author_uri)
def test_init_apikey_insttoken(self):
    """Test case: the API key and insttoken passed to the constructor are
        exposed unchanged by the client"""
    client_under_test = ElsClient(
        config['apikey'],
        inst_token = config['insttoken'])
    assert client_under_test.api_key == config['apikey']
    assert client_under_test.inst_token == config['insttoken']
# ------------------------------------------------------------------------------- #import elsapy from elsapy.elsclient import ElsClient from elsapy.elsprofile import ElsAuthor from elsapy.elssearch import ElsSearch import pandas as pd names_csv = 'sustainability-persons_no_sir_sub.csv' df = pd.read_csv(names_csv, error_bad_lines=False) API_KEY = 'd54807cb12735c3d461f169c0ae75a2e' ## Initialize client client = ElsClient(API_KEY) query = 'AUTHFIRST(%s) AND AUTHLASTNAME(%s)' # AND AF-ID(60003892)' #name_list = df["Name"][86].split() #first,last = name_list[0],name_list[len(name_list)-1] name = df["Name"].iloc[26] profile_urls = [] for name in df["Name"]: name_list = name.split() first, last = name_list[0], name_list[len(name_list) - 1] auth_srch = ElsSearch(query % (first, last), 'author') auth_srch.execute(client) #print ("auth_srch has", len(auth_srch.results), "results.") try:
from elsapy.elsclient import ElsClient
from elsapy.elsprofile import ElsAuthor, ElsAffil
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch
import json
import requests
import pandas as pd

## Load configuration; the context manager closes the file even when
## json.load raises
with open("config.json") as con_file:
    config = json.load(con_file)

## Initialize client
client = ElsClient(config['apikey'])
client.inst_token = config['insttoken']

## Initialize author search object and execute search
auth_srch = ElsSearch('authlast(torney)+AUTHFIRST(colin)', 'author')
auth_srch.execute(client)
print("auth_srch has", len(auth_srch.results), "results.")

## Search for the co-authors of a given author ID
auth_srch = ElsSearch('co-author(24588214300)', 'author')
auth_srch.execute(client)

aid = auth_srch.results[0]['dc:identifier']
# keeps the last 11 characters of the identifier
aid = aid[-11:]
auth_srch.results[0]['preferred-name']

# https so the API key in the query string is not sent in clear text
getit = 'https://api.elsevier.com/content/search/author?query=AUTHLASTNAME%28Torney%29&apiKey=' + config[
    'apikey']
# Settings, query and API key all come from config.json
with open('config.json') as config_file:
    config = json.load(config_file)

# False gets one chunk (25) True gets all or max (5000)
GET_ALL = config['get_all']
# Save fulltext
FULL_TEXT = config['full_text']
# Search only openaccess documents (so we can get the full text)
OPEN_ACCESS = config['open_access']

# "public policy AND (impact OR result OR evaluation OR evidence) AND (climate OR environment)"
query = config['query']

if OPEN_ACCESS:
    query = "openaccess(1) AND " + query

client = ElsClient(config['api_key'])

doc_srch = ElsSearch(query, 'sciencedirect')
doc_srch.execute(client, get_all=GET_ALL)

for doc in doc_srch.results:
    doi = doc['dc:identifier']
    print(doi)
    if not FULL_TEXT:
        continue
    ## ScienceDirect (full-text) document example using DOI
    doi_doc = FullDoc(doi=doi)
    if not doi_doc.read(client):
        print("Read full-text failed for DOI", doi)
        continue
    doi_doc.write()
def main():
    """Resolve each author name in authors.json to a Scopus author ID.

    For every [first name, last name] pair under 'names', an author search
    is run. When several authors match, the candidates are listed and the
    user picks one interactively. The IDs collected so far are written back
    to authors.json after each author, and finally passed to
    get_author_by_id.get_metrics.
    """
    # Load author names list
    with open('authors.json', 'r', encoding='utf-8') as fp:
        data = json.load(fp)
        search_list = data['names']

    # Load configuration; the context manager closes the file even when
    # json.load raises
    with open("config.json") as con_file:
        config = json.load(con_file)

    # Initialize client
    client = ElsClient(config['apikey'])
    client.inst_token = config['insttoken']

    # Run search for each author names in list and get IDs
    auth_id_list = []
    for author in search_list:
        search_query = ""
        if author[0]:
            search_query += f"authfirst({author[0]}) "
        if author[1]:
            search_query += f"authlast({author[1]})"

        auth_srch = ElsSearch(search_query, 'author')
        auth_srch.execute(client)
        print(
            f'\n{author[0]} {author[1]}: {len(auth_srch.results)} results found!\n'
        )

        # If there are more than one author that matches the search, display search results
        if len(auth_srch.results) > 1:
            for i, search_result in enumerate(auth_srch.results):
                first_name = search_result['preferred-name']['given-name']
                surname = search_result['preferred-name']['surname']
                try:
                    affiliation = search_result['affiliation-current'][
                        'affiliation-name']
                    affiliation_country = search_result['affiliation-current'][
                        'affiliation-country']
                except KeyError:
                    affiliation = ''
                    affiliation_country = ''
                print(
                    f"[{i+1}] {first_name} {surname}, {affiliation} ({affiliation_country})"
                )

            # Choose desired author (the list shown to the user is 1-based)
            desired_author_index = int(input('\nChoose correct author: ')) - 1
        else:
            desired_author_index = 0

        # Get author ID
        desired_author = auth_srch.results[desired_author_index]
        link = desired_author['link'][0]['@href']
        auth_id = desired_author['dc:identifier'].split(':')[1]
        auth_id_list.append(auth_id)

        # Save author ID to JSON (rewritten after every author, so the IDs
        # found so far are already on disk if a later author fails)
        with open('authors.json', 'w', encoding='utf-8') as fp:
            data = {'ids': auth_id_list, 'names': search_list}
            json.dump(data, fp, indent=4, sort_keys=True)

        print(link)
        print('\n-----------\n')

    print('Grabbing author metrics...')
    get_author_by_id.get_metrics(client, auth_id_list)