import pandas
from pyscopus import Scopus


def search_for_range(filename, time_range, keys):
    '''Search Scopus for each reviewer's publications within a time range.

    filename is the name of a file listing the Scopus IDs of reviewers,
    one per line. time_range is a list of two strings, the first the start
    time and the second the end time, e.g. ['2011', '2012']. keys is a list
    of Scopus API key strings; ten keys usually cover roughly
    70,000-80,000 searches.
    '''
    scopus = Scopus(keys[0])
    with open(filename, "r") as handler:
        scs = handler.readlines()
    start = time_range[0]
    end = time_range[1]
    epoch = 0
    pandasall = []
    for scopusid in scs:
        try:
            # Rotate to a fresh API key as the request count grows; check the
            # larger threshold first so the second rotation is reachable.
            if epoch > 32000:
                scopus = Scopus(keys[2])
            elif epoch > 16000:
                scopus = Scopus(keys[1])
            kang = scopus.search_author_publication(scopusid)
            epoch += 1
            kang_range = kang[(kang['cover_date'] < end) & (kang['cover_date'] > start)]
            abstracts = []
            ids = kang_range['scopus_id'].tolist()
            for oneid in ids:
                abstract = scopus.retrieve_abstract(oneid)
                abstracts.append(abstract['abstract'])
                epoch += 1
            kang_range['abstract'] = pandas.Series(abstracts, kang_range.index)
            pandasall.append(kang_range)
            print(epoch)
        except Exception:
            continue
    result = pandas.concat(pandasall)
    result.to_pickle("result.pkl")
    result.to_csv('result.csv', encoding='utf-8', index=False)
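A minimal usage sketch for the function above; the file name reviewers.txt and the three API keys are hypothetical placeholders:

# Hypothetical inputs; output lands in result.pkl and result.csv.
search_for_range('reviewers.txt', ['2011', '2012'], ['KEY-1', 'KEY-2', 'KEY-3'])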
import os
import sys

import pandas as pd
from pyscopus import Scopus


def scopus_search(string):
    """Run the search string returned by string_formulation() against the
    Scopus digital library via pyscopus.

    Args:
        string: Search string to be used.

    Returns:
        The number of results found in Scopus, or -1 on failure.
    """
    results = 3000
    # Insert your Scopus API Key
    key = '7f59af901d2d86f78a1fd60c1bf9426a'
    scopus = Scopus(key)
    try:
        search_df = scopus.search(string, count=results, view='STANDARD', type_=1)
    except Exception as e:
        print("Exception: " + str(e))
        return -1
    pd.options.display.max_rows = 99999
    pd.options.display.max_colwidth = 250
    search_df[['title']].to_csv(os.path.join(sys.path[0], "exits/result.csv"),
                                index_label=False, encoding='utf-8',
                                index=False, header=True, sep='\t')
    return int(len(search_df))
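A minimal call sketch with a hypothetical search string; the function writes the matching titles to exits/result.csv and returns the hit count, or -1 on failure:

# Hypothetical query string.
n_results = scopus_search('TITLE-ABS-KEY(machine AND learning)')
print(n_results)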
from pyscopus import Scopus

key = '50312b4710a276ec9670642f0f455259'
scopus = Scopus(key)


def pencarian(nama):
    """Search Scopus for authors by last name and return the first ten hits."""
    search_df = scopus.search("AUTHLASTNAME(" + nama + ")", count=10)
    return search_df.head(10)
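For example, with a placeholder last name:

# 'Smith' is a placeholder; prints up to ten matching records.
print(pencarian('Smith'))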
# -*- coding: utf-8 -*-
# pylint:disable=import-error
from __future__ import absolute_import

import logging
import os
import re
from glob import glob
from pathlib import Path

from ccdc import io
from crossref.restful import Works
from pyscopus import Scopus

scopus = Scopus(os.environ['SCOPUS_API_KEY'])

logger = logging.getLogger()
logger.setLevel(logging.INFO)


def get_structure_list(directory: str, extension: str = 'cif') -> list:
    """Collect the structure files found in a directory.

    Args:
        directory: directory to scan for structure files.
        extension: file extension to match (default 'cif').

    Returns:
        List of matching structure files.
    """
    logger.info('getting structure list')
import json
import pprint

import environ
import requests
from pyscopus import Scopus

env = environ.Env()
# reading .env file
environ.Env.read_env()

# API Docs - https://kitchingroup.cheme.cmu.edu/blog/2015/04/03/Getting-data-from-the-Scopus-API/
API_KEY = env('SCOPUS_API_KEY')
scopus = Scopus(API_KEY)


def get_scopus_data(titles):
    # Query the Scopus Search API directly for each title, requesting only
    # the citation-count field.
    for title in titles:
        resp = requests.get(
            f"http://api.elsevier.com/content/search/scopus?query=TITLE({title})&field=citedby-count",
            headers={
                'Accept': 'application/json',
                'X-ELS-APIKey': API_KEY
            })
        data = json.dumps(resp.json(), sort_keys=True, indent=4,
                          separators=(',', ': '))
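A hedged usage sketch with placeholder titles; note that as written the function builds data for each title but does not return or print it:

# Hypothetical titles list.
get_scopus_data(['Deep learning', 'Graph neural networks'])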
import pandas as pd
from pyscopus import Scopus

key = 'bc87ef89794f6f531edb4162d12d3e7e'
scopus = Scopus(key)

authors = pd.read_csv("authors.csv")
authors_id = authors['author_id'].values

publications = pd.DataFrame(columns=['title', 'cover_date', 'authors'])
# Start from index 210, presumably resuming an earlier run.
for i in range(210, len(authors_id)):
    try:
        author_publications_df = scopus.search_author_publication(authors_id[i], count=500)
        author_publications_df = author_publications_df[['title', 'cover_date', 'authors']]
        # DataFrame.append was removed in pandas 2.0; concatenate instead.
        publications = pd.concat([publications, author_publications_df])
        print(publications)
        # Write out after every author so a failure does not lose progress.
        publications.to_csv('titles.csv', index=False, sep='\t')
        print(i + 1)
    except Exception:
        pass
import pandas as pd
from pyscopus import Scopus

key = 'bc87ef89794f6f531edb4162d12d3e7e'
scopus = Scopus(key)

# Scopus subject-area codes to sample authors from.
subject_area = [
    "SUBJAREA(AGRI)", "SUBJAREA(BIOC)", "SUBJAREA(BUSI)", "SUBJAREA(CENG)",
    "SUBJAREA(CHEM)", "SUBJAREA(COMP)", "SUBJAREA(DECI)", "SUBJAREA(PHAR)",
    "SUBJAREA(MULT)", "SUBJAREA(VETE)", "SUBJAREA(SOCI)", "SUBJAREA(PSYC)",
    "SUBJAREA(NURS)", "SUBJAREA(NEUR)", "SUBJAREA(DENT)", "SUBJAREA(EART)",
    "SUBJAREA(ECON)", "SUBJAREA(ENER)", "SUBJAREA(ENGI)", "SUBJAREA(ENVI)",
    "SUBJAREA(HEAL)", "SUBJAREA(MATH)", "SUBJAREA(IMMU)", "SUBJAREA(MATE)",
    "SUBJAREA(MEDI)", "SUBJAREA(PHYS)"
]

authors = pd.DataFrame(columns=['author_id', 'name', 'affiliation_id'])
for subj in subject_area:
    try:
        author_search_df = scopus.search_author(subj, count=2500)
        author_search_df = author_search_df[['author_id', 'name', 'affiliation_id']]
        authors = pd.concat([authors, author_search_df]).drop_duplicates()
        print(authors)
        # Write out after every subject area so a failure does not lose progress.
        authors.to_csv('authors.csv', index=False)
    except Exception:
        pass
import json
import os
import re

from pyscopus import Scopus

search_term = 'TITLE-ABS-KEY ( infrastructure AND resilience ) AND ' + \
              '( DOCTYPE(ar) OR DOCTYPE(re) OR DOCTYPE(ip) )'

# File paths
path_root = 'P:/ene.yssp/Yaoping_Wang/Applications/2017-18 Job Applications/2018-10-08 Microsoft/'
path_json = os.path.join(path_root, 'data')
path_out = os.path.join(path_root, 'intermediate', 'infrastructure_resilience')

# Load API key configuration
with open(os.path.join(path_json, "config.json")) as con_file:
    config = json.load(con_file)

# Conduct search
scopus = Scopus(config['apikey'])
search_df = scopus.search(search_term, count=5000)

# Save the search results by journal & year of publication
# ---- only save journals that have at least 2 papers
jour_list = []
for xx in search_df['publication_name'].unique():
    if sum(search_df['publication_name'] == xx) > 1:
        # ---- adjust some irregularities in journal names
        xx2 = re.sub('[^a-zA-Z]+', ' ', xx)
        xx2 = xx2.strip(' ')
        # Trim so that output path plus journal name stays under 200 characters.
        if (len(path_out) + len(xx2)) > 200:
            xx2 = xx2[:(200 - len(path_out))]
        search_df.loc[search_df['publication_name'] == xx, 'publication_name'] = xx2
        jour_list.append(xx2)
import pandas as pd
from pyscopus import Scopus

key = 'bc87ef89794f6f531edb4162d12d3e7e'
scopus = Scopus(key)

authors = pd.read_csv("authors.csv")
affiliations_id = authors['affiliation_id'].values

affiliations = pd.DataFrame(columns=['affiliation-name', 'city', 'country', 'aff_id'])
print(len(affiliations_id))
for aff in affiliations_id:
    try:
        affiliation = scopus.retrieve_affiliation(str(int(aff)))
        affil = pd.DataFrame([[affiliation['affiliation-name'],
                               affiliation['city'],
                               affiliation['country'],
                               affiliation['aff_id']]],
                             columns=affiliations.columns)
        affiliations = pd.concat([affiliations, affil]).drop_duplicates()
        print(affiliations)
        affiliations.to_csv('affiliations.csv', index=False, sep='\t')
    except Exception:
        pass
import sqlite3

from pyscopus import Scopus

# Locations database (attached).
db = 'Locations.db'
connection = sqlite3.connect(db)
cursor = connection.cursor()
# We have a table of DOIs and dates for all refcodes (pulled straight from ConQuest).
cursor.execute(
    "SELECT t1.refcode, t1.field2, t1.field3, t1.field4 FROM DOI_List t1 "
    "LEFT JOIN Locations t2 ON t2.refcode = t1.refcode WHERE t2.refcode IS NULL")
refcodes = cursor.fetchall()
connection.close()
print(len(refcodes))

# You need a SCOPUS API key to access the data.
key = ""
scopus = Scopus(key)

# For each entry...
for refcode in refcodes:
    print(refcode[0])
    connection = sqlite3.connect(db)
    cursor = connection.cursor()
    # Some lookups fail; failed refcodes are recorded in a Fails table so they
    # are not retried on subsequent runs.
    row_exists = len(cursor.execute(
        "SELECT 1 FROM Fails WHERE refcode = '%s'" % (refcode[0])).fetchall()) > 0
    if not row_exists and refcode[0] != "AFUYIL":
        try:
            ref = refcode[0]
            doi = refcode[1]
            print(doi)
from pyscopus import Scopus

# Please generate your own key; the value below is a placeholder.
key = "*********"
scopus = Scopus(key)

# Other example queries:
# query_dict = {'affil': 'University of Iowa', 'authfirst': 'Xi', 'authlast': 'Wang'}
# query_dict = {'affil': 'Stanford University', 'authfirst': 'Andrew', 'authlast': 'Ng'}
# scopus.search_author(query_dict)

pub_info = scopus.search_abstract('0141607824')
# andrew_pubs = scopus.search_author_publication('35410071600')
from pyscopus import Scopus

keys = [
    '27839c5a7763855c5d80d3c4b197c46c',
    '0938b72b096ed9ee75db8fa28de4724c',
    '96a0dc33aaa7182ceee3b06e6475221c',
    '96d06c0d77e073a795317838b9c718ca',
    '3313f1daa25a36016da20007e82ae16f',
    'e8a30509437a6c79d9fccf2dd94c0042',
    '46f32bf1e1c55088a030642a17ee0a32',
    '96a0dc33aaa7182ceee3b06e6475221c',
    '0938b72b096ed9ee75db8fa28de4724c'
]

scopus = Scopus(keys[0])
turn = 0

with open("second_year.txt", "r") as handler:
    scs = handler.readlines()

test = scs[0]
start = '2006'
end = '2012'
epoch = 1
pandasall = []
retryflag = 0

i = 0
while i < len(scs):
    print(i)
    scopusid = scs[i]
from pyscopus import Scopus

# Please generate your own key; the value below is a placeholder.
key = "*********"
scopus = Scopus(key)

andrew_pubs = scopus.search_author_publication('35410071600')
# Select row 152 of the results DataFrame (plain andrew_pubs[152] would be
# a column lookup and raise a KeyError).
lda = andrew_pubs.iloc[152]