Example #1
import pandas
from pyscopus import Scopus


def search_for_range(filename, time_range, keys):
    '''
    filename: path to a file with one reviewer Scopus ID per line.
    time_range: list of two strings, [start, end]; for example
        ['2011', '2012'].
    keys: list of API key strings; usually 10 keys can cover
        70,000-80,000 searches.
    '''
    scopus = Scopus(keys[0])
    handler = open(filename, "r")
    scs = handler.readlines()
    start, end = time_range
    epoch = 0
    pandasall = []
    for scopusid in scs:
        try:
            # Rotate to the next key before the current one exhausts its
            # quota; check the larger threshold first so both can fire.
            if epoch > 32000:
                scopus = Scopus(keys[2])
            elif epoch > 16000:
                scopus = Scopus(keys[1])
            kang = scopus.search_author_publication(scopusid.strip())
            epoch += 1
            # Keep only publications whose cover date falls in the range.
            kang_range = kang[(kang['cover_date'] < end)
                              & (kang['cover_date'] > start)].copy()
            abstracts = []
            ids = kang_range['scopus_id'].tolist()
            for oneid in ids:
                abstract = scopus.retrieve_abstract(oneid)
                abstracts.append(abstract['abstract'])
                epoch += 1
            kang_range['abstract'] = pandas.Series(abstracts, kang_range.index)
            pandasall.append(kang_range)
            print(epoch)
        except Exception:
            continue
    handler.close()
    result = pandas.concat(pandasall)
    result.to_pickle("result.pkl")
    result.to_csv('result.csv', encoding='utf-8', index=False)
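A minimal call, assuming a hypothetical reviewers.txt with one Scopus author ID per line and placeholder key strings:

api_keys = ['key-1', 'key-2', 'key-3']  # placeholders, not real keys
search_for_range('reviewers.txt', ['2011', '2012'], api_keys)
# Output lands in result.pkl and result.csv in the working directory.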
Example #2
import os
import sys

import pandas as pd
from pyscopus import Scopus


def scopus_search(string):
    """Run the search string returned by string_formulation()
        against the Scopus digital library via pyscopus.

    Args:
        string: Search string to be used.

    Returns:
        The number of results found, or -1 if the search
            raised an exception.
    """

    results = 3000

    # Insert your Scopus API key
    key = '7f59af901d2d86f78a1fd60c1bf9426a'
    scopus = Scopus(key)

    try:
        search_df = scopus.search(string,
                                  count=results,
                                  view='STANDARD',
                                  type_=1)
    except Exception as e:
        print("Exception: " + str(e))
        return -1

    pd.options.display.max_rows = 99999
    pd.options.display.max_colwidth = 250

    # Save the titles of all results as a tab-separated file.
    search_df[['title']].to_csv(os.path.join(sys.path[0], "exits/result.csv"),
                                index_label=False,
                                encoding='utf-8',
                                index=False,
                                header=True,
                                sep='\t')

    return int(len(search_df))
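A usage sketch; the real query comes from string_formulation(), which is not shown here, so a literal Scopus query stands in for it:

n = scopus_search('TITLE-ABS-KEY(systematic AND review)')  # illustrative query
if n >= 0:
    print(str(n) + " results written to exits/result.csv")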
Example #3
from pyscopus import Scopus
key = '50312b4710a276ec9670642f0f455259'
scopus = Scopus(key)


def pencarian(nama):
    # "pencarian" and "nama" are Indonesian for "search" and "name":
    # look up documents by author last name and return the top 10 hits.
    search_df = scopus.search("AUTHLASTNAME(" + nama + ")", count=10)
    return search_df.head(10)
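Calling it with an illustrative surname (any last name works; "Smith" is just an example):

df = pencarian("Smith")
print(df['title'])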
Example #4
# -*- coding: utf-8 -*-
# pylint:disable=import-error
from __future__ import absolute_import

import logging
import os
import re
from glob import glob
from pathlib import Path

from ccdc import io
from crossref.restful import Works
from pyscopus import Scopus

scopus = Scopus(os.environ['SCOPUS_API_KEY'])

logger = logging.getLogger()
logger.setLevel(logging.INFO)


def get_structure_list(directory: str, extension: str = 'cif') -> list:
    """Collect the structure files found in a directory.

    Args:
        directory: folder to search for structure files.
        extension: file extension to match (defaults to 'cif').

    Returns:
        List of paths to the matching structure files.
    """
    logger.info('getting structure list')
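    # The example is cut off here. A plausible completion (an assumption,
    # not the author's original code), given the glob import above:
    return glob(os.path.join(directory, '*.' + extension))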
Example #5
from pyscopus import Scopus
import requests
import json
import environ

env = environ.Env()
# reading .env file
environ.Env.read_env()
# API Docs - https://kitchingroup.cheme.cmu.edu/blog/2015/04/03/Getting-data-from-the-Scopus-API/

API_KEY = env('SCOPUS_API_KEY')
scopus = Scopus(API_KEY)


# author = scopus.retrieve_author('36706629400')
# print(author)
def get_scopus_data(titles):

    for title in titles:
        # Query the Scopus Search API directly for each title's
        # citation count.
        resp = requests.get(
            f"http://api.elsevier.com/content/search/scopus?query=TITLE({title})&field=citedby-count",
            headers={
                'Accept': 'application/json',
                'X-ELS-APIKey': API_KEY
            })
        data = json.dumps(resp.json(),
                          sort_keys=True,
                          indent=4,
                          separators=(',', ': '))
        # The original snippet never uses `data`; printing it is one
        # minimal way to surface the response.
        print(data)
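A sketch of a call, with made-up titles; the function prints the raw JSON response for each:

get_scopus_data(["A made-up article title", "Another hypothetical title"])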
Example #6
from pyscopus import Scopus
import pandas as pd

key = 'bc87ef89794f6f531edb4162d12d3e7e'
scopus = Scopus(key)
authors = pd.read_csv("authors.csv")

authors_id = authors['author_id'].values

publications = pd.DataFrame(columns=['title', 'cover_date', 'authors'])

# Start at 210 to resume a previous partial run.
for i in range(210, len(authors_id)):
    try:
        author_publications_df = scopus.search_author_publication(
            authors_id[i], count=500)
        author_publications_df = author_publications_df[
            ['title', 'cover_date', 'authors']]
        # DataFrame.append is deprecated; concatenate instead.
        publications = pd.concat([publications, author_publications_df])
        print(publications)
        # Checkpoint after every author so progress survives a crash.
        publications.to_csv('titles.csv', index=False, sep='\t')
        print(i + 1)
    except Exception:
        pass
Example #7
from pyscopus import Scopus
import pandas as pd

key = 'bc87ef89794f6f531edb4162d12d3e7e'
scopus = Scopus(key)

# One query per Scopus subject-area code.
subject_area = [
    "SUBJAREA(AGRI)", "SUBJAREA(BIOC)", "SUBJAREA(BUSI)", "SUBJAREA(CENG)",
    "SUBJAREA(CHEM)", "SUBJAREA(COMP)", "SUBJAREA(DECI)", "SUBJAREA(PHAR)",
    "SUBJAREA(MULT)", "SUBJAREA(VETE)", "SUBJAREA(SOCI)", "SUBJAREA(PSYC)",
    "SUBJAREA(NURS)", "SUBJAREA(NEUR)", "SUBJAREA(DENT)", "SUBJAREA(EART)",
    "SUBJAREA(ECON)", "SUBJAREA(ENER)", "SUBJAREA(ENGI)", "SUBJAREA(ENVI)",
    "SUBJAREA(HEAL)", "SUBJAREA(MATH)", "SUBJAREA(IMMU)", "SUBJAREA(MATE)",
    "SUBJAREA(MEDI)", "SUBJAREA(PHYS)"
]

authors = pd.DataFrame(columns=['author_id', 'name', 'affiliation_id'])

for subj in subject_area:
    try:
        author_search_df = scopus.search_author(subj, count=2500)
        author_search_df = author_search_df[
            ['author_id', 'name', 'affiliation_id']]
        # DataFrame.append is deprecated; concatenate and de-duplicate.
        authors = pd.concat([authors, author_search_df]).drop_duplicates()
        print(authors)
        # Checkpoint after each subject area.
        authors.to_csv('authors.csv', index=False)
    except Exception:
        pass
Example #8
import json
import os
import re

from pyscopus import Scopus

search_term = 'TITLE-ABS-KEY ( infrastructure AND resilience )  AND  ' + \
              '( DOCTYPE(ar)  OR  DOCTYPE(re) OR DOCTYPE(ip) )'

# File paths
path_root = 'P:/ene.yssp/Yaoping_Wang/Applications/2017-18 Job Applications/2018-10-08 Microsoft/'
path_json = os.path.join(path_root, 'data')
path_out = os.path.join(path_root, 'intermediate', 'infrastructure_resilience')

# Load API key configuration
con_file = open(os.path.join(path_json, "config.json"))
config = json.load(con_file)
con_file.close()

# Conduct search
scopus = Scopus(config['apikey'])
search_df = scopus.search(search_term, count=5000)

# Save the search results by journal & year of publication
# ---- only save journals that have at least 2 papers
jour_list = []
for xx in search_df['publication_name'].unique():
    if (sum(search_df['publication_name'] == xx) > 1):
        # ---- adjust some irregularities in journal names
        xx2 = re.sub('[^a-zA-Z]+', ' ', xx)
        xx2 = xx2.strip(' ')
        if ((len(path_out) + len(xx2)) > 200):
            xx2 = xx2[:(200 - len(path_out))]
        search_df.loc[search_df['publication_name'] == xx,
                      'publication_name'] = xx2
        jour_list.append(xx2)
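The snippet stops before the save itself; a minimal sketch of that step, assuming one tab-separated file per journal under path_out:

# Assumed save step: one file per journal kept in jour_list.
for jour in jour_list:
    sub = search_df[search_df['publication_name'] == jour]
    sub.to_csv(os.path.join(path_out, jour + '.csv'),
               index=False, encoding='utf-8', sep='\t')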
Example #9
from pyscopus import Scopus
import pandas as pd

key = 'bc87ef89794f6f531edb4162d12d3e7e'
scopus = Scopus(key)

authors = pd.read_csv("authors.csv")

affiliations_id = authors['affiliation_id'].values
affiliations = pd.DataFrame(
    columns=['affiliation-name', 'city', 'country', 'aff_id'])

print(len(affiliations_id))
for aff in affiliations_id:
    try:
        affiliation = scopus.retrieve_affiliation(str(int(aff)))
        affil = pd.DataFrame([[affiliation['affiliation-name'],
                               affiliation['city'],
                               affiliation['country'],
                               affiliation['aff_id']]],
                             columns=affiliations.columns)
        # DataFrame.append is deprecated; concatenate and de-duplicate.
        affiliations = pd.concat([affiliations, affil]).drop_duplicates()
        print(affiliations)
        # Checkpoint after every affiliation lookup.
        affiliations.to_csv('affiliations.csv', index=False, sep='\t')
    except Exception:
        pass
Example #10
import sqlite3

from pyscopus import Scopus

# Locations database (attached).
db = 'Locations.db'
connection = sqlite3.connect(db)
cursor = connection.cursor()

# We have a table of DOIs and dates for all refcodes (pulled straight from
# ConQuest); select those not yet present in Locations.
cursor.execute(
    "SELECT t1.refcode, t1.field2, t1.field3, t1.field4 FROM DOI_List t1 "
    "LEFT JOIN Locations t2 ON t2.refcode = t1.refcode "
    "WHERE t2.refcode IS NULL")
refcodes = cursor.fetchall()
connection.close()
print(len(refcodes))

# You need a Scopus API key to access the data.
key = ""
scopus = Scopus(key)

# For each entry...
for refcode in refcodes:
    print(refcode[0])
    connection = sqlite3.connect(db)
    cursor = connection.cursor()
    # Some errors recur; the Fails table records refcodes to skip so they
    # are not retried. A parameterized query avoids SQL injection.
    row_exists = len(cursor.execute(
        "SELECT 1 FROM Fails WHERE refcode = ?",
        (refcode[0],)).fetchall()) > 0
    if not row_exists and refcode[0] != "AFUYIL":
        try:
            ref = refcode[0]
            doi = refcode[1]
            print(doi)
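            # The example is cut off here. A hypothetical continuation:
            # on failure, record the refcode in Fails (the only column
            # that table is known to have) so later runs skip it.
        except Exception:
            cursor.execute("INSERT INTO Fails (refcode) VALUES (?)",
                           (refcode[0],))
            connection.commit()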
Example #11
from pyscopus import Scopus

# Please generate your own key.
key = "*********"  # placeholder; substitute a real API key
scopus = Scopus(key)

#query_dict = {'affil': 'University of Iowa', 'authfirst': 'Xi', 'authlast': 'Wang'}
#query_dict = {'affil': 'Stanford University', 'authfirst': 'Andrew', 'authlast': 'Ng'}
#scopus.search_author(query_dict)

pub_info = scopus.search_abstract('0141607824')
#andrew_pubs = scopus.search_author_publication('35410071600')

# Pool of keys to rotate through as each one's quota runs out.
keys = [
    '27839c5a7763855c5d80d3c4b197c46c', '0938b72b096ed9ee75db8fa28de4724c',
    '96a0dc33aaa7182ceee3b06e6475221c', '96d06c0d77e073a795317838b9c718ca',
    '3313f1daa25a36016da20007e82ae16f', 'e8a30509437a6c79d9fccf2dd94c0042',
    '46f32bf1e1c55088a030642a17ee0a32', '96a0dc33aaa7182ceee3b06e6475221c',
    '0938b72b096ed9ee75db8fa28de4724c'
]
scopus = Scopus(keys[0])
turn = 0
handler = open("second_year.txt", "r")
scs = handler.readlines()
test = scs[0]
start = '2006'
end = '2012'
epoch = 1
pandasall = []
retryflag = 0
i = 0
while i < len(scs):
    print(i)
    scopusid = scs[i]
Example #13
from pyscopus import Scopus

# please generate your own key
key = "*********"
scopus = Scopus(key)

#query_dict = {'affil': 'University of Iowa', 'authfirst': 'Xi', 'authlast': 'Wang'}
#query_dict = {'affil': 'Stanford University', 'authfirst': 'Andrew', 'authlast': 'Ng'}
#scopus.search_author(query_dict)

#pub_info = scopus.search_abstract('0141607824')
andrew_pubs = scopus.search_author_publication('35410071600')
lda = andrew_pubs.iloc[152]  # row 152; integer row access on a DataFrame needs .iloc