Example #1
0
def get_url(querier, phrase):
    """Return the URL of the first article found for ``phrase``.

    Builds a ``SearchScholarQuery`` for ``phrase`` (title-only scope, one
    result per page), sends it through ``querier`` and reads the ``'url'``
    attribute of the first article returned.

    Args:
        querier: object exposing ``send_query(query)`` and an ``articles``
            iterable whose items carry an ``attrs`` mapping.
        phrase: title / phrase to search for.

    Returns:
        The first element of the first article's ``attrs['url']`` triple, or
        ``None`` when the query fails or yields no articles.
    """
    query = SearchScholarQuery()
    query.set_phrase(phrase)
    # Restrict matching to the title only.
    query.set_scope(True)
    query.set_num_page_results(1)

    try:
        querier.send_query(query)
        for art in querier.articles:
            # attrs values are 3-item sequences; only the first item is used.
            url, _, _ = art.attrs['url']
            return url
    except Exception:
        # Best-effort lookup: any query/parsing failure means "no URL".
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so the caller's batch run can be stopped.
        return None
    return None
def get_results_for(title, author):
    """Query Scholar for one paper and build a reference from its BibTeX.

    Sends a one-result, title-scoped query for ``title`` / ``author`` with
    BibTeX citation format, prints the raw citation, then parses it.

    The BibTeX entry type (the text between ``@`` and ``{`` on the first
    line) is looked up as a callable in this module.  That callable's
    positional parameters after the first one are taken as the required field
    names; the first parameter receives the lower-cased citation key.

    This function is interactive: it prompts via ``raw_input`` when the entry
    type is not a known callable, and for every required field the BibTeX
    entry does not provide.

    NOTE(review): answering anything other than "Y" for a missing field does
    not skip it -- the prompts repeat until every field has a value.

    Args:
        title: phrase passed to ``SearchScholarQuery.set_phrase``.
        author: author passed to ``SearchScholarQuery.set_author``.

    Returns:
        ``__getprintable__(True)`` of the object built for the first article,
        or ``None`` when the query returned no articles.
    """
    query = SearchScholarQuery()
    query.set_author(author)
    query.set_phrase(title)
    query.set_num_page_results(1)
    query.set_scope(True)

    settings = ScholarSettings()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)

    querier = ScholarQuerier()
    querier.apply_settings(settings)
    querier.send_query(query)

    thismodule = sys.modules[__name__]

    for art in querier.articles:
        # Fetch the citation once and reuse it for printing and parsing.
        citation = art.as_citation()
        print(citation)

        bibtex_lines = citation.split("\n")

        # First line looks like "@type{key," -> drop "@" and the trailing
        # character, then split into entry type and citation key.
        header_parts = bibtex_lines[0][1:-1].split("{")
        reftype = header_parts[0].lower()
        refid = header_parts[1].lower()

        # Resolve the entry type to a module-level callable, re-prompting
        # until the name exists and exposes a code object.
        while True:
            try:
                builder = getattr(thismodule, reftype)
                code = builder.func_code
                features_of_type = code.co_varnames[1:code.co_argcount]
                break
            except AttributeError:
                reftype = raw_input("Type " + reftype + " not recongised, please enter a known type: ")

        # Collect "name = {value}" pairs.  partition() splits on the first
        # "=" only, so values that themselves contain "=" stay intact.
        fields = {}
        for line in bibtex_lines[1:]:
            name, sep, raw_value = line.partition("=")
            if not sep:
                continue
            raw_value = raw_value.strip()
            # Keep the text between the opening brace and the last "}";
            # a value without a closing brace yields an empty string.
            fields[name.strip()] = raw_value[1:max(raw_value.rfind("}"), 0)]

        # Keep asking until every required field has a value.  Using a dict
        # keyed by field name removes the old fixed limit of nine slots.
        while True:
            missing = [f for f in features_of_type if f not in fields]
            if not missing:
                values = [fields[f] for f in features_of_type]
                return builder(refid, *values).__getprintable__(True)

            for feature in missing:
                answer = raw_input(feature + " is not provided by the retrieved bibtex entry. Would you like to enter it now? (Y) or (N)")
                if answer == "Y":
                    fields[feature] = raw_input("Enter value for " + feature + ": ")

    return None
# Input: full path to an .xlsx file containing article titles from Scopus. Assumes that
# the titles are in the first column of the sheet 'Qatar_Scopus'.
# Output: CSV file containing paper titles, citations, and possibly a free PDF link from the web.
from openpyxl import load_workbook
import sys
from scholar import ScholarQuerier, ScholarSettings, SearchScholarQuery,onecsv
from time import sleep
import csv

# Module-level setup for the batch lookup: one querier and one query object
# are created here and reused for every spreadsheet row processed below.
querier = ScholarQuerier()
settings = ScholarSettings()
querier.apply_settings(settings)
query = SearchScholarQuery()
# Scope is fixed once for all rows (presumably title-only matching -- confirm
# against the scholar module).
query.set_scope(True)
# NOTE(review): alldata and counter are initialised here; their use is not
# visible in this part of the file.
alldata=[]
counter=1

# The workbook path is the first command-line argument.
xlsxfile=sys.argv[1]
wb = load_workbook(xlsxfile, use_iterators=True)
# Print the available sheet names before selecting the expected one.
print wb.get_sheet_names()
ws = wb.get_sheet_by_name('Qatar_Scopus')

for row in ws.iter_rows(row_offset=1):
	if row[0].value is not None:
		temp=[]
		title=row[0].value.encode("utf-8")
		query.set_phrase(title)
		query.set_num_page_results(1)
		querier.send_query(query)
 	   	x=onecsv(querier)
	  	if (x!=None):
Example #4
0
#cites =[]
#citation_list = [] # [year of pub]

# For each article title from index 25307 onwards, run a Scholar query and
# pick the citation count and citations URL of the first hit.
# `df` is defined outside this part of the file.
# NOTE(review): `writer`, `cites` and `citation_list` are not used in the
# visible portion -- no row is written to the CSV here; confirm the write
# happens in code that follows.
with open('citations_counts_25307.csv', "w") as csv_file:
    writer = csv.writer(csv_file, delimiter=',')
    for k,v in df['Article Title'][25307:].iteritems(): 
        # why is df['Article Title'] of type dict??
        
        # A fresh querier, settings and query are built for every title.
        querier = ScholarQuerier()
        settings = ScholarSettings()
        settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
        querier.apply_settings(settings)
        query = SearchScholarQuery()
        query.set_phrase(v)
        query.set_scope(True)
        querier.send_query(query)
        # Only the first returned article is consulted; fall back to
        # 0 / "" when the query produced no articles.
        if querier.articles:
            cites=querier.articles[0].__getitem__('num_citations')
            citation_list=querier.articles[0].__getitem__('url_citations')
        else:
            cites=0
            citation_list= ""
            
    #    json_results = []
    #    file_name = 'query_data/cites_for_article_'+str(k+1)+'.json'
    #    for art in querier.articles:
    #        json_results.append(
    #            {key: art.attrs[key][0] for key in art.attrs.keys()})
    #    with open(file_name, 'wb') as f:
    #        json.dump(json_results, f)