def get_url(querier, phrase):
    """Return the URL of the first article found for *phrase*, or None.

    A ``SearchScholarQuery`` is configured with *phrase* as the search
    phrase, title-only scope and a single result per page, then sent through
    *querier*.

    Args:
        querier: object exposing ``send_query(query)`` and an ``articles``
            sequence populated by that call.
        phrase: the title / phrase to search for.

    Returns:
        The first element of ``art.attrs['url']`` for the first article, or
        ``None`` when there are no articles or the lookup fails.
    """
    query = SearchScholarQuery()
    query.set_phrase(phrase)
    # Title search only.
    query.set_scope(True)
    # Only one result is needed.
    query.set_num_page_results(1)

    try:
        querier.send_query(query)
        for art in querier.articles:
            # attrs['url'] is a 3-item sequence; only its first item is used.
            url, _, _ = art.attrs['url']
            return url
    except Exception:
        # Best-effort lookup: any ordinary failure means "no URL".
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        return None
    # The query succeeded but produced no articles.
    return None
def _prompt(message):
    """Read one line of user input on either Python 2 or Python 3."""
    try:
        reader = raw_input  # Python 2 name
    except NameError:
        reader = input  # Python 3: input() returns the raw string
    return reader(message)


def get_results_for(title, author):
    """Query for one article and turn its BibTeX citation into a printable entry.

    The query uses *author*, *title* as the phrase, title-only scope and one
    result per page, with the citation format set to BibTeX. The first line
    of the citation (``@type{id,``) gives the entry type and id. The entry
    type names a function in this module; that function's positional
    parameters after the first one are the fields the entry needs.

    Fields are read from the ``name = {value}`` lines of the citation. Any
    field the citation lacks is requested interactively; the loop repeats
    until every field has a value, so answering anything other than "Y"
    simply causes the question to be asked again on the next pass.

    Args:
        title: phrase to search for.
        author: author to search for.

    Returns:
        The result of ``__getprintable__(True)`` on the object built by the
        entry-type function, or ``None`` if the query returned no articles.
    """
    query = SearchScholarQuery()
    query.set_author(author)
    query.set_phrase(title)
    query.set_num_page_results(1)
    query.set_scope(True)

    settings = ScholarSettings()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)

    querier = ScholarQuerier()
    querier.apply_settings(settings)
    querier.send_query(query)

    thismodule = sys.modules[__name__]

    for art in querier.articles:
        citation = art.as_citation()
        print(citation)

        bibtex_lines = citation.split("\n")
        # First line looks like "@article{some_id," -> drop "@" and the
        # trailing character, then split type from id at the brace.
        header_parts = bibtex_lines[0][1:-1].split("{")
        reftype = header_parts[0].lower()
        refid = header_parts[1].lower()
        field_lines = bibtex_lines[1:]

        # Resolve the entry type to a module-level function, asking the user
        # for another type name until one resolves.
        while True:
            try:
                code = getattr(thismodule, reftype).__code__
                # Skip the first positional parameter (it receives refid).
                features_of_type = code.co_varnames[1:code.co_argcount]
                break
            except AttributeError:
                reftype = _prompt("Type " + reftype + " not recongised, please enter a known type: ")

        while True:
            # Map of field name -> value; later lines override earlier ones.
            # A dict avoids the former fixed 9-slot scratch lists, which
            # failed for entry types with nine or more fields.
            found = {}
            for line in field_lines:
                # partition keeps any further "=" inside the value intact.
                name, sep, raw_value = line.partition("=")
                if not sep:
                    continue
                name = name.strip()
                raw_value = raw_value.strip()
                # Keep the text between the first character and the last "}";
                # a value without a closing brace yields an empty string.
                closing = raw_value.rfind("}")
                value = raw_value[1:closing] if closing >= 0 else ""
                if name in features_of_type:
                    found[name] = value

            missing = [feature for feature in features_of_type if feature not in found]
            if not missing:
                ordered_values = [found[feature] for feature in features_of_type]
                return getattr(thismodule, reftype)(refid, *ordered_values).__getprintable__(True)

            for feature in missing:
                answer = _prompt(feature + " is not provided by the retrieved bibtex entry. Would you like to enter it now? (Y) or (N)")
                if answer == "Y":
                    entered = _prompt("Enter value for " + feature + ": ")
                    # Stored as a BibTeX-style line so the next pass parses it.
                    field_lines.append(feature + " = {" + entered + "}")

    # No articles were returned for the query.
    return None
#input: full path to xlsx file containing article titles from scopus. Assumes that #titles are first column in the sheet 'Qatar_Scopus'. #output: csv file containing paper titles, citations, possibly free PDF link from web. from openpyxl import load_workbook import sys from scholar import ScholarQuerier, ScholarSettings, SearchScholarQuery,onecsv from time import sleep import csv querier = ScholarQuerier() settings = ScholarSettings() querier.apply_settings(settings) query = SearchScholarQuery() query.set_scope(True) alldata=[] counter=1 xlsxfile=sys.argv[1] wb = load_workbook(xlsxfile, use_iterators=True) print wb.get_sheet_names() ws = wb.get_sheet_by_name('Qatar_Scopus') for row in ws.iter_rows(row_offset=1): if row[0].value is not None: temp=[] title=row[0].value.encode("utf-8") query.set_phrase(title) query.set_num_page_results(1) querier.send_query(query) x=onecsv(querier) if (x!=None):
#cites =[] #citation_list = [] # [year of pub] with open('citations_counts_25307.csv', "w") as csv_file: writer = csv.writer(csv_file, delimiter=',') for k,v in df['Article Title'][25307:].iteritems(): # why is df['Article Title'] of type dict?? querier = ScholarQuerier() settings = ScholarSettings() settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX) querier.apply_settings(settings) query = SearchScholarQuery() query.set_phrase(v) query.set_scope(True) querier.send_query(query) if querier.articles: cites=querier.articles[0].__getitem__('num_citations') citation_list=querier.articles[0].__getitem__('url_citations') else: cites=0 citation_list= "" # json_results = [] # file_name = 'query_data/cites_for_article_'+str(k+1)+'.json' # for art in querier.articles: # json_results.append( # {key: art.attrs[key][0] for key in art.attrs.keys()}) # with open(file_name, 'wb') as f: # json.dump(json_results, f)