def papers_by_query_api(request):
    if request.method == 'GET':
        phrase = request.GET.get('phrase', '')
        if not phrase:
            return HttpResponseBadRequest()
        query = SearchScholarQuery()
        query.set_phrase(phrase)
        querier = ScholarQuerier()
        querier.send_query(query)
        papers = querier.articles
        if not papers:
            result = {
                'papers': [{
                    'title': '',
                    'id': 0,
                    'url': '',
                    'excerpt': ''
                }]
            }
        else:
            result = {
                'papers': [{
                    'title': papers[0]['title'],
                    'id': papers[0]['cluster_id'],
                    'url': papers[0]['url'],
                    'excerpt': papers[0]['excerpt']
                }]
            }
        return JsonResponse(result)
    else:
        return HttpResponseBadRequest()

def getRelatedPublications(author):
    print(author)
    settings = ScholarSettings()      # adjust scholar settings
    querier = ScholarQuerier()        # a ScholarQuerier instance conducts the search on Google Scholar
    querier.apply_settings(settings)  # apply the settings provided by the ScholarSettings instance
    query = SearchScholarQuery()
    query.set_author(author)
    querier.send_query(query)
    print(querier.articles)

def getResult(query):
    querier = ScholarQuerier()
    citations = 0
    url_citations = ""
    clusterID = ""
    try:
        querier.send_query(query)
        print(querier.articles[0].attrs['cluster_id'])
        citations = querier.articles[0].attrs['num_citations'][0]
        url_citations = querier.articles[0].attrs['url_citations'][0]
        clusterID = querier.articles[0].attrs['cluster_id'][0]
    except Exception:
        # no results (or a failed request) -- keep the defaults set above
        pass
    return citations, url_citations, clusterID
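
# Illustrative usage of getResult() above -- a minimal sketch, not from the
# original project: build a SearchScholarQuery for a title phrase (the phrase
# here is just an example) and unpack the returned citation data.
example_query = SearchScholarQuery()
example_query.set_phrase("quantum theory")
citations, url_citations, cluster_id = getResult(example_query)
print(citations, url_citations, cluster_id)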

def process(document):
    scholar = ScholarQuerier()
    query = SearchScholarQuery()

    # save cookie at first paper
    global save_cookie
    if save_cookie:
        query.set_phrase("quantum theory")
        scholar.send_query(query)
        scholar.save_cookies()
        save_cookie = False

    query.set_phrase(document.title)
    scholar.send_query(query)
    scholar_articles = scholar.articles
    if len(scholar_articles) == 0:
        return None

    title_match_ratio = \
        difflib.SequenceMatcher(None, document.title,
                                scholar_articles[0]['title']).ratio()
    if title_match_ratio < min_title_match_ratio:
        return None

    old_tags = document.tags
    citation_tag = ncitations_to_tag(scholar_articles[0]['num_citations'])
    new_tags = update_tags(old_tags, [(tag_pattern, citation_tag)])
    new_tags.append(str(scholar_articles[0]['num_citations']))
    document.update(tags=new_tags)
    return scholar_articles[0]['num_citations']

def search(bot, update, args):
    search_command = ' '.join(args)
    bot.send_message(chat_id=update.message.chat_id,
                     text="You searched for: " + search_command)
    querier = ScholarQuerier()
    query = SearchScholarQuery()
    query.set_words(search_command)  # pass the joined words as a single string
    querier.send_query(query)
    articles = querier.articles
    bot.send_message(chat_id=update.message.chat_id,
                     text="Number of results: " + str(len(articles)))
    for index, article in enumerate(articles):  # enumerate so the numbering actually advances
        bot.send_message(chat_id=update.message.chat_id,
                         text=str(index + 1) + ". " + article.attrs['title'][0])
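
# How the search handler above could be wired into python-telegram-bot
# (sketch only; the token and Updater setup are assumptions, not part of the
# original snippet -- pass_args matches the pre-v12 callback signature used above).
from telegram.ext import Updater, CommandHandler

updater = Updater(token='YOUR_BOT_TOKEN')
updater.dispatcher.add_handler(CommandHandler('search', search, pass_args=True))
updater.start_polling()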

def getPublications_Title(title):
    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    publications = []
    query.set_words(title)
    querier.send_query(query)
    related_list = scholar.json(querier)
    if related_list:
        print("No of related publications found : ", len(related_list))
        for item in related_list:
            # print(item.keys())
            # item["relatedTitle"] = title[0]
            publications.append(item)
    # time.sleep(random.randrange(10, 40, 2))
    # time.sleep(60)
    return publications

def process(document):
    scholar = ScholarQuerier()
    query = SearchScholarQuery()
    query.set_phrase(document.title)
    scholar.send_query(query)
    scholar_articles = scholar.articles
    if len(scholar_articles) == 0:
        return None

    title_match_ratio = \
        difflib.SequenceMatcher(None, document.title,
                                scholar_articles[0]['title']).ratio()
    if title_match_ratio < min_title_match_ratio:
        return None

    old_tags = document.tags
    citation_tag = ncitations_to_tag(scholar_articles[0]['num_citations'])
    new_tags = update_tags(old_tags, [(tag_pattern, citation_tag)])
    document.update(tags=new_tags)
    return scholar_articles[0]['num_citations']

def query_scholar_for_papers(author, searchstring):
    querier = ScholarQuerier()
    settings = ScholarSettings()
    settings.set_citation_format(settings.CITFORM_BIBTEX)
    settings.set_per_page_results(5)
    querier.apply_settings(settings)

    query = SearchScholarQuery()
    query.set_author(author)
    query.set_phrase(searchstring)
    querier.send_query(query)

    return_str = ''
    if len(querier.articles) > 0:
        return_str += querier.articles[0].as_citation() + '\n'
    else:
        return_str = 'Ooopsie. No results. Maybe we ran over the request limit?'
    return return_str
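
# Example call for query_scholar_for_papers() (sketch; the author and phrase
# are made-up inputs): fetch the first matching article and print its BibTeX
# citation, or the fallback message when nothing comes back.
print(query_scholar_for_papers('A. Turing', 'computing machinery and intelligence'))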

def blocked():
    print("Test if blocked....")
    # time.sleep(random.randrange(10, 40, 2))
    time.sleep(60)
    publications = []
    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    query.set_author("Ryan Baker")
    querier.send_query(query)
    related_list = scholar.json(querier)
    if related_list:
        print("Block Test : No of related publications found : ", len(related_list))
        for item in related_list:
            publications.append(item)
    if len(publications) == 0:
        return True
    else:
        return False

def find_article(canidates):
    """
    A function that attempts to find a good match from scholar.py for a set of
    candidate paper titles; it returns the best result.
    """
    canidate_scores = [5000 for dummy in canidates]
    canidate_best_match = ['' for dummy in canidates]
    querier = ScholarQuerier()
    delay = 0
    for ii, canidate in enumerate(canidates):
        sleep(delay)
        querier.UA = random.choice(USER_AGENTS)
        querier.query(canidate)
        for art in querier.articles:
            title = art['title'].encode('ascii', 'ignore')
            score = str_metric(canidate, title) / max(len(title), len(canidate))
            if score < canidate_scores[ii]:
                canidate_scores[ii] = score
                canidate_best_match[ii] = art
        print('----------------------')
        print('Candidate: ' + canidate)
        print('Match: ' +
              canidate_best_match[ii]['title'].encode('ascii', 'ignore'))
        print('Score ' + str(canidate_scores[ii]))
        querier.clear_articles()
        delay = max(random.gauss(30, 30), 5)
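
# Example call for find_article() (sketch; the candidate titles are invented,
# and USER_AGENTS / str_metric must already be defined in the surrounding
# module): score a couple of candidate titles against Google Scholar results.
find_article(['A relational model of data for large shared data banks',
              'Communicating sequential processes'])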

def cites_api(request):
    if request.method == 'GET':
        paper_id = request.GET.get('paper_id', 0)
        page = request.GET.get('page', None)
        if not paper_id or page is None:
            return HttpResponseBadRequest()
        query = CitesScholarQuery(paper_id, page)
        querier = ScholarQuerier()
        querier.send_query(query)
        papers = querier.articles
        cites = []
        for paper in papers:
            if not paper['cluster_id']:
                continue
            cites.append({'title': paper['title'],
                          'id': paper['cluster_id'],
                          'url': paper['url']})
        return JsonResponse({'paper_id': paper_id, 'cites': cites})
    else:
        return HttpResponseBadRequest()

def getPublications(authors):
    print(authors)
    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    publications = []
    for author in authors:
        if len(author) > 0:
            print("Using Author : ", author)
            query.set_author(author)
            querier.send_query(query)
            related_list = scholar.json(querier)
            if related_list:
                print("No of related publications found : ", len(related_list))
                for item in related_list:
                    # print(item.keys())
                    # item["relatedAuthor"] = author
                    publications.append(item)
            # time.sleep(random.randrange(10, 40, 2))
            time.sleep(20)
    return publications

def getPublications(author):
    print(author)
    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    query.set_author(author)
    querier.send_query(query)
    # scholar.csv(querier)
    scholar.txt(querier, with_globals=False)

def literature_search(query_terms, type='full_name'):
    """ perform a google scholar query with given terms """
    querier = ScholarQuerier()
    settings = ScholarSettings()
    config = ScholarConf()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    papers = []
    for item in query_terms.values:
        repo_id = item[0]
        if type != 'full_name':
            repo_name = item[1]
            phrase = item[2]
            keywords = item[3]
            start_year = item[4]
            if keywords:
                if ',' not in keywords:
                    keywords = keywords + ','
                query.set_words_some(keywords)
            query.set_words(repo_name)
            query.set_phrase(phrase)
            phrase_text = repo_name + ', ' + phrase
        else:
            phrase = item[1]
            start_year = item[2]
            query.set_phrase(phrase)  # commontk/CTK, meoyo/AIPS
            phrase_text = phrase
        print('search papers for {} ...'.format(phrase_text))
        query.set_timeframe(start_year)
        querier.send_query(query)
        articles = querier.articles
        if len(articles) == 0:
            continue
        results = process_arts(config, item[0], phrase_text, articles)
        papers = papers + results
        time_delay = random.randrange(1, 10)
        time.sleep(time_delay)
    return papers
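
# Illustrative call for literature_search() (sketch, not from the original
# project): with type='full_name' each row is read positionally as
# [repo_id, phrase, start_year], so a minimal DataFrame could look like this.
import pandas as pd

terms = pd.DataFrame([[1, 'commontk/CTK', 2015]],
                     columns=['repo_id', 'phrase', 'start_year'])
papers = literature_search(terms, type='full_name')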

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging

import yapbib.biblist as biblist
from scholar import ScholarQuerier

from . import db

q = ScholarQuerier()

COMMON_FIELDS = ('id', 'title', 'year', 'author', 'url',
                 'citation_count', 'version_count',
                 'citation_url', 'version_url', 'related_url')


class Article(object):

    @staticmethod
    def from_db(d):
        if d is None:
            return None
        a = Article()
        for name in COMMON_FIELDS:
            assert hasattr(d, name), name
            setattr(a, name, getattr(d, name))
        return a

    @property
    def complete(self):
        """Whether article has complete information."""
        for key in COMMON_FIELDS:
            if not hasattr(self, key) or getattr(self, key) is None:
                return False
        return True  # every common field is present and set
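
# Illustrative use of Article.from_db (a sketch, not part of the original
# module): from_db accepts any object exposing the COMMON_FIELDS attributes,
# so a SimpleNamespace stands in for a real db row here.
from types import SimpleNamespace

fake_row = SimpleNamespace(**{name: None for name in COMMON_FIELDS})
fake_row.title = 'Example title'
article = Article.from_db(fake_row)
print(article.complete)  # False -- most fields are still None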

        return url
    except:
        return None


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='biburl.py')
    parser.add_argument('-f', '--bib_file', type=str, required=True,
                        help="Bibtex file <references.bib>")
    args = parser.parse_args()

    # Initialize scholar querier
    querier = ScholarQuerier()
    # Setup scholar settings
    settings = ScholarSettings()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    # Apply settings
    querier.apply_settings(settings)

    # Load bib file
    if not os.path.exists(args.bib_file):
        raise IOError('File does not exist %s' % args.bib_file)
    with open(args.bib_file) as bib_file:
        bib_db = bibtexparser.load(bib_file)

    for j, item in enumerate(bib_db.entries):

def get_results_for(title, author):
    query = SearchScholarQuery()
    query.set_author(author)
    query.set_phrase(title)
    query.set_num_page_results(1)
    query.set_scope(True)

    settings = ScholarSettings()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    querier = ScholarQuerier()
    querier.apply_settings(settings)
    querier.send_query(query)

    for art in querier.articles:
        print(art.as_citation())
        bibtex_split = art.as_citation().split("\n")
        reftype = bibtex_split[0][1:-1].split("{")[0].lower()
        refid = bibtex_split[0][1:-1].split("{")[1].lower()
        bibtex_split.remove(bibtex_split[0])
        # print reftype + " " + refid + " " + str(bibtex_split)
        thismodule = sys.modules[__name__]
        while True:
            try:
                features_of_type = getattr(thismodule, reftype).func_code.co_varnames[
                    1:getattr(thismodule, reftype).func_code.co_argcount]
                break
            except AttributeError:
                var = raw_input("Type " + reftype + " not recognised, please enter a known type: ")
                reftype = var
        while True:
            arranged_name = []
            arranged_value = []
            for i in range(1, 10):
                arranged_name.append(None)
                arranged_value.append(None)
            for line in bibtex_split:
                if line.find("=") > -1:
                    stored_name = line.split("=")[0].strip()
                    stored_value = line.split("=")[1].strip()
                    stored_value = stored_value[1:-(len(stored_value) - stored_value.rfind("}"))]
                    if stored_name in features_of_type:
                        arranged_name[features_of_type.index(stored_name)] = stored_name
                        arranged_value[features_of_type.index(stored_name)] = stored_value
            short_arranged_name = arranged_name[0:arranged_name.index(None)]
            short_arranged_value = arranged_value[0:arranged_value.index(None)]
            if len(short_arranged_name) == len(features_of_type):
                return getattr(thismodule, reftype)(refid, *short_arranged_value).__getprintable__(True)
            else:
                for feature in features_of_type:
                    if feature not in arranged_name:
                        var = raw_input(feature + " is not provided by the retrieved bibtex entry. "
                                        "Would you like to enter it now? (Y) or (N)")
                        if var == "Y":
                            var = raw_input("Enter value for " + feature + ": ")
                            bibtex_split.append(feature + " = {" + var + "}")

ScholarConf.COOKIE_JAR_FILE = 'cookies.txt'

min_sleep_time_sec = 5
max_sleep_time_sec = 10

df = pd.read_csv("../data_seperate_sheet/Profile_Publications_Standard.csv")
# cites = []
# citation_list = []  # [year of pub]

with open('citations_counts_25307.csv', "w") as csv_file:
    writer = csv.writer(csv_file, delimiter=',')
    for k, v in df['Article Title'][25307:].iteritems():  # why is df['Article Title'] of type dict??
        querier = ScholarQuerier()
        settings = ScholarSettings()
        settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
        querier.apply_settings(settings)
        query = SearchScholarQuery()
        query.set_phrase(v)
        query.set_scope(True)
        querier.send_query(query)
        if querier.articles:
            cites = querier.articles[0]['num_citations']
            citation_list = querier.articles[0]['url_citations']
        else:
            cites = 0
            citation_list = ""
        # json_results = []

from string import ascii_lowercase
from pathlib import Path  # Path is used for DST below but was missing from the original excerpt
import re

from pybtex.database import parse_bytes, parse_file
from scholar import ScholarQuerier, ScholarSettings, SearchScholarQuery

DST = Path(__file__).absolute().parents[1] / 'publications.bib'
IGNORE = """
vo2014cytotoxicity
takeilnatriureticpeptideisolatedfromeelbrain
matchintemporal
brodbeck2018transformation
""".split()
ACRONYMS = ['EEG', 'MEG', 'MRI']

querier = ScholarQuerier()
settings = ScholarSettings()
settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
querier.apply_settings(settings)

query = SearchScholarQuery()
query.set_phrase("eelbrain")
query.set_timeframe(2012, None)
query.set_include_patents(False)

bib = parse_file(DST, 'bibtex')

start = 0
while True:
    querier.send_query(query)
    if len(querier.articles) == 0:
        break
    # extract articles

# input: full path to xlsx file containing article titles from scopus. Assumes that
# titles are first column in the sheet 'Qatar_Scopus'.
# output: csv file containing paper titles, citations, possibly free PDF link from web.
from openpyxl import load_workbook
import sys
from scholar import ScholarQuerier, ScholarSettings, SearchScholarQuery, onecsv
from time import sleep
import csv

querier = ScholarQuerier()
settings = ScholarSettings()
querier.apply_settings(settings)
query = SearchScholarQuery()
query.set_scope(True)

alldata = []
counter = 1
xlsxfile = sys.argv[1]
wb = load_workbook(xlsxfile, use_iterators=True)
print(wb.get_sheet_names())
ws = wb.get_sheet_by_name('Qatar_Scopus')
for row in ws.iter_rows(row_offset=1):
    if row[0].value is not None:
        temp = []
        title = row[0].value.encode("utf-8")
        query.set_phrase(title)
        query.set_num_page_results(1)
        querier.send_query(query)
        x = onecsv(querier)
        if (x != None):

    return False


# edit config.json first
mendeley = create_client()

if skip_documents:
    print('Already tagged documents by mendeley_add_citations.py are skipped.')
else:
    print('Processing all documents, including already tagged by mendeley_add_citations.py.')
    print('See skip_documents variable in mendeley_add_citations.py to change this.\n')
print('Tags are added immediately. You can interrupt the script and continue later.\n')
print('citations\tyear\tMendeley library title')

num_skipped = 0
documents = mendeley.library(items=-1)
scholar = ScholarQuerier(count=1)
for docid in documents['document_ids']:
    document = mendeley.document_details(docid)
    if skip_documents and has_citation_tag(document['tags'], ['citations_.*']):
        num_skipped = num_skipped + 1
        continue
    try:
        scholar.query(document['title'])
        scholar_articles = scholar.articles
        if len(scholar_articles) == 0:
            print('No scholar articles found for ' + document['title'])
            continue
    except urllib2.HTTPError as e:
        print(e.msg)