def process(document):
    """Tag ``document`` with the citation count of its best Scholar match.

    On the first call (while the module-level ``save_cookie`` flag is set) a
    throw-away query is sent so that the querier's cookies can be saved; the
    flag is then cleared.

    Returns the ``num_citations`` value of the first Scholar hit, or ``None``
    when there are no hits or when the hit's title is not similar enough to
    ``document.title`` (similarity below ``min_title_match_ratio``).
    """
    global save_cookie

    querier = ScholarQuerier()
    search = SearchScholarQuery()

    # save cookie at first paper
    if save_cookie:
        search.set_phrase("quantum theory")
        querier.send_query(search)
        querier.save_cookies()
        save_cookie = False

    search.set_phrase(document.title)
    querier.send_query(search)

    hits = querier.articles
    if not hits:
        return None
    best = hits[0]

    # Only trust the hit when its title closely matches the document title.
    matcher = difflib.SequenceMatcher(None, document.title, best['title'])
    if matcher.ratio() < min_title_match_ratio:
        return None

    n_citations = best['num_citations']
    replacement = [(tag_pattern, ncitations_to_tag(n_citations))]
    tags = update_tags(document.tags, replacement)
    tags.append(str(n_citations))
    document.update(tags=tags)
    return n_citations
def papers_by_query_api(request):
    """Return the first Scholar hit for the ``phrase`` GET parameter as JSON.

    Responds with ``HttpResponseBadRequest`` for non-GET requests or when
    ``phrase`` is missing/empty. When the query yields no articles, a single
    placeholder paper with empty fields and id ``0`` is returned instead.
    """
    if request.method != 'GET':
        return HttpResponseBadRequest()

    phrase = request.GET.get('phrase', '')
    if not phrase:
        return HttpResponseBadRequest()

    search = SearchScholarQuery()
    search.set_phrase(phrase)
    querier = ScholarQuerier()
    querier.send_query(search)
    found = querier.articles

    if found:
        top = found[0]
        entry = {
            'title': top['title'],
            'id': top['cluster_id'],
            'url': top['url'],
            'excerpt': top['excerpt'],
        }
    else:
        # Placeholder entry so the response always carries exactly one paper.
        entry = {'title': '', 'id': 0, 'url': '', 'excerpt': ''}

    return JsonResponse({'papers': [entry]})
def getRelatedPublications(author):
    """Query Scholar for articles by ``author`` and print the result list.

    The author name is echoed first; nothing is returned.
    """
    print(author)

    # Default settings, applied to the querier that performs the
    # Google Scholar search.
    default_settings = ScholarSettings()
    scholar_querier = ScholarQuerier()
    scholar_querier.apply_settings(default_settings)

    author_query = SearchScholarQuery()
    author_query.set_author(author)
    scholar_querier.send_query(author_query)

    print(scholar_querier.articles)
def getPublications(author):
    """Query Scholar for articles by ``author`` and emit them via ``scholar.txt``.

    The author name is echoed first; nothing is returned.
    """
    print(author)

    scholar_querier = ScholarQuerier()
    scholar_querier.apply_settings(ScholarSettings())

    author_query = SearchScholarQuery()
    author_query.set_author(author)
    scholar_querier.send_query(author_query)

    # Plain-text output of the articles, without the global result fields.
    scholar.txt(scholar_querier, with_globals=False)
def getRelatedPublications(author):
    """Print the articles Scholar returns for an author search.

    Echoes ``author``, runs an author query with default settings and prints
    the querier's ``articles`` attribute. Returns nothing.
    """
    print(author)

    prefs = ScholarSettings()  # default scholar settings
    engine = ScholarQuerier()  # conducts the search on Google Scholar
    engine.apply_settings(prefs)

    by_author = SearchScholarQuery()
    by_author.set_author(author)
    engine.send_query(by_author)

    print(engine.articles)
def getResult(query):
    """Send ``query`` and return details of the first resulting article.

    Returns a ``(citations, url_citations, clusterID)`` tuple. The lookup is
    best-effort: if the query fails or yields no article, the fields that
    could not be read keep their defaults (``0``, ``""``, ``""``).
    """
    querier = ScholarQuerier()
    citations = 0
    url_citations = ""
    clusterID = ""
    try:
        querier.send_query(query)
        attrs = querier.articles[0].attrs
        print(attrs['cluster_id'])
        citations = attrs['num_citations'][0]
        url_citations = attrs['url_citations'][0]
        clusterID = attrs['cluster_id'][0]
    except Exception:
        # Deliberately best-effort: fall back to the defaults above. Unlike a
        # bare ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        pass
    return citations, url_citations, clusterID
def literature_search(query_terms, type='full_name'):
    """
    perform a google scholar query with given terms

    Each row of ``query_terms.values`` is searched in turn. With the default
    ``type`` of ``'full_name'`` a row is ``(id, phrase, start_year)``;
    otherwise it is ``(id, repo_name, phrase, keywords, start_year)``.
    Rows that yield articles are passed to ``process_arts`` and the results
    are accumulated into the returned list. A random 1-9 second pause follows
    every row that produced articles.
    """
    querier = ScholarQuerier()
    settings = ScholarSettings()
    config = ScholarConf()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    querier.apply_settings(settings)

    # NOTE(review): one query object is reused for every row, so fields set
    # for an earlier row stay set unless overwritten -- confirm intended.
    query = SearchScholarQuery()
    papers = []

    for row in query_terms.values:
        repo_id = row[0]

        if type == 'full_name':
            phrase, start_year = row[1], row[2]
            query.set_phrase(phrase)  # commontk/CTK, meoyo/AIPS
            phrase_text = phrase
        else:
            repo_name, phrase, keywords, start_year = row[1], row[2], row[3], row[4]
            if keywords:
                if ',' not in keywords:
                    keywords = keywords + ','
                query.set_words_some(keywords)
            query.set_words(repo_name)
            query.set_phrase(phrase)
            phrase_text = repo_name + ', ' + phrase

        print('search papers for {} ...'.format(phrase_text))
        query.set_timeframe(start_year)
        querier.send_query(query)

        articles = querier.articles
        if len(articles) == 0:
            continue

        papers = papers + process_arts(config, repo_id, phrase_text, articles)

        # Random pause between successful queries.
        time.sleep(random.randrange(1, 10))

    return papers
def getPublications_Title(title): querier = ScholarQuerier() settings = ScholarSettings() querier.apply_settings(settings) query = SearchScholarQuery() publications = [] query.set_words(title) querier.send_query(query) related_list = scholar.json(querier) if related_list: print "No of related publications found : ", print len(related_list) for item in related_list: #print item.keys() #item["relatedTitle"] = title[0] publications.append(item) #time.sleep(random.randrange(10, 40, 2)); #time.sleep(60); return publications
def search(bot, update, args):
    """Bot command handler: run a Scholar search and post the result titles.

    Sends three kinds of messages to the chat the command came from: an echo
    of the search words, the number of results, and one numbered line per
    article title (numbering starts at 1).

    Args:
        bot: object exposing ``send_message(chat_id=..., text=...)``.
        update: incoming update; ``update.message.chat_id`` selects the chat.
        args: list of words following the command.
    """
    search_command = ' '.join(args)
    chat_id = update.message.chat_id
    bot.send_message(chat_id=chat_id,
                     text="You searched for: " + search_command)

    querier = ScholarQuerier()
    query = SearchScholarQuery()
    # NOTE(review): the word *list* is passed here, not the joined string
    # echoed above -- confirm which form set_words() expects.
    query.set_words(args)
    querier.send_query(query)
    articles = querier.articles

    bot.send_message(chat_id=chat_id,
                     text="Number of results: " + str(len(articles)))

    # enumerate() supplies the running number; the previous hand-rolled
    # counter was never incremented, so every line was labelled "1.".
    for position, article in enumerate(articles, 1):
        bot.send_message(chat_id=chat_id,
                         text=str(position) + ". " + article.attrs['title'][0])
def query_scholar_for_papers(author, searchstring):
    """Return the BibTeX citation of the first Scholar hit as a string.

    The query combines ``author`` with the phrase ``searchstring`` and asks
    for 5 results per page. The citation is returned with a trailing newline;
    when there are no hits an apology message is returned instead.
    """
    querier = ScholarQuerier()

    prefs = ScholarSettings()
    prefs.set_citation_format(prefs.CITFORM_BIBTEX)
    prefs.set_per_page_results(5)
    querier.apply_settings(prefs)

    search = SearchScholarQuery()
    search.set_author(author)
    search.set_phrase(searchstring)
    querier.send_query(search)

    if len(querier.articles) > 0:
        return querier.articles[0].as_citation() + '\n'
    return 'Ooopsie. No results. Maybe we ran over the request limit?'
def process(document):
    """Tag ``document`` with the citation count of its best Scholar match.

    Returns the ``num_citations`` value of the first Scholar hit for the
    document title, or ``None`` when there are no hits or when the hit's
    title similarity to ``document.title`` is below
    ``min_title_match_ratio``.
    """
    querier = ScholarQuerier()
    search = SearchScholarQuery()
    search.set_phrase(document.title)
    querier.send_query(search)

    hits = querier.articles
    if not hits:
        return None
    best = hits[0]

    # Reject hits whose title does not closely match the document title.
    similarity = difflib.SequenceMatcher(
        None, document.title, best['title']).ratio()
    if similarity < min_title_match_ratio:
        return None

    n_citations = best['num_citations']
    tags = update_tags(document.tags,
                       [(tag_pattern, ncitations_to_tag(n_citations))])
    document.update(tags=tags)
    return n_citations
def blocked(): print "Test if blocked...." #time.sleep(random.randrange(10, 40, 2)); time.sleep(60); publications = [] querier = ScholarQuerier() settings = ScholarSettings() querier.apply_settings(settings) query = SearchScholarQuery() query.set_author("Ryan Baker") querier.send_query(query) related_list = scholar.json(querier) if related_list: print "Block Test : No of related publications found : ", print len(related_list) for item in related_list: publications.append(item) if len(publications) == 0: return True else: return False
def blocked(): print "Test if blocked...." #time.sleep(random.randrange(10, 40, 2)); time.sleep(60) publications = [] querier = ScholarQuerier() settings = ScholarSettings() querier.apply_settings(settings) query = SearchScholarQuery() query.set_author("Ryan Baker") querier.send_query(query) related_list = scholar.json(querier) if related_list: print "Block Test : No of related publications found : ", print len(related_list) for item in related_list: publications.append(item) if len(publications) == 0: return True else: return False
def papers_by_query_api(request):
    """Look up the ``phrase`` GET parameter on Scholar; return one paper as JSON.

    Non-GET requests and requests without a ``phrase`` get an
    ``HttpResponseBadRequest``. The JSON body always has a one-element
    ``papers`` list: the first hit, or a blank placeholder (id ``0``) when
    nothing was found.
    """
    if request.method != 'GET':
        return HttpResponseBadRequest()

    phrase = request.GET.get('phrase', '')
    if not phrase:
        return HttpResponseBadRequest()

    scholar_query = SearchScholarQuery()
    scholar_query.set_phrase(phrase)
    scholar_querier = ScholarQuerier()
    scholar_querier.send_query(scholar_query)
    papers = scholar_querier.articles

    if not papers:
        paper = {'title': '', 'id': 0, 'url': '', 'excerpt': ''}
    else:
        first = papers[0]
        paper = {
            'title': first['title'],
            'id': first['cluster_id'],
            'url': first['url'],
            'excerpt': first['excerpt'],
        }
    return JsonResponse({'papers': [paper]})
def cites_api(request):
    """Return, as JSON, the papers citing ``paper_id`` on the given ``page``.

    Both ``paper_id`` and ``page`` are read from the GET parameters; a
    missing/falsy ``paper_id``, a missing ``page`` or a non-GET request
    yields ``HttpResponseBadRequest``. Articles without a ``cluster_id`` are
    left out of the ``cites`` list.
    """
    if request.method != 'GET':
        return HttpResponseBadRequest()

    paper_id = request.GET.get('paper_id', 0)
    page = request.GET.get('page', None)
    if page is None or not paper_id:
        return HttpResponseBadRequest()

    querier = ScholarQuerier()
    querier.send_query(CitesScholarQuery(paper_id, page))

    cites = [
        {'title': hit['title'], 'id': hit['cluster_id'], 'url': hit['url']}
        for hit in querier.articles
        if hit['cluster_id']
    ]
    return JsonResponse({'paper_id': paper_id, 'cites': cites})
def getPublications(authors): print authors querier = ScholarQuerier() settings = ScholarSettings() querier.apply_settings(settings) query = SearchScholarQuery() publications = [] for author in authors: if len(author) > 0: print "Using Author : ", print author query.set_author(author) querier.send_query(query) related_list = scholar.json(querier) if related_list: print "No of related publications found : ", print len(related_list) for item in related_list: #print item.keys() #item["relatedAuthor"] = author publications.append(item) #time.sleep(random.randrange(10, 40, 2)); time.sleep(20); return publications
def cites_api(request):
    """List the citing papers of ``paper_id`` (GET parameter) as JSON.

    Requires a GET request carrying a truthy ``paper_id`` and a ``page``
    parameter; anything else is answered with ``HttpResponseBadRequest``.
    Only articles that have a ``cluster_id`` are included in ``cites``.
    """
    if request.method != 'GET':
        return HttpResponseBadRequest()

    params = request.GET
    paper_id = params.get('paper_id', 0)
    page = params.get('page', None)
    if not paper_id or page is None:
        return HttpResponseBadRequest()

    cites_query = CitesScholarQuery(paper_id, page)
    scholar_querier = ScholarQuerier()
    scholar_querier.send_query(cites_query)

    def as_cite(article):
        # Reduce an article to the fields exposed by this endpoint.
        return {
            'title': article['title'],
            'id': article['cluster_id'],
            'url': article['url'],
        }

    cites = [as_cite(a) for a in scholar_querier.articles if a['cluster_id']]
    return JsonResponse({'paper_id': paper_id, 'cites': cites})
def getPublications(authors): print authors querier = ScholarQuerier() settings = ScholarSettings() querier.apply_settings(settings) query = SearchScholarQuery() publications = [] for author in authors: if len(author) > 0: print "Using Author : ", print author query.set_author(author) querier.send_query(query) related_list = scholar.json(querier) if related_list: print "No of related publications found : ", print len(related_list) for item in related_list: #print item.keys() #item["relatedAuthor"] = author publications.append(item) #time.sleep(random.randrange(10, 40, 2)); time.sleep(20) return publications
ACRONYMS = ['EEG', 'MEG', 'MRI'] querier = ScholarQuerier() settings = ScholarSettings() settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX) querier.apply_settings(settings) query = SearchScholarQuery() query.set_phrase("eelbrain") query.set_timeframe(2012, None) query.set_include_patents(False) bib = parse_file(DST, 'bibtex') start = 0 while True: querier.send_query(query) if len(querier.articles) == 0: break # extract articles for article in querier.articles: querier.get_citation_data(article) # convert to pybtex entry data = parse_bytes(article.citation_data, 'bibtex') assert len(data.entries) == 1 for entry in data.entries.values(): if entry.key in IGNORE: continue elif entry.type != 'article': continue elif entry.key in bib.entries: if entry.fields['journal'] == bib.entries[entry.key].fields['journal']:
def _leading_filled(slots):
    """Return the leading run of ``slots`` up to (not including) the first None."""
    filled = []
    for slot in slots:
        if slot is None:
            break
        filled.append(slot)
    return filled


def get_results_for(title, author):
    """Fetch a BibTeX entry for ``title``/``author`` and build its printable form.

    The first article returned by the query is converted: its BibTeX header
    gives the reference type and id, and the remaining ``name = {value}``
    lines supply the fields. The reference type names a callable in this
    module; that callable's parameters (after the first, which receives the
    id) define the required fields and their order. The user is prompted
    for an unknown type and for any missing field.

    Returns ``<type>(<id>, *values).__getprintable__(True)`` for the first
    article, or ``None`` when the query yields no article.
    """
    query = SearchScholarQuery()
    query.set_author(author)
    query.set_phrase(title)
    query.set_num_page_results(1)
    query.set_scope(True)

    settings = ScholarSettings()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)

    querier = ScholarQuerier()
    querier.apply_settings(settings)
    querier.send_query(query)

    thismodule = sys.modules[__name__]

    for art in querier.articles:
        citation = art.as_citation()
        print(citation)

        bibtex_split = citation.split("\n")
        # Header is expected to look like "@<type>{<id>,": drop the leading
        # "@" and the trailing "," and split at the brace.
        header = bibtex_split.pop(0)[1:-1].split("{")
        reftype = header[0].lower()
        refid = header[1].lower()

        # Resolve the reference type to a callable of this module, asking
        # the user for another type name until one is found.
        while True:
            try:
                code = getattr(thismodule, reftype).func_code
                features_of_type = code.co_varnames[1:code.co_argcount]
                break
            except AttributeError:
                reftype = raw_input("Type " + reftype + " not recongised, please enter a known type: ")

        # NOTE(review): declining a missing field (any answer but "Y") asks
        # again on the next pass; there is no way out except supplying it.
        while True:
            # One slot per required field. (The buffers used to be a fixed
            # nine slots, which overflowed for types with more fields and
            # broke the prefix lookup when exactly nine were filled.)
            arranged_name = [None] * len(features_of_type)
            arranged_value = [None] * len(features_of_type)

            for line in bibtex_split:
                if "=" not in line:
                    continue
                # Split at the FIRST "=" only, so values that themselves
                # contain "=" (e.g. URLs) are not truncated.
                stored_name, stored_value = line.split("=", 1)
                stored_name = stored_name.strip()
                stored_value = stored_value.strip()
                # Keep what lies between the opening brace and the last "}".
                closing = stored_value.rfind("}")
                stored_value = stored_value[1:closing] if closing != -1 else ""
                if stored_name in features_of_type:
                    position = features_of_type.index(stored_name)
                    arranged_name[position] = stored_name
                    arranged_value[position] = stored_value

            short_arranged_name = _leading_filled(arranged_name)
            short_arranged_value = _leading_filled(arranged_value)

            if len(short_arranged_name) == len(features_of_type):
                return getattr(thismodule, reftype)(refid, *short_arranged_value).__getprintable__(True)

            for feature in features_of_type:
                if feature not in arranged_name:
                    var = raw_input(feature + " is not provided by the retrieved bibtex entry. Would you like to enter it now? \n(Y) or (N)")
                    if var == "Y":
                        var = raw_input("Enter value for " + feature + ": ")
                        bibtex_split.append(feature + " = {" + var + "}")
""".split() ACRONYMS = ['EEG', 'MEG', 'MRI'] querier = ScholarQuerier() settings = ScholarSettings() settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX) querier.apply_settings(settings) query = SearchScholarQuery() query.set_phrase("eelbrain") query.set_timeframe(2012, None) query.set_include_patents(False) bib = parse_file(DST, 'bibtex') start = 0 while True: querier.send_query(query) if len(querier.articles) == 0: break # extract articles for article in querier.articles: querier.get_citation_data(article) # convert to pybtex entry data = parse_bytes(article.citation_data, 'bibtex') assert len(data.entries) == 1 for entry in data.entries.values(): if entry.key in IGNORE: continue elif entry.type != 'article': continue elif entry.key in bib.entries: if entry.fields['journal'] == bib.entries[