def process(document):
    """Tag *document* with the citation count of its best scholar match.

    The document title is sent as a phrase query through ``ScholarQuerier``.
    Only the first returned article is considered, and it is accepted only
    when its title is similar enough to ``document.title`` (difflib ratio of
    at least the module-level ``min_title_match_ratio``).

    On acceptance the document's tags are rebuilt with ``update_tags`` so the
    ``tag_pattern`` entry becomes the tag produced by ``ncitations_to_tag``,
    the raw citation count is appended as an extra string tag, and the result
    is written back with ``document.update(tags=...)``.

    Side effect: while the module-level ``save_cookie`` flag is truthy, a
    throwaway "quantum theory" query is sent first and the querier's cookies
    are saved; the flag is then cleared so this happens once.

    Returns the citation count of the accepted article, or ``None`` when the
    search yields no articles or the title match is too weak.
    """
    global save_cookie

    querier = ScholarQuerier()
    search = SearchScholarQuery()

    # Save the cookie once, on the first paper processed.
    if save_cookie:
        search.set_phrase("quantum theory")
        querier.send_query(search)
        querier.save_cookies()
        save_cookie = False

    search.set_phrase(document.title)
    querier.send_query(search)

    hits = querier.articles
    if len(hits) == 0:
        return None

    # Only the top-ranked hit is ever compared against the document.
    best_hit = hits[0]
    matcher = difflib.SequenceMatcher(None, document.title, best_hit['title'])
    if matcher.ratio() < min_title_match_ratio:
        return None

    existing_tags = document.tags
    n_citations = best_hit['num_citations']
    replacements = [(tag_pattern, ncitations_to_tag(n_citations))]

    tags = update_tags(existing_tags, replacements)
    # Besides the bucketed tag, the exact count is kept as its own tag.
    tags.append(str(n_citations))
    document.update(tags=tags)

    return n_citations
# Run one scholar query for the phrase `v` and record the citation data of the
# first hit as a CSV row.
# NOTE(review): `k`, `v` and `writer` are defined outside this fragment --
# presumably by an enclosing loop over the phrases; confirm against the caller.
querier = ScholarQuerier()
settings = ScholarSettings()
settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
querier.apply_settings(settings)

query = SearchScholarQuery()
query.set_phrase(v)
query.set_scope(True)
querier.send_query(query)

# Only the first returned article is used. When the query returns nothing,
# fall back to zero citations and an empty citations URL so a row is still
# written for this phrase.
if querier.articles:
    first_article = querier.articles[0]
    # Plain subscription instead of calling __getitem__ by hand.
    cites = first_article['num_citations']
    citation_list = first_article['url_citations']
else:
    cites = 0
    citation_list = ""

# Disabled: dump every returned article's attributes to a per-query JSON file.
# json_results = []
# file_name = 'query_data/cites_for_article_'+str(k+1)+'.json'
# for art in querier.articles:
#     json_results.append(
#         {key: art.attrs[key][0] for key in art.attrs.keys()})
# with open(file_name, 'wb') as f:
#     json.dump(json_results, f)

print('{} iter *** {} cited'.format(k + 1, cites))
writer.writerow([cites, citation_list])
querier.save_cookies()

# Pause for a random number of seconds between the configured bounds before
# the next query (presumably to avoid hammering the service -- confirm).
sleep(randint(min_sleep_time_sec, max_sleep_time_sec))