def find_article(canidates):
    """Look up each candidate paper title on Google Scholar via scholar.py.

    For every title in ``canidates`` a query is sent through a
    ``ScholarQuerier``.  Each returned article is scored as
    ``str_metric(canidate, title)`` divided by the length of the longer of
    the two strings; the article with the lowest score is kept as the best
    match for that candidate.  The candidate, its best match and the score
    are printed.

    A random user agent from ``USER_AGENTS`` is chosen for every query, and
    a random pause (at least 5 seconds) is taken between queries.

    ``canidates`` must be re-iterable (e.g. a list), since it is walked more
    than once.

    NOTE(review): the original docstring said the function "returns the best
    result", but no ``return`` statement is present in this block, so it
    returns ``None`` -- confirm what callers expect.
    """
    # 5000 is the "no match yet" sentinel; presumably far above any real
    # normalised score -- TODO confirm against str_metric's range.
    canidate_scores = [5000 for _ in canidates]
    canidate_best_match = ['' for _ in canidates]
    querier = ScholarQuerier()
    delay = 0  # no pause before the very first query
    for ii, canidate in enumerate(canidates):
        sleep(delay)
        querier.UA = random.choice(USER_AGENTS)
        querier.query(canidate)
        matched = False
        for art in querier.articles:
            title = art['title'].encode('ascii', 'ignore')
            # Floor the denominator at 1 so two empty strings cannot cause
            # a ZeroDivisionError.
            longest = max(len(title), len(canidate), 1)
            # float() forces true division: under Python 2 an integer
            # metric would otherwise be floor-divided, collapsing almost
            # every score to 0.
            score = float(str_metric(canidate, title)) / longest
            if score < canidate_scores[ii]:
                canidate_scores[ii] = score
                canidate_best_match[ii] = art
                matched = True
        print('----------------------')
        print('Canidate: ' + canidate)
        if matched:
            print('Match: ' +
                  canidate_best_match[ii]['title'].encode('ascii', 'ignore'))
        else:
            # The query returned no usable article; the placeholder '' in
            # canidate_best_match cannot be indexed with 'title'.
            print('Match: (no articles returned)')
        print('Score ' + str(canidate_scores[ii]))
        querier.clear_articles()
        # Randomised pause before the next query, never shorter than 5 s.
        delay = max(random.gauss(30, 30), 5)
print('See skip_documents variable in mendeley_add_citations.py to change this.\n') print('Tags are added immediately. You can interrupt the script and continue later.\n') print('citations\tyear\tMendeley library title') num_skipped = 0 documents = mendeley.library(items=-1) scholar = ScholarQuerier(count=1) for docid in documents['document_ids']: document = mendeley.document_details(docid) if skip_documents and has_citation_tag(document['tags'], ['citations_.*']): num_skipped = num_skipped+1 continue try: scholar.query(document['title']) scholar_articles = scholar.articles if len(scholar_articles) == 0: print('No scholar articles found for ' + document['title']) continue except urllib2.HTTPError as e: print e.msg print e.reason sys.exit(-1) if 'year' in document: year = document['year'] else: year = -1 print('%s\t\t%s\t%s' %