コード例 #1
0
ファイル: scholar_get.py プロジェクト: exrhizo/citation_graph
def find_article(canidates):
    """
    Look up each candidate paper title with scholar.py and report the
    closest-matching article found for it.

    For every title in ``canidates`` one query is issued with a randomly
    chosen user agent.  Each returned article is scored as
    ``str_metric(canidate, title)`` divided by the length of the longer
    of the two strings; the article with the lowest score is kept as the
    best match.  The candidate, the best match's title and its score are
    printed.  After each query a delay of ``max(random.gauss(30, 30), 5)``
    is computed and passed to ``sleep`` before the next query.

    NOTE(review): the previous docstring said the function "returns the
    best result", but there is no return statement -- results are only
    printed.  Confirm what callers expect before adding one.
    """
    canidate_scores = [float('inf') for dummy in canidates]
    canidate_best_match = [None for dummy in canidates]
    querier = ScholarQuerier()
    delay = 0  # no wait before the very first query
    for ii, canidate in enumerate(canidates):
        sleep(delay)
        querier.UA = random.choice(USER_AGENTS)
        querier.query(canidate)
        for art in querier.articles:
            title = art['title'].encode('ascii', 'ignore')
            # `or 1` avoids a ZeroDivisionError when both strings are empty.
            longest = max(len(title), len(canidate)) or 1
            # float() keeps the normalisation a true division; if str_metric
            # returns an int, Python 2's `/` would floor almost every score
            # to 0 and make the first article always win.
            score = float(str_metric(canidate, title)) / longest
            if score < canidate_scores[ii]:
                canidate_scores[ii] = score
                canidate_best_match[ii] = art
        best = canidate_best_match[ii]
        print('----------------------')
        print('Canidate: ' + canidate)
        if best is None:
            # The query returned no articles; previously this path crashed
            # while indexing the '' placeholder with ['title'].
            print('Match:    (no articles returned)')
        else:
            print('Match:    ' + best['title'].encode('ascii', 'ignore'))
            print('Score     ' + str(canidate_scores[ii]))
        querier.clear_articles()
        # Randomised pause (never below 5) before the next query.
        delay = max(random.gauss(30, 30), 5)
コード例 #2
0
# Tell the user how to change the skipping behaviour and that the run can be
# interrupted and resumed.
print('See skip_documents variable in mendeley_add_citations.py to change this.\n')
print('Tags are added immediately. You can interrupt the script and continue later.\n')

# Header row for the tab-separated per-document output printed in the loop below.
print('citations\tyear\tMendeley library title')
# Count of documents skipped in the loop below.
num_skipped = 0
# Library listing; its 'document_ids' entry is iterated below.
# NOTE(review): items=-1 presumably requests every item -- confirm against the client API.
documents = mendeley.library(items=-1)
# Scholar client reused for every document lookup.
# NOTE(review): count=1 presumably limits each query to one result -- confirm.
scholar = ScholarQuerier(count=1)

for docid in documents['document_ids']:
	document = mendeley.document_details(docid)
	if skip_documents and has_citation_tag(document['tags'], ['citations_.*']):
		num_skipped = num_skipped+1
		continue

	try:
		scholar.query(document['title'])
		scholar_articles = scholar.articles
		if len(scholar_articles) == 0:
			print('No scholar articles found for ' + document['title'])
			continue
	except urllib2.HTTPError as e:
		print e.msg
		print e.reason
		sys.exit(-1)

	if 'year' in document:
		year = document['year']
	else:
		year = -1

	print('%s\t\t%s\t%s' %