Exemple #1
0
def papers_by_query_api(request):
    """Answer a GET request with the first scholar hit for ``phrase`` as JSON.

    Non-GET requests, and GET requests whose ``phrase`` parameter is missing
    or empty, get an HTTP 400 response.  Otherwise the JSON body carries a
    one-element ``papers`` list: the first article returned by the querier,
    or a placeholder entry with empty fields when nothing was found.
    """
    if request.method != 'GET':
        return HttpResponseBadRequest()

    phrase = request.GET.get('phrase', '')
    if not phrase:
        return HttpResponseBadRequest()

    query = SearchScholarQuery()
    query.set_phrase(phrase)
    querier = ScholarQuerier()
    querier.send_query(query)
    found = querier.articles

    if found:
        top = found[0]
        entry = {
            'title': top['title'],
            'id': top['cluster_id'],
            'url': top['url'],
            'excerpt': top['excerpt'],
        }
    else:
        # Placeholder so the client always receives exactly one entry.
        entry = {'title': '', 'id': 0, 'url': '', 'excerpt': ''}
    return JsonResponse({'papers': [entry]})
def getRelatedPublications(author):
    """Print *author*, run an author search and print the articles found.

    Nothing is returned; both values go to stdout.
    """
    print(author)
    scholar_settings = ScholarSettings()
    # The querier is what actually conducts the search on Google Scholar.
    scholar_querier = ScholarQuerier()
    scholar_querier.apply_settings(scholar_settings)
    author_query = SearchScholarQuery()
    author_query.set_author(author)
    scholar_querier.send_query(author_query)
    print(scholar_querier.articles)
Exemple #3
0
def getResult(query):
    """Send *query* and return citation details of the first article found.

    The first article's ``cluster_id`` attribute entry is printed as a
    side effect.

    Returns:
        A tuple ``(citations, url_citations, clusterID)`` read from the first
        article's ``attrs``.  Any field that could not be read keeps its
        default (``0``, ``""`` and ``""`` respectively).
    """
    querier = ScholarQuerier()
    citations = 0
    url_citations = ""
    clusterID = ""
    try:
        querier.send_query(query)
        attrs = querier.articles[0].attrs
        print(attrs['cluster_id'])
        citations = attrs['num_citations'][0]
        url_citations = attrs['url_citations'][0]
        clusterID = attrs['cluster_id'][0]
    except Exception:
        # Deliberate best-effort lookup: a failed query or a missing article
        # or attribute leaves the defaults in place.  ``Exception`` (rather
        # than a bare ``except``) lets KeyboardInterrupt/SystemExit through.
        pass
    return citations, url_citations, clusterID
Exemple #4
0
def process(document):
    """Look up *document* by title and record its citation count in its tags.

    On the first call (while the module-level ``save_cookie`` flag is set) a
    throw-away query is sent and the querier's cookies are saved.

    Returns the ``num_citations`` value of the first hit, or ``None`` when
    the search found nothing or the first hit's title is not similar enough
    to ``document.title`` (ratio below ``min_title_match_ratio``).
    """
    global save_cookie

    scholar = ScholarQuerier()
    query = SearchScholarQuery()

    if save_cookie:
        # Warm-up request whose only purpose is to obtain cookies to save.
        query.set_phrase("quantum theory")
        scholar.send_query(query)
        scholar.save_cookies()
        save_cookie = False

    query.set_phrase(document.title)
    scholar.send_query(query)
    hits = scholar.articles
    if not hits:
        return None

    best = hits[0]
    matcher = difflib.SequenceMatcher(None, document.title, best['title'])
    if matcher.ratio() < min_title_match_ratio:
        return None

    current_tags = document.tags
    ncitations = best['num_citations']
    tags = update_tags(current_tags, [(tag_pattern, ncitations_to_tag(ncitations))])
    # This variant additionally stores the raw count as its own tag.
    tags.append(str(ncitations))
    document.update(tags=tags)

    return ncitations
def search(bot, update, args):
    """Run a scholar search for *args* and report the results to the chat.

    Sends three kinds of messages to ``update.message.chat_id`` via
    ``bot.send_message``: an echo of the search terms, the number of
    results, and one numbered line per article with its title.

    Args:
        bot: object exposing ``send_message(chat_id=..., text=...)``.
        update: object exposing ``message.chat_id``.
        args: sequence of search words.
    """
    chat_id = update.message.chat_id
    search_command = ' '.join(args)

    bot.send_message(chat_id=chat_id, text="You searched for: " + search_command)

    querier = ScholarQuerier()
    query = SearchScholarQuery()
    # NOTE(review): the word list itself (not the joined string) is handed to
    # set_words -- confirm that is what the scholar API expects.
    query.set_words(args)
    querier.send_query(query)

    articles = querier.articles

    bot.send_message(chat_id=chat_id, text="Number of results: " + str(len(articles)))

    # Number results from 1; the counter used previously was never advanced,
    # so every line was labelled "1.".
    for position, article in enumerate(articles, 1):
        bot.send_message(chat_id=chat_id,
                         text=str(position) + ". " + article.attrs['title'][0])
def getPublications_Title(title):
	querier = ScholarQuerier()
	settings = ScholarSettings()
	querier.apply_settings(settings)
	query = SearchScholarQuery()
	publications = []
	query.set_words(title)
	querier.send_query(query)
	related_list = scholar.json(querier)
	if related_list:
		print "No of related publications found : ",
		print len(related_list)
		for item in related_list:
			#print item.keys()
			#item["relatedTitle"] = title[0]
			publications.append(item)
	#time.sleep(random.randrange(10, 40, 2));
	#time.sleep(60);
	return publications
Exemple #7
0
def process(document):
    """Look up *document* by title and update its citation tag.

    Returns the ``num_citations`` value of the first hit, or ``None`` when
    the search found nothing or the first hit's title is not similar enough
    to ``document.title`` (ratio below ``min_title_match_ratio``).
    """
    scholar = ScholarQuerier()
    query = SearchScholarQuery()
    query.set_phrase(document.title)
    scholar.send_query(query)

    hits = scholar.articles
    if not hits:
        return None

    best = hits[0]
    matcher = difflib.SequenceMatcher(None, document.title, best['title'])
    if matcher.ratio() < min_title_match_ratio:
        return None

    current_tags = document.tags
    ncitations = best['num_citations']
    tags = update_tags(current_tags, [(tag_pattern, ncitations_to_tag(ncitations))])
    document.update(tags=tags)

    return ncitations
def process(document):
    """Find *document* on scholar by its title and refresh its citation tag.

    Gives back the first hit's ``num_citations``.  ``None`` is returned
    instead when there are no hits, or when the first hit's title matches
    ``document.title`` with a ratio below ``min_title_match_ratio``.
    """
    title_query = SearchScholarQuery()
    scholar = ScholarQuerier()
    title_query.set_phrase(document.title)
    scholar.send_query(title_query)

    results = scholar.articles
    if not results:
        return None

    top_hit = results[0]
    similarity = difflib.SequenceMatcher(
        None, document.title, top_hit['title']).ratio()
    if similarity < min_title_match_ratio:
        return None

    previous_tags = document.tags
    count = top_hit['num_citations']
    replacement = [(tag_pattern, ncitations_to_tag(count))]
    document.update(tags=update_tags(previous_tags, replacement))

    return count
Exemple #9
0
def query_scholar_for_papers(author, searchstring):
    """Return the BibTeX-format citation of the first hit for author + phrase.

    The querier is configured for BibTeX citations and five results per
    page.  When the search yields no articles a fixed apology string is
    returned instead.
    """
    querier = ScholarQuerier()
    settings = ScholarSettings()
    settings.set_citation_format(settings.CITFORM_BIBTEX)
    settings.set_per_page_results(5)
    querier.apply_settings(settings)

    query = SearchScholarQuery()
    query.set_author(author)
    query.set_phrase(searchstring)
    querier.send_query(query)

    if querier.articles:
        return querier.articles[0].as_citation() + '\n'
    return 'Ooopsie. No results. Maybe we ran over the request limit?'
def blocked():
	print "Test if blocked...."
	#time.sleep(random.randrange(10, 40, 2));
	time.sleep(60);
	publications = []
	querier = ScholarQuerier()
	settings = ScholarSettings()
	querier.apply_settings(settings)
	query = SearchScholarQuery()
	query.set_author("Ryan Baker")
	querier.send_query(query)
	related_list = scholar.json(querier)
	if related_list:
		print "Block Test : No of related publications found : ",
		print len(related_list)
		for item in related_list:
			publications.append(item)
	if len(publications) == 0:
		return True
	else:
		return False
Exemple #11
0
def find_article(canidates):
    """
    Query scholar.py for every candidate paper title and print the
    best-scoring article found for each one.

    Each article returned for a candidate is scored with
    ``str_metric(candidate, title) / max(len(title), len(candidate))``
    and the lowest score wins.  The candidate, the winning title (empty
    when the query returned no articles) and the score are printed.
    As written, the function prints its findings and returns nothing.

    A random delay (``random.gauss(30, 30)``, but never below 5) is passed
    to ``sleep`` before every query after the first.
    """
    # 5000 is the starting "no match yet" score any real match should beat.
    canidate_scores = [5000 for _ in canidates]
    canidate_best_match = ['' for _ in canidates]
    querier = ScholarQuerier()
    delay = 0
    for ii, canidate in enumerate(canidates):
        sleep(delay)
        querier.UA = random.choice(USER_AGENTS)
        querier.query(canidate)
        # Tracked separately so an empty result set prints an empty match
        # instead of failing on ''['title'].
        best_title = ''
        for art in querier.articles:
            title = art['title'].encode('ascii', 'ignore')
            # NOTE(review): under Python 2 this is integer division if
            # str_metric returns an int -- confirm its return type.
            score = str_metric(canidate, title) / max(len(title), len(canidate))
            if score < canidate_scores[ii]:
                canidate_scores[ii] = score
                canidate_best_match[ii] = art
                best_title = title
        print('----------------------')
        print('Canidate: ' + canidate)
        print('Match:    ' + best_title)
        print('Score     ' + str(canidate_scores[ii]))
        querier.clear_articles()
        delay = max(random.gauss(30, 30), 5)
def cites_api(request):
    """Answer a GET request with the citing papers of ``paper_id`` as JSON.

    Responds with HTTP 400 unless the request is a GET carrying a truthy
    ``paper_id`` and a ``page`` parameter.  Articles without a
    ``cluster_id`` are left out of the ``cites`` list.
    """
    if request.method != 'GET':
        return HttpResponseBadRequest()

    paper_id = request.GET.get('paper_id', 0)
    page = request.GET.get('page', None)
    if not paper_id or page is None:
        return HttpResponseBadRequest()

    query = CitesScholarQuery(paper_id, page)
    querier = ScholarQuerier()
    querier.send_query(query)

    cites = [
        {'title': paper['title'], 'id': paper['cluster_id'], 'url': paper['url']}
        for paper in querier.articles
        if paper['cluster_id']
    ]
    return JsonResponse({'paper_id': paper_id, 'cites': cites})
def papers_by_query_api(request):
    """Return, as JSON, the first article matching the ``phrase`` GET parameter.

    HTTP 400 is returned for non-GET requests and for a missing or empty
    ``phrase``.  The JSON ``papers`` list always has one entry: the first
    article found, or an all-empty placeholder when there were no results.
    """
    if request.method != 'GET':
        return HttpResponseBadRequest()

    phrase = request.GET.get('phrase', '')
    if not phrase:
        return HttpResponseBadRequest()

    query = SearchScholarQuery()
    query.set_phrase(phrase)
    querier = ScholarQuerier()
    querier.send_query(query)
    articles = querier.articles

    # Start from the placeholder and overwrite it when there is a real hit.
    paper = {'title': '', 'id': 0, 'url': '', 'excerpt': ''}
    if articles:
        first = articles[0]
        paper = {
            'title': first['title'],
            'id': first['cluster_id'],
            'url': first['url'],
            'excerpt': first['excerpt'],
        }
    return JsonResponse({'papers': [paper]})
def getPublications(authors):
	print authors
	querier = ScholarQuerier()
	settings = ScholarSettings()
	querier.apply_settings(settings)
	query = SearchScholarQuery()
	publications = []
	for author in authors:
		if len(author) > 0:
			print "Using Author : ", 
			print author
			query.set_author(author)
			querier.send_query(query)
			related_list = scholar.json(querier)
			if related_list:
				print "No of related publications found : ",
				print len(related_list)
				for item in related_list:
					#print item.keys()
					#item["relatedAuthor"] = author
					publications.append(item)
			#time.sleep(random.randrange(10, 40, 2));
			time.sleep(20);
	return publications
Exemple #15
0
def cites_api(request):
    """Return, as JSON, the papers citing ``paper_id`` on the requested page.

    A 400 response is produced for non-GET requests, for a falsy
    ``paper_id`` and for a missing ``page``.  Only articles that have a
    ``cluster_id`` are listed under ``cites``.
    """
    if request.method != 'GET':
        return HttpResponseBadRequest()

    params = request.GET
    paper_id = params.get('paper_id', 0)
    page = params.get('page', None)
    if page is None or not paper_id:
        return HttpResponseBadRequest()

    query = CitesScholarQuery(paper_id, page)
    querier = ScholarQuerier()
    querier.send_query(query)

    cites = []
    for paper in querier.articles:
        if paper['cluster_id']:
            cites.append({
                'title': paper['title'],
                'id': paper['cluster_id'],
                'url': paper['url'],
            })

    return JsonResponse({'paper_id': paper_id, 'cites': cites})
Exemple #16
0
def getPublications(author):
    """Print *author*, search by that author and emit results via ``scholar.txt``.

    Nothing is returned.
    """
    print(author)
    scholar_querier = ScholarQuerier()
    scholar_querier.apply_settings(ScholarSettings())

    author_query = SearchScholarQuery()
    author_query.set_author(author)
    scholar_querier.send_query(author_query)

    scholar.txt(scholar_querier, with_globals=False)
Exemple #17
0
def getRelatedPublications(author):
    """Print *author*, then the articles an author search returns for them.

    Output goes to stdout; there is no return value.
    """
    print(author)

    # Settings object handed to the querier before the search is made.
    search_settings = ScholarSettings()
    # The querier conducts the search on Google Scholar.
    searcher = ScholarQuerier()
    searcher.apply_settings(search_settings)

    by_author = SearchScholarQuery()
    by_author.set_author(author)
    searcher.send_query(by_author)

    print(searcher.articles)
def literature_search(query_terms, type='full_name'):
    """
    Run one scholar query per row of ``query_terms.values`` and gather the
    processed results.

    With ``type == 'full_name'`` a row is read as (id, phrase, start year).
    For any other ``type`` it is read as (id, repo name, phrase, keywords,
    start year).  Rows whose query yields no articles are skipped; for the
    others the output of ``process_arts`` is appended to the returned list
    and a random pause of 1-9 seconds follows.
    """
    querier = ScholarQuerier()
    settings = ScholarSettings()
    config = ScholarConf()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    querier.apply_settings(settings)
    # A single query object is reused (and re-configured) for every row.
    query = SearchScholarQuery()

    papers = []
    for item in query_terms.values:
        repo_id = item[0]

        if type == 'full_name':
            phrase, start_year = item[1], item[2]
            query.set_phrase(phrase)  # commontk/CTK, meoyo/AIPS
            phrase_text = phrase
        else:
            repo_name, phrase, keywords, start_year = (
                item[1], item[2], item[3], item[4])
            if keywords:
                # Make sure the keyword string contains a comma before it
                # is handed to set_words_some.
                if ',' not in keywords:
                    keywords = keywords + ','
                query.set_words_some(keywords)
            query.set_words(repo_name)
            query.set_phrase(phrase)
            phrase_text = repo_name + ', ' + phrase

        print('search papers for {} ...'.format(phrase_text))
        query.set_timeframe(start_year)
        querier.send_query(query)

        articles = querier.articles
        if not articles:
            continue

        papers = papers + process_arts(config, repo_id, phrase_text, articles)
        time.sleep(random.randrange(1, 10))

    return papers
def getPublications_Title(title):
    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    publications = []
    query.set_words(title)
    querier.send_query(query)
    related_list = scholar.json(querier)
    if related_list:
        print "No of related publications found : ",
        print len(related_list)
        for item in related_list:
            #print item.keys()
            #item["relatedTitle"] = title[0]
            publications.append(item)
    #time.sleep(random.randrange(10, 40, 2));
    #time.sleep(60);
    return publications
Exemple #20
0
def query_scholar_for_papers(author, searchstring):
    """Search by author and phrase; return the first hit's citation text.

    Settings request BibTeX citations and five results per page.  The
    citation is returned with a trailing newline; a fixed message is
    returned when no article came back.
    """
    querier = ScholarQuerier()
    settings = ScholarSettings()
    settings.set_citation_format(settings.CITFORM_BIBTEX)
    settings.set_per_page_results(5)
    querier.apply_settings(settings)

    query = SearchScholarQuery()
    query.set_author(author)
    query.set_phrase(searchstring)
    querier.send_query(query)

    if not querier.articles:
        return 'Ooopsie. No results. Maybe we ran over the request limit?'
    return querier.articles[0].as_citation() + '\n'
def blocked():
    print "Test if blocked...."
    #time.sleep(random.randrange(10, 40, 2));
    time.sleep(60)
    publications = []
    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    query.set_author("Ryan Baker")
    querier.send_query(query)
    related_list = scholar.json(querier)
    if related_list:
        print "Block Test : No of related publications found : ",
        print len(related_list)
        for item in related_list:
            publications.append(item)
    if len(publications) == 0:
        return True
    else:
        return False
def getPublications(authors):
    print authors
    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    publications = []
    for author in authors:
        if len(author) > 0:
            print "Using Author : ",
            print author
            query.set_author(author)
            querier.send_query(query)
            related_list = scholar.json(querier)
            if related_list:
                print "No of related publications found : ",
                print len(related_list)
                for item in related_list:
                    #print item.keys()
                    #item["relatedAuthor"] = author
                    publications.append(item)
            #time.sleep(random.randrange(10, 40, 2));
            time.sleep(20)
    return publications
Exemple #23
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import yapbib.biblist as biblist
from scholar import ScholarQuerier
from . import db

# Module-level ScholarQuerier instance, created at import time.
q = ScholarQuerier()

# Attribute names that Article.from_db copies from a database object and
# that Article.complete checks for presence.
COMMON_FIELDS = ('id', 'title', 'year', 'author', 'url', 'citation_count',
                 'version_count', 'citation_url', 'version_url', 'related_url')


class Article(object):
    """Record object whose attributes are the names listed in COMMON_FIELDS."""

    @staticmethod
    def from_db(d):
        """Build an Article by copying every COMMON_FIELDS attribute from *d*.

        Returns None when *d* is None.  An AssertionError naming the field is
        raised if *d* lacks one of the COMMON_FIELDS attributes.
        """
        if d is None:
            return None
        a = Article()
        for name in COMMON_FIELDS:
            assert hasattr(d, name), name
            setattr(a, name, getattr(d, name))
        return a

    @property
    def complete(self):
        """Whether article has complete information."""
        # Incomplete as soon as any COMMON_FIELDS attribute is absent or None.
        # NOTE(review): as shown, the loop falls through without an explicit
        # return, so a fully populated article yields None rather than True --
        # the definition may be cut off here; confirm against the full file.
        for key in COMMON_FIELDS:
            if not hasattr(self, key) or getattr(self, key) is None:
                return False
import re

from pybtex.database import parse_bytes, parse_file
from scholar import ScholarQuerier, ScholarSettings, SearchScholarQuery


# publications.bib located in the parent of this file's directory.
DST = Path(__file__).absolute().parents[1] / 'publications.bib'
# Whitespace-separated identifiers -- presumably citation keys to leave out;
# TODO confirm against the code that consumes IGNORE.
IGNORE = """
vo2014cytotoxicity
takeilnatriureticpeptideisolatedfromeelbrain
matchintemporal
brodbeck2018transformation
""".split()
ACRONYMS = ['EEG', 'MEG', 'MRI']

# Querier configured to return citations in BibTeX format.
querier = ScholarQuerier()
settings = ScholarSettings()
settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
querier.apply_settings(settings)
# Query: phrase "eelbrain", timeframe starting 2012 with no end given,
# patents excluded.
query = SearchScholarQuery()
query.set_phrase("eelbrain")
query.set_timeframe(2012, None)
query.set_include_patents(False)


# Parse the existing bibliography file as BibTeX.
bib = parse_file(DST, 'bibtex')
start = 0
# Keep querying until a request comes back with no articles.
while True:
    querier.send_query(query)
    if len(querier.articles) == 0:
        break
Exemple #25
0
            return url
    except:
        return None


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='biburl.py')
    parser.add_argument('-f',
                        '--bib_file',
                        type=str,
                        required=True,
                        help="Bibtex file <references.bib>")
    args = parser.parse_args()

    # Initialize scholar querier
    querier = ScholarQuerier()

    # Setup scholar settings
    settings = ScholarSettings()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)

    # Apply settings
    querier.apply_settings(settings)

    # Load bib file
    if not os.path.exists(args.bib_file):
        raise IOException('File does not exist %s' % args.bib_file)

    with open(args.bib_file) as bib_file:
        bib_db = bibtexparser.load(bib_file)
        for j, item in enumerate(bib_db.entries):
def get_results_for(title, author):
    """Search for a reference by title and author and build an entry from its BibTeX.

    The query asks for one result per page with scope set to True, and the
    querier is configured for BibTeX citations.  For the first article
    returned, the citation is printed and parsed: the first line supplies the
    reference type and id, the remaining ``name = {value}`` lines supply the
    field values.  The reference type is looked up as a callable in this
    module; its positional parameter names (after the first) define which
    fields are needed and in what order.  Missing types and missing fields
    are requested interactively via ``raw_input``.

    Returns the result of ``__getprintable__(True)`` on the object built by
    calling the type's callable with the id and the arranged values.  When
    the search returns no articles the loop body never runs and the function
    returns None implicitly.
    """
    
    query = SearchScholarQuery()
    query.set_author(author)
    query.set_phrase(title)
    query.set_num_page_results(1)
    query.set_scope(True)

    settings = ScholarSettings()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
 
    querier = ScholarQuerier()
    querier.apply_settings(settings)
    querier.send_query(query)

    for art in querier.articles:
        
        print art.as_citation();
        
        bibtex_split = art.as_citation().split("\n")
        # First line: drop its first and last character, then split on "{"
        # to get the lower-cased reference type and reference id.
        reftype = bibtex_split[0][1:-1].split("{")[0].lower(); 
        refid = bibtex_split[0][1:-1].split("{")[1].lower(); 
        bibtex_split.remove(bibtex_split[0])
    
        #print reftype + " " + refid + " " + str(bibtex_split)
    
        thismodule = sys.modules[__name__]

        # Resolve the reference type to a callable defined in this module,
        # prompting for another type name until one is found.
        while(True):
            
            try:
                # Positional parameter names of the callable, skipping the first.
                features_of_type = getattr(thismodule, reftype).func_code.co_varnames[ 1: getattr(thismodule, reftype).func_code.co_argcount ]
                break;
            except AttributeError:
                var = raw_input("Type " + reftype + " not recongised, please enter a known type: ");
                reftype = var;
        
        # Repeat until every required field has a value.
        # NOTE(review): answering anything other than "Y" for a missing field
        # adds nothing, so the same prompts come round again on the next pass.
        while (True):
            arranged_name = []
            arranged_value = []
            # Nine placeholder slots, one per possible field position.
            # NOTE(review): a type with more than nine fields would index
            # past these lists, and nine filled slots make .index(None)
            # below raise ValueError -- confirm no type has that many fields.
            for i in range(1, 10):
                arranged_name.append(None)
                arranged_value.append(None)
           
            for line in bibtex_split:
                if ( line.find("=") > -1 ):
                    stored_name = line.split("=")[0].strip()
                    # Only the text between the first and second "=" is kept.
                    stored_value = line.split("=")[1].strip();
                    # Drop the first character and everything from the last "}" on.
                    stored_value = stored_value[1:-(len(stored_value)-stored_value.rfind("}"))]
                    if stored_name in features_of_type:
                        # Place the field at the position of its parameter.
                        arranged_name[features_of_type.index(stored_name)] = stored_name
                        arranged_value[features_of_type.index(stored_name)] = stored_value
              
            # Keep the leading run of filled slots (up to the first None).
            short_arranged_name = arranged_name[ 0 : arranged_name.index(None)];
            short_arranged_value = arranged_value[ 0 : arranged_value.index(None)];
            
            if len(short_arranged_name) == len(features_of_type):
                return getattr(thismodule, reftype)(refid, *short_arranged_value).__getprintable__(True)
            else:
                # Offer to fill in each missing field by hand; accepted values
                # are appended as extra "name = {value}" lines and re-parsed.
                for feature in features_of_type:
                    if ( feature not in arranged_name ):
                        var = raw_input(feature + " is not provided by the retrieved bibtex entry. Would you like to enter it now? (Y) or (N)");
                        if var == "Y":
                            var = raw_input("Enter value for " + feature + ": ");
                            bibtex_split.append(feature + " = {" + var + "}");
Exemple #27
0
# Set the cookie jar file name on the scholar configuration class.
ScholarConf.COOKIE_JAR_FILE = 'cookies.txt'
# Sleep bounds; not referenced in the part of the script shown here.
min_sleep_time_sec = 5
max_sleep_time_sec = 10

df = pd.read_csv("../data_seperate_sheet/Profile_Publications_Standard.csv")

#cites =[]
#citation_list = [] # [year of pub]

with open('citations_counts_25307.csv', "w") as csv_file:
    writer = csv.writer(csv_file, delimiter=',')
    # Iterate over the 'Article Title' column from position 25307 onwards;
    # k is the index label, v the title.
    for k,v in df['Article Title'][25307:].iteritems(): 
        # why is df['Article Title'] of type dict??
        
        # A fresh querier, settings object and query are built for every title.
        querier = ScholarQuerier()
        settings = ScholarSettings()
        settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
        querier.apply_settings(settings)
        query = SearchScholarQuery()
        query.set_phrase(v)
        query.set_scope(True)
        querier.send_query(query)
        # Take the values from the first article, or fall back to 0 / "".
        if querier.articles:
            cites=querier.articles[0].__getitem__('num_citations')
            citation_list=querier.articles[0].__getitem__('url_citations')
        else:
            cites=0
            citation_list= ""
            
    #    json_results = []
from string import ascii_lowercase
import re

from pybtex.database import parse_bytes, parse_file
from scholar import ScholarQuerier, ScholarSettings, SearchScholarQuery

# publications.bib located in the parent of this file's directory.
DST = Path(__file__).absolute().parents[1] / 'publications.bib'
# Whitespace-separated identifiers -- presumably citation keys to leave out;
# TODO confirm against the code that consumes IGNORE.
IGNORE = """
vo2014cytotoxicity
takeilnatriureticpeptideisolatedfromeelbrain
matchintemporal
brodbeck2018transformation
""".split()
ACRONYMS = ['EEG', 'MEG', 'MRI']

# Querier configured to return citations in BibTeX format.
querier = ScholarQuerier()
settings = ScholarSettings()
settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
querier.apply_settings(settings)
# Query: phrase "eelbrain", timeframe starting 2012 with no end given,
# patents excluded.
query = SearchScholarQuery()
query.set_phrase("eelbrain")
query.set_timeframe(2012, None)
query.set_include_patents(False)

# Parse the existing bibliography file as BibTeX.
bib = parse_file(DST, 'bibtex')
start = 0
# Keep querying until a request comes back with no articles.
while True:
    querier.send_query(query)
    if len(querier.articles) == 0:
        break
    # extract articles
#input: full path to xlsx file containing article titles from scopus. Assumes that 
#titles are first column in the sheet 'Qatar_Scopus'. 
#output: csv file containing paper titles, citations, possibly free PDF link from web. 
from openpyxl import load_workbook
import sys
from scholar import ScholarQuerier, ScholarSettings, SearchScholarQuery,onecsv
from time import sleep
import csv

# One querier and one query object are created up front; the query has its
# scope flag set to True.
querier = ScholarQuerier()
settings = ScholarSettings()
querier.apply_settings(settings)
query = SearchScholarQuery()
query.set_scope(True)
alldata=[]
counter=1

# The workbook path is the first command-line argument.
xlsxfile=sys.argv[1]
wb = load_workbook(xlsxfile, use_iterators=True)
print wb.get_sheet_names()
# Titles are read from the sheet named 'Qatar_Scopus'.
ws = wb.get_sheet_by_name('Qatar_Scopus')

for row in ws.iter_rows(row_offset=1):
	if row[0].value is not None:
		temp=[]
		title=row[0].value.encode("utf-8")
		query.set_phrase(title)
		query.set_num_page_results(1)
		querier.send_query(query)
 	   	x=onecsv(querier)
	  	if (x!=None):
	return False

# edit config.json first
mendeley = create_client()

# Tell the user which mode the skip_documents flag selects.
if skip_documents:
    print('Already tagged documents by mendeley_add_citations.py are skipped.')
else:
    print('Processing all documents, including already tagged by mendeley_add_citations.py.')   
print('See skip_documents variable in mendeley_add_citations.py to change this.\n')
print('Tags are added immediately. You can interrupt the script and continue later.\n')

# Header row for the tab-separated progress output.
print('citations\tyear\tMendeley library title')
num_skipped = 0
documents = mendeley.library(items=-1)
scholar = ScholarQuerier(count=1)

for docid in documents['document_ids']:
	document = mendeley.document_details(docid)
	# Count and skip documents that already carry a 'citations_...' tag
	# when skipping is enabled.
	if skip_documents and has_citation_tag(document['tags'], ['citations_.*']):
		num_skipped = num_skipped+1
		continue

	try:
		# Look the document up by title; move on when nothing is found.
		scholar.query(document['title'])
		scholar_articles = scholar.articles
		if len(scholar_articles) == 0:
			print('No scholar articles found for ' + document['title'])
			continue
	except urllib2.HTTPError as e:
		# Report the HTTP error message and carry on with the next document.
		print e.msg