Example #1
0
def main():
    """Look up a BibTeX entry for every paper title in ``PaperList.xlsx``.

    Reads the ``PaperName`` column of sheet ``Sheet1``, queries
    ``gs.query`` once per title, and writes the input sheet plus a new
    ``bibtex`` column to ``bibtexFile.xlsx``.
    """
    # Path to the Excel sheet; the paper titles are read from the column
    # headed 'PaperName'.
    pathToFile = "PaperList.xlsx"

    xl = pd.ExcelFile(pathToFile)
    df = xl.parse("Sheet1")
    bt = []
    for title in df['PaperName']:
        # Non-breaking spaces become plain spaces; every other non-ASCII
        # character is dropped before querying.
        args = title.replace(u'\xa0', ' ').encode('ascii', 'ignore')
        biblist = gs.query(args)
        if not biblist:
            # Keep one output row per input row even when nothing is found.
            print("No result found for: {}".format(title))
            bt.append('')
            continue
        print(biblist[0])
        entry = biblist[0].replace(u'\n ', ' ')
        # Drop non-ASCII characters but store text rather than bytes in
        # the output column.
        bt.append(entry.encode('ascii', 'ignore').decode('ascii'))
    df1 = pd.DataFrame({'bibtex': bt})
    f = pd.concat([df, df1], axis=1)

    # The context manager closes the writer on exit; the original never
    # saved or closed it.
    with pd.ExcelWriter('bibtexFile.xlsx', engine='xlsxwriter') as writer:
        f.to_excel(writer, sheet_name='Sheet1')
Example #2
0
def extractr(filePath):
    """Write ``bibtexFile.xlsx`` with a BibTeX entry for every listed paper.

    Args:
        filePath: Path to an Excel workbook whose ``Sheet1`` has a
            ``PaperName`` column of paper titles.

    The output workbook is created in the current working directory and
    contains the input sheet plus a ``bibtex`` column holding the first
    result of ``gs.query`` for each title (empty when nothing is found).
    """
    sdir = os.path.join(os.getcwd(), 'bibtexFile.xlsx')
    df = pd.ExcelFile(filePath).parse("Sheet1")
    bt = []
    for title in df['PaperName']:
        # Non-breaking spaces become plain spaces; every other non-ASCII
        # character is dropped before querying.
        args1 = title.replace(u'\xa0', ' ').encode('ascii', 'ignore')
        biblist = gs.query(args1)
        if not biblist:
            # Keep one output row per input row even when nothing is found.
            print("No result found for: {}".format(title))
            bt.append('')
            continue
        print(biblist[0])
        entry = biblist[0].replace(u'\n ', ' ')
        bt.append(entry.encode('ascii', 'ignore').decode('utf-8'))
    df1 = pd.DataFrame({'bibtex': bt})
    f = pd.concat([df, df1], axis=1)

    # ``ExcelWriter.save()`` no longer exists in pandas 2.x; the context
    # manager writes and closes the file on every pandas version.
    with pd.ExcelWriter(sdir, engine='xlsxwriter') as writer:
        f.to_excel(writer, sheet_name='Sheet1')
def pull_info_from_gscholar(query, accepted_fields=None):
    """Look up *query* on Google Scholar and extract fields from the result.

    Only the first entry returned by ``gscholar.query`` is inspected.

    Args:
        query: Search string handed to ``gscholar.query``.
        accepted_fields: Names of the ``name={value}`` fields to extract.
            Defaults to a fixed list of common bibliographic fields.

    Returns:
        A dict mapping each field found in the entry to its value; fields
        that do not occur are omitted.

    Raises:
        IndexError: If ``gscholar.query`` returns an empty sequence.
    """
    import gscholar
    bibtex_string = gscholar.query(query)[0]
    if accepted_fields is None:
        accepted_fields = [
            'title', 'year', 'volume', 'number', 'pages', 'ISBN', 'journal',
            'publisher', 'month', 'author', 'doi'
        ]
    details = {}
    for field in accepted_fields:
        # The lookbehind stops a field name from matching as the tail of a
        # longer one (e.g. 'title' inside 'booktitle'); re.escape keeps
        # the name literal.
        pattern = r'(?<!\w)' + re.escape(field) + r'={([^}]*)}'
        value = re.search(pattern, bibtex_string)
        if value:
            details[field] = value.group(1)
    return details
Example #4
0
def display_info(fpath):
    """Gather metadata for the file at *fpath* and pass it on as a note.

    Metadata comes from ``pdfinfo``, is completed with whatever
    ``extract_from_pdftext`` returns, and is then refined with the first
    BibTeX entry ``gscholar.query`` finds for the DOI, title or abstract
    (in that order of preference). The resulting title and body are handed
    to ``insert_into_evernote`` together with *fpath*.

    Raises:
        IOError: If *fpath* does not exist.
    """
    if not os.path.exists(fpath):
        raise IOError('File Not Found')
    info = pdfinfo(fpath)

    guessed_data = ['title', 'author']
    # True when ``info`` lacks at least one of the fields above.
    guess = not all(field in info for field in guessed_data)
    info_from_text = extract_from_pdftext(fpath, guess)
    for k in info_from_text:
        # Compare by value: ``is`` against a string literal tests object
        # identity and is not a reliable equality check.
        if k not in info or (k == 'title' and not is_title_like(info[k])):
            info[k] = info_from_text[k]

    # First available of doi / title / abstract, else None.
    query_string = next(
        (info[key] for key in ('doi', 'title', 'abstract') if key in info),
        None)

    bibtex = ''
    if query_string:  # query google
        try:
            query_string = query_string.encode('ascii', 'replace')
            text = gscholar.query(query_string.decode('utf8', 'replace'),
                                  gscholar.FORMAT_BIBTEX, False)
            if text:
                text = text[0]  # assume the first one
                lines = text.splitlines()
                if 'doi' in info:
                    # Insert the DOI right after the entry's first line.
                    lines.insert(1, '  doi={{{}}},'.format(info['doi']))
                bibtex = '\n'.join(lines)
        except urllib2.URLError:
            # Best effort: without a lookup the note is built from the
            # locally extracted metadata only.
            pass

    if bibtex:
        update_info_with_bibtex(info, bibtex, guessed_data, True)

    notetitle = ''.join([info.get('title', 'Unknown Title'),
                         ' - ', info.get('author', 'Authors Unknown')])
    if guess:
        notetitle += ' METADATA NEEDS REVIEW'

    notebody = u''
    if 'abstract' in info:
        notebody += info['abstract'] + u'\n\n'
    if bibtex:
        notebody += bibtex.decode('utf8') + u'\n\n'

    insert_into_evernote(notetitle, notebody, fpath)
Example #5
0
def ask_google_for_clean_citations():
    """Query Google Scholar for every title and collect the results.

    Titles come from ``get_list_of_titles_to_feed_google()``. Each one is
    passed to ``gscholar.query(..., outformat=4)`` and the return value is
    appended to the result list. After more than 25 attempts the function
    pauses via ``countdown_timer`` between queries (but not after the last
    title) to rate-limit itself.

    Returns:
        list: One ``gscholar.query`` return value per successful query.
    """
    # Fetch the titles once instead of on every iteration.
    titles = list(get_list_of_titles_to_feed_google())
    last_index = len(titles) - 1
    washed_bibdata_from_google = []
    counter = 0
    for index, dirty_title in enumerate(titles):
        try:
            print(dirty_title)
            counter += 1
            # Note that the readme and documentation on github are wrong as
            # of 1 Dec 2013: gscholar.py needs 2 arguments instead of the
            # 1 argument they suggest. See
            # http://stackoverflow.com/questions/13200709/extract-google-scholar-results-using-python-or-r
            washed_bibdata_from_google.append(
                gscholar.query(dirty_title, outformat=4))
        except Exception:
            # NOTE(review): the failed title is not retried after the wait;
            # the loop simply moves on to the next title.
            print("\nYou have hit Google scholar with too many queries and they banned you. The program will now wait a few hours and try again.")
            print("The relevant error message is \n")
            print(traceback.format_exc())
            countdown_timer(9999, 10000)
        # Rate-limit to avoid a ban; can be turned off for short lists
        # (approx. 50 citations or fewer). The position check (rather than
        # comparing titles) stays correct when a title occurs twice.
        if counter > 25 and index != last_index:
            countdown_timer(207, 503)
    return washed_bibdata_from_google
Example #6
0
def scholar_get(title, db):
    """Return title, authors, year and BibTeX text for *title*.

    Args:
        title: Search string; also the cache key in *db*.
        db: Mutable mapping from title to a raw BibTeX string. On a cache
            miss the first ``gscholar.query`` result is stored in it.

    Returns:
        dict: Keys ``title`` (str), ``authors`` (list of str), ``year``
        (str) and ``bibtex`` (str). All values are empty when the query
        yields no result.
    """
    if title not in db:
        query = gscholar.query(title)
        if not query:
            return {"title": "", "authors": [], "year": "", "bibtex": ""}
        db[title] = query[0]
        # Random pause after each real lookup to space out the requests.
        time.sleep(random.randint(0, 10))

    parser = bibtexparser.bparser.BibTexParser()
    parser.customization = customizations
    raw_entry = bibtexparser.loads(db[title], parser=parser)
    entry = raw_entry.entries[0]

    # Collapse whitespace, then split on the standalone word "and" only:
    # a bare split("and") would also cut names such as "Anderson".
    author_field = " ".join(entry['author'].replace(", ", " ").split())

    return {
        "title": entry['title'].strip(),
        "authors": author_field.split(" and "),
        "year": entry.get('year', "").strip(),
        "bibtex": bibtexparser.dumps(raw_entry),
    }
Example #7
0
	def query_gscholar(self, query):
		"""Return whatever ``gscholar.query`` yields for *query*."""
		# Imported on demand so gscholar is only needed when this is called.
		import gscholar
		return gscholar.query(query)
Example #8
0
import gscholar
import os
import sys 

# Take the current bibtex open on chrome and append it to the bibtex file

# Set up bibtex filepath
filepath = "/Users/janmeppe/Dropbox/School/Master/Master Thesis/tex/"
filename = "master-thesis.bib"
os.chdir(filepath)  # `filename` below is resolved relative to this directory

# The search string is the first command-line argument.
if len(sys.argv) < 2:
    sys.exit("Usage: %s <search terms>" % sys.argv[0])
user_arg = sys.argv[1]

query_unicode = gscholar.query(user_arg)
if not query_unicode:
    sys.exit("No results found for: %s" % user_arg)
# NOTE(review): encoding to UTF-8 byte strings before writing and splitting
# them with str separators assumes Python 2 semantics -- confirm the target
# interpreter before porting.
query = [x.encode('UTF8') for x in query_unicode]

# Append bibtex output to the prespecified file
with open(filename, 'a') as out:
    for item in query:
        out.write("%s" % item)
    out.write("\n")  # such that the next item is well spaced

# @article{nameYEARword, ... find cite_id between the first '{' and ','
cite_id = query[0].split(',', 1)[0].split('{', 1)[1]
# Backslashes are escaped explicitly; the printed text is unchanged.
print("Citation added succefully!\n\\cite{%s}\n\\citeA{%s}" % (cite_id, cite_id))
Example #9
0
# coding:utf-8
import gscholar

# Query with an empty search string and keep the return value.
# NOTE(review): the empty query looks like a placeholder -- confirm intent.
items = gscholar.query('')

# Second query; its return value is discarded.
gscholar.query("some author or title")
Example #10
0
def main():
    """Command-line entry point: look up a PDF or a search string.

    Exactly one positional argument is expected. If it names an existing
    path it is passed to ``gs.pdflookup``; otherwise it is sent to
    ``gs.query`` as a search string. The first result (or all results
    with ``--all``) is printed; ``--rename`` additionally calls
    ``gs.rename_file`` with the first result when a file was given.

    Exits with status 1 when nothing is found or when ``--rename`` is used
    without a file; usage errors exit through ``parser.error``.
    """
    usage = 'Usage: %prog [options] {pdf | "search terms"}'
    parser = optparse.OptionParser(usage)
    parser.add_option("-a",
                      "--all",
                      action="store_true",
                      dest="all",
                      default=False,
                      help="show all bibtex results")
    parser.add_option("-d",
                      "--debug",
                      action="store_true",
                      dest="debug",
                      default=False,
                      help="show debugging output")
    parser.add_option("-r",
                      "--rename",
                      action="store_true",
                      dest="rename",
                      default=False,
                      help="rename file (asks before doing it)")
    parser.add_option(
        "-f",
        "--outputformat",
        dest='output',
        default="bibtex",
        help=
        "Output format. Available formats are: bibtex, endnote, refman, wenxianwang [default: %default]"
    )
    parser.add_option("-s",
                      "--startpage",
                      dest='startpage',
                      help="Page number to start parsing PDF file at.")
    parser.add_option('-V',
                      '--version',
                      action='store_true',
                      help='Print version and quit.')

    (options, args) = parser.parse_args()
    if options.debug is True:
        logger.setLevel(logging.DEBUG)
    if options.version:
        print(gs.__VERSION__)
        return

    output_formats = {
        'bibtex': gs.FORMAT_BIBTEX,
        'endnote': gs.FORMAT_ENDNOTE,
        'refman': gs.FORMAT_REFMAN,
        'wenxianwang': gs.FORMAT_WENXIANWANG,
    }
    if options.output not in output_formats:
        # Previously an unknown format left ``outformat`` unbound and the
        # lookup below failed with a NameError.
        parser.error("Unknown output format: {0}".format(options.output))
    outformat = output_formats[options.output]

    if len(args) != 1:
        # parser.error() prints the message and exits; nothing after it runs.
        parser.error("No argument given, nothing to do.")
    args = args[0]
    pdfmode = False
    if os.path.exists(args):
        logger.debug(
            "File exist, assuming you want me to lookup the pdf: {filename}.".
            format(filename=args))
        pdfmode = True
        # Pass the --all flag itself; the original passed the builtin ``all``.
        biblist = gs.pdflookup(args, options.all, outformat,
                               options.startpage)
    else:
        logger.debug(
            "Assuming you want me to lookup the query: {query}".format(
                query=args))
        biblist = gs.query(args, outformat, options.all)
    if len(biblist) < 1:
        print("No results found, try again with a different query!")
        sys.exit(1)
    if options.all is True:
        logger.debug("All results:")
        for i in biblist:
            print(i)
    else:
        logger.debug("First result:")
        print(biblist[0])
    if options.rename is True:
        if not pdfmode:
            print(
                "You asked me to rename the pdf but didn't tell me which file to rename, aborting."
            )
            sys.exit(1)
        else:
            gs.rename_file(args, biblist[0])
Example #11
0
######## Google Scholar

import scholarly

# Take the first item produced by the keyword search and call fill() on it.
search_query_2 = scholarly.search_keyword('thermodynamics')
keyword = next(search_query_2).fill()

# Titles of that item's publications, followed by the publication objects
# themselves collected into a list.
title2 = [publication.bib['title'] for publication in keyword.publications]
title3 = list(keyword.publications)

################################################################################

import gscholar

# Same search term through gscholar; the return value is discarded.
gscholar.query("thermodynamics")
Example #12
0
import bibtexparser
import gscholar
import time
# Load the existing bibliography; every entry is looked up again below.
with open('ref.bib') as bib:
    db = bibtexparser.load(bib)
ress = []

for it in db.entries:
    title = it['title'].strip('{}')
    entry_id = it['ID']  # renamed from ``id`` to avoid shadowing the builtin
    try:
        res = gscholar.query(title)
    except Exception as e:
        # Stop the whole run on the first failed lookup.
        print(e)
        break
    time.sleep(10)  # pause between consecutive queries
    if not res:
        # Nothing found: skip this entry instead of failing on res[0].
        print("No result found for: {}".format(title))
        continue
    it_gs = bibtexparser.loads(res[0]).entries[0]
    # Keep the citation key of the original entry.
    it_gs['ID'] = entry_id

    ress.append(it_gs)
    print(it_gs)
Example #13
0
# Look up every PDF and keep the first result of each lookup.
for pdf_name in pdffiles:
    entry = gscholar.pdflookup(join(input_dir, pdf_name), allresults=False, outformat=4)[0]
    bib_list.add(entry)

# Get bib entries for titles in papers.txt:
try:
    # ``with`` closes the file; the original ``f.close`` (no parentheses)
    # never did.
    with open(join(input_dir, 'papers.txt'), "r") as f:
        papers = f.read().split("\n")
except Exception:
    # Best effort: without a readable papers.txt only the PDFs are used.
    papers = []
for p in papers:
    if p == "":
        continue
    try:
        entry = gscholar.query(p)[0]
    except IndexError:
        # An empty result list means the lookup found nothing for this line.
        print("Bad input line: {}".format(p))
        failed_list.append(p)
    else:
        bib_list.add(entry)

# Print bib file
if bib_list:
    with open(join(output_dir, "library.bib"), "w+") as f:
        f.write("The following entries have been generated by a script and therefore should be checked for accuracy.\nCreated by Ross Gales, https://github.com/rosscg/generate-bib\n\n")
        f.writelines(bib_list)

# Print failed lines to file
if len(failed_list) > 0:
 def getGScholar(self):
     """Store and return the first ``query(self.doi, 4)`` result as text.

     The first item of the query result is decoded as UTF-8, saved on
     ``self.bibtex`` and returned.
     """
     first_hit = query(self.doi, 4)[0]
     self.bibtex = first_hit.decode('utf-8')
     return self.bibtex
path_reading_group = path_work + r'\Reading_group'
path_code = path_work + r'\Code\Etienne_repos\code_google_scholar'

# Regular files only (sub-directories are ignored).
list_papers = [f for f in os.listdir(path_reading_group)
               if os.path.isfile(os.path.join(path_reading_group, f))]
# NOTE(review): os.listdir returns entries in arbitrary order, so which two
# files this drops is not guaranteed -- confirm or filter by name instead.
list_papers = list_papers[:-2] # exclude two files...
# txt_file = open(path_code + r'\list_papers_reading_group.txt', 'w')
# txt_file.write('\n'.join(list_papers))
# txt_file.close()

bibtex_entries = []
list_not_found = []
for paper in list_papers:
  print(paper)
  name_parts = paper.split('_')
  if len(name_parts) < 3:
    # The search string is the third '_'-separated field; without it there
    # is nothing to look up.
    list_not_found.append(paper)
    continue
  search_title = name_parts[2]
  # Remove the extension as a suffix. rstrip('.pdf') strips any trailing
  # run of the characters '.', 'p', 'd', 'f' and would eat into the title.
  if search_title.endswith('.pdf'):
    search_title = search_title[:-len('.pdf')]
  paper_bibtex = gscholar.query(search_title, 4)
  if paper_bibtex:
    bibtex_entries.append('%s\n' % paper_bibtex[0])
  else:
    list_not_found.append(paper)
  time.sleep(0.5)
str_all_bibtex = ''.join(bibtex_entries)

# with open(path_code + r'\file_bibtex.txt', 'w') as fichier:
  # fichier.write(str_bibtex)

  
# # ############
# # DEPRECATED
# # ############
  
# # build urllib2 opener