Beispiel #1
0
def gscholar_bib(title):
    """Search Google Scholar for `title` and summarise the first result.

    The query is delegated to ``skid.utils.gscholar.query``. The first result
    is echoed to stdout, parsed as BibTeX with pybtex, and reduced to a small
    metadata dict. Field values are returned as parsed; no LaTeX decoding is
    applied.

    Args:
        title: Title string used as the search query.

    Returns:
        A dict with the keys ``'title'``, ``'year'`` (``''`` when absent) and
        ``'author'`` (names joined with ``' ; '``, ``''`` when the entry has
        no authors), or ``None`` when the search yields no results, the
        BibTeX cannot be parsed, or the parsed text contains no entry.

    Raises:
        KeyboardInterrupt, urllib.error.URLError: re-raised after a warning
            has been printed when the search itself fails.
        KeyError: if the parsed entry has no ``title`` field.
    """
    # Imports stay local so the dependencies are only needed when a scholar
    # lookup is actually requested.
    import urllib.error
    from skid.utils import gscholar
    import pybtex
    from pybtex.database.input import bibtex
    from nameparser import HumanName

    print(colors.magenta % 'Google scholar results for title:')
    try:
        results = gscholar.query(title, allresults=False)
    except (KeyboardInterrupt, urllib.error.URLError) as err:
        print('[%s] %s' % (colors.yellow % 'warn',
                           'Google scholar search failed (error: %s)' % err))
        # Bare ``raise`` propagates the same exception with its traceback.
        raise

    for raw in results:
        print(raw)

        try:
            database = bibtex.Parser().parse_stream(StringIO(raw))
        except pybtex.scanner.TokenRequired as err:
            print('failed to parse bibtex with error:', err)
            return None

        # A result without any BibTeX entry is handled like a parse failure
        # instead of crashing on tuple unpacking.
        entry = next(iter(database.entries.values()), None)
        if entry is None:
            print('failed to parse bibtex with error:', 'no entry found')
            return None

        found_title = entry.fields['title']
        year = entry.fields.get('year', '')

        # Entries without an author (e.g. editor-only) yield an empty string.
        persons = entry.persons.get('author', [])
        author = ' ; '.join(str(HumanName(str(person))) for person in persons)

        print(found_title)
        print(year)
        print(author)
        print()

        # Only the first result is ever used.
        return {'title': found_title, 'year': year, 'author': author}

    return None
Beispiel #2
0
def gscholar_bib(title):
    # perform a Google scholar search based on the title.
    import urllib2
    from skid.utils import gscholar
    import pybtex
    from pybtex.database.input import bibtex
    from nameparser import HumanName
    #import latexcodec

    print magenta % 'Google scholar results for title:'
    try:
        results = gscholar.query(title, allresults=False)
    except (KeyboardInterrupt, urllib2.URLError) as e:
        results = []
        print '[%s] %s' % (yellow % 'warn',
                           'Google scholar search failed (error: %s)' % e)

    for x in results:
        print x

        x = x.decode('ascii', errors='ignore')

        try:
            b = bibtex.Parser().parse_stream(StringIO(x))
        except pybtex.scanner.TokenRequired as e:
            print 'failed to parse bibtex with error:', e
            return

        [(_, e)] = b.entries.items()

        #print yellow % (dict(e.fields),)
        title = e.fields['title']
        year = e.fields.get('year', '')
        author = ' ; '.join(
            unicode(HumanName(x))
            for x in re.split(r'\band\b', e.fields['author']))

        #title = title.decode('latex')
        #author = author.decode('latex').replace('{','').replace('}','')

        print title
        print year
        print author
        print

        return {'title': title, 'year': year, 'author': author}
Beispiel #3
0
def gscholar_bib(title):
    # perform a Google scholar search based on the title.
    import urllib2
    from skid.utils import gscholar
    import pybtex
    from pybtex.database.input import bibtex
    from nameparser import HumanName
    #import latexcodec

    print magenta % 'Google scholar results for title:'
    try:
        results = gscholar.query(title, allresults=False)
    except (KeyboardInterrupt, urllib2.URLError) as e:
        results = []
        print '[%s] %s' % (yellow % 'warn', 'Google scholar search failed (error: %s)' % e)

    for x in results:
        print x

        x = robust_read_string(x)

        try:
            b = bibtex.Parser().parse_stream(StringIO(x))
        except pybtex.scanner.TokenRequired as e:
            print 'failed to parse bibtex with error:', e
            return

        [(_,e)] = b.entries.items()

        #print yellow % (dict(e.fields),)
        title = e.fields['title']
        year = e.fields.get('year', '')
        author = ' ; '.join(unicode(HumanName(x)) for x in re.split(r'\band\b', e.fields['author']))

        #title = title.decode('latex')
        #author = author.decode('latex').replace('{','').replace('}','')

        print title
        print year
        print author
        print

        return {'title': title,
                'year': year,
                'author': author}
Beispiel #4
0
from skid import config
from skid.add import Document

from skid.utils.gscholar import query
from arsenal.terminal import colors
from random import shuffle

# Pick one cached PDF at random and show what the scholar query returns
# for its recorded title.
files = config.CACHE.files()
shuffle(files)

for f in files:
    # Non-PDF files are skipped; only the first PDF encountered is processed.
    if f.endswith('.pdf'):
        d = Document(f)
        meta = d.parse_notes()

        # Echo the cached location and the metadata read from the notes.
        print(colors.green % ('file://' + d.cached))
        print(colors.yellow % meta['title'])
        print(colors.yellow % ' ; '.join(meta['author']))

        # Query by title and list every hit.
        results = query(meta['title'])
        print(len(results), 'results')
        for x in results:
            print(x)

        break