def rlinput(prompt, prefill=''):
    """Prompt for a line of input with ``prefill`` already typed in.

    Args:
        prompt: text shown in front of the editable line.
        prefill: text placed in the readline buffer before editing starts.

    Returns:
        The line entered by the user, without the trailing newline.
    """
    def hook():
        readline.insert_text(prefill)

    # ``raw_input`` only exists on Python 2; on Python 3 the equivalent
    # builtin is ``input``.  (Python 2's ``input`` evaluates what is typed,
    # so it must only be the fallback.)
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    readline.set_startup_hook(hook)
    try:
        return read_line(prompt)
    finally:
        # Always remove the hook so later prompts are not pre-filled.
        readline.set_startup_hook()
def main(infile, outfile, start_year, start_abstract):
    """Interactively review a JSONL file of documents and save the result.

    Every non-blank line of ``infile`` is parsed as one JSON document.  The
    documents are sorted by their 'file' entry and offered for review one by
    one; for each accepted document the first few non-empty content lines
    are shown and the title and author metadata can be edited.  All
    documents, reviewed or not, are then written to ``outfile`` as JSONL.

    Args:
        infile: path of the JSONL file to read.
        outfile: path of the JSONL file to write.
        start_year: skip documents whose year is lower than this; -1
            disables the filter.  The year is taken from a file name of
            the form ``<year>_<abstract>.<ext>``.
        start_abstract: within ``start_year``, skip documents whose
            abstract number is lower than this; -1 disables the filter.

    Exits with status 1 if ``infile`` does not exist or if
    ``start_abstract`` is given without ``start_year``.
    """
    # check arguments
    if not os.path.exists(infile):
        print('%s file not found, exiting.' % infile)
        sys.exit(1)

    if start_abstract > -1 and start_year == -1:
        print('Warning: start_abstract specified, but no start year; ' +
              'please specify starting year with -y.')
        sys.exit(1)

    # ``raw_input`` only exists on Python 2; fall back to ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Read in JSONL file; blank lines are not documents, so skip them.
    # The file is closed again before the interactive session starts.
    with open(infile, 'r') as jf:
        docs = [json.loads(line) for line in jf if line.strip()]
    print('Obtained %d docs.' % len(docs))

    # Sort by filename
    docs.sort(key=lambda d: d['file'])

    filtering = start_year > -1 or start_abstract > -1
    for doc in docs:
        if filtering:
            # Only parse the file name when a starting point was requested,
            # so unusual file names do not break an unfiltered review.
            base_name = doc['file'].split('/')[-1].split('.')[0]
            (yr, abst) = [int(part) for part in base_name.split('_')]
            if start_year > -1 and yr < start_year:
                continue
            if (start_abstract > -1 and yr == start_year and
                    abst < start_abstract):
                continue

        # Make sure no pre-fill from a previous prompt leaks into this one.
        readline.set_startup_hook()
        s = read_line('Review %s? [Y/n/q]' % doc['file'])
        if s == 'n' or s == 'N':
            continue
        if s == 'q' or s == 'Q':
            break

        # Show the first few lines
        print('-------------------------------')
        lines = doc['content'].split('\n')
        non_empty = [l for l in lines if len(l) > 1]
        for l in non_empty[:6]:
            print(l)

        update_field(doc, 'grobid:header_Title')
        update_field(doc, 'grobid:header_Authors')
        print('\n')

    # Write out new JSONL file
    print('Writing to %s' % outfile)
    count = 0
    with open(outfile, 'wb') as out:
        for doc in docs:
            # json.dumps escapes non-ASCII by default, so the text is pure
            # ASCII and can be written as bytes on Python 2 and 3 alike.
            out.write((json.dumps(doc) + '\n').encode('ascii'))
            count += 1
    print('Stored %d documents in %s' % (count, outfile))
def update_field(doc, grobid_f_name):
    """Interactively edit one metadata field of ``doc`` in place.

    The current value of ``doc['metadata'][grobid_f_name]`` is offered as
    pre-filled, editable text on the prompt; whatever the user submits is
    stored back under the same key and echoed.

    Args:
        doc: dict with a 'file' path, a 'content' string and a 'metadata'
            dict.
        grobid_f_name: metadata key to edit, e.g. 'grobid:header_Title'.
    """
    value = doc['metadata'].get(grobid_f_name, '')
    if value == '' and grobid_f_name == 'grobid:header_Title':
        # If title is unknown, try to guess it from content.  The pattern
        # may not match at all, in which case the field stays empty.
        guess = re.search(r'[^\.]+\.[^\.\n]+\n+([^\.]+)\.', doc['content'])
        if guess is not None:
            value = guess.group(1).title()
    # Standardize capitalization of all-caps values.  This works on
    # ``value`` itself so that a guessed title is not thrown away.
    if value.isupper():
        value = string.capwords(value)

    def prefill_hook():
        text = value
        if not isinstance(text, str):
            # Python 2 ``unicode``: readline expects an encoded byte string.
            # stdin.encoding is None when input is not a terminal.
            text = text.encode(sys.stdin.encoding or 'utf8')
        readline.insert_text(text)

    # ``raw_input`` only exists on Python 2; fall back to ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    readline.set_startup_hook(prefill_hook)
    try:
        new_value = read_line('%s: Edit the %s: ' %
                              (doc['file'].split('/')[-1], grobid_f_name))
    finally:
        # Do not let the pre-fill leak into whichever prompt comes next.
        readline.set_startup_hook()
    if isinstance(new_value, bytes):
        # Python 2 returns a byte string; store text.
        new_value = new_value.decode('utf8')
    doc['metadata'][grobid_f_name] = new_value
    print('New value: %s' % doc['metadata'][grobid_f_name])
try:
    import gnureadline as readline
except ImportError:
    import readline


def startup_hook():
    """Put a fixed marker string into the readline buffer."""
    marker = 'from startup_hook'
    readline.insert_text(marker)


def pre_input_hook():
    """Add a second marker string to the readline buffer and repaint."""
    marker = ' from pre_input_hook'
    readline.insert_text(marker)
    # Refresh the screen so it shows the updated buffer contents.
    readline.redisplay()


# Install both hooks and enable tab completion.
readline.set_startup_hook(startup_hook)
readline.set_pre_input_hook(pre_input_hook)
readline.parse_and_bind('tab: complete')

# Echo every line entered until the user types "stop".
PROMPT = 'Prompt ("stop" to quit): '
line = input(PROMPT)
while line != 'stop':
    print('ENTERED: {!r}'.format(line))
    line = input(PROMPT)

Exemple #5
0
    # Title, authors, primaryauthor (+ venue, year?)

    default_value = d[f]
    # If title is Unknown, try to guess it from content.
    try:
        if f == 'title' and default_value == 'Unknown':
            default_value = re.search(r'[^\.]+\.[^\.\n]+\n+([^\.]+)\.',
                                      d['content']).group(1).title()
    except:
        pass
    # Strip numbers out of authors
    if f == 'authors':
        # Note: authors is a string inside a list, hence [0]
        default_value = re.sub(r'[0-9]', '', unicode(default_value[0]))

    readline.set_startup_hook(lambda: readline.insert_text(default_value))

    new_value = raw_input('Edit the %s: ' % f)
    new_value = unicode(new_value, 'utf8')

    if new_value != d[f]:
        d[f_old] = d[f]
        print 'Updating Solr.  Hang onto your hat!'
        d[f] = new_value
        # This also seems to commit by default, yay.
        # Why don't I need to put f_old here?
        s.add([d], fieldUpdates={f: 'set'})

# Copyright 2017, by the California Institute of Technology. ALL
# RIGHTS RESERVED. United States Government Sponsorship
# acknowledged. Any commercial use must be negotiated with the Office
#
"""
"""

#end_pymotw_header

try:
    import gnureadline as readline
except ImportError:
    import readline


def startup_hook():
    """Insert the text 'from startup_hook' into the readline buffer."""
    inserted = 'from startup_hook'
    readline.insert_text(inserted)


def pre_input_hook():
    """Insert ' from pre_input_hook' into the buffer, then redraw the line."""
    inserted = ' from pre_input_hook'
    readline.insert_text(inserted)
    # Update the display to reflect the text that was just inserted.
    readline.redisplay()


# Register the hooks defined above and bind Tab to completion.
readline.set_startup_hook(startup_hook)
readline.set_pre_input_hook(pre_input_hook)
readline.parse_and_bind('tab: complete')

# Keep prompting and echoing input; typing "stop" ends the loop.
PROMPT = 'Prompt ("stop" to quit): '
line = input(PROMPT)
while line != 'stop':
    print('ENTERED: {!r}'.format(line))
    line = input(PROMPT)
Exemple #7
0
def rlinput(prompt, prefill=''):
    """Read a line via ``input`` with ``prefill`` offered as editable text.

    Args:
        prompt: text shown in front of the editable line.
        prefill: text placed in the readline buffer before editing starts.

    Returns:
        The line entered by the user.
    """
    def insert_prefill():
        readline.insert_text(prefill)

    readline.set_startup_hook(insert_prefill)
    try:
        answer = input(prompt)
    finally:
        # Remove the hook again whether or not reading succeeded.
        readline.set_startup_hook()
    return answer