Beispiel #1
0
def get_ccg_lexicon():
    """Load the geo lexicon file into a new Lexicon.

    Reads 'geo-lexicon.txt' from LEXICON_DIR. Each line is stripped and
    split on the literal ' :- NP : ' into a (left, right) pair, and all
    pairs are handed to ``Lexicon.add_entries`` together with ``False`` as
    the second argument (the meaning of that flag is defined by
    ``add_entries``, which is not visible here).

    Returns:
        The populated Lexicon.

    Raises:
        ValueError: if a line does not split into exactly two parts.
    """
    separator = ' :- NP : '
    result = Lexicon()
    path = os.path.join(LEXICON_DIR, 'geo-lexicon.txt')
    pairs = []
    with open(path) as handle:
        for raw in handle:
            # Tuple unpacking enforces exactly one separator per line.
            phrase, entity = raw.strip().split(separator)
            pairs.append((phrase, entity))
    result.add_entries(pairs, False)
    return result
def get_ccg_lexicon():
    """Load 'lexicon.txt' from DB_DIR into a new Lexicon.

    Each line is stripped and split on the literal ' :- NP : ' into a
    (left, right) pair. Every ':' in the right-hand part is rewritten to
    ':_' before the pair is stored.

    Returns:
        The populated Lexicon.

    Raises:
        ValueError: if a line does not split into exactly two parts.
    """

    def parse_line(raw):
        # Unpacking into two names rejects lines with zero or several
        # separators, exactly like a plain two-target assignment.
        phrase, entity = raw.strip().split(' :- NP : ')
        return (phrase, entity.replace(':', ':_'))

    result = Lexicon()
    path = os.path.join(DB_DIR, 'lexicon.txt')
    with open(path) as handle:
        pairs = [parse_line(raw) for raw in handle]
    result.add_entries(pairs)
    return result
def get_ccg_lexicon():
  """Build a Lexicon from the 'lexicon.txt' file found in DB_DIR.

  Lines have the form '<left> :- NP : <right>'. The right-hand part has
  each ':' replaced by ':_' and the resulting (left, right) pairs are
  passed to ``Lexicon.add_entries`` in file order.

  Returns:
    The populated Lexicon.

  Raises:
    ValueError: if a line does not contain the separator exactly once.
  """
  lexicon = Lexicon()
  lexicon_path = os.path.join(DB_DIR, 'lexicon.txt')
  pairs = []
  with open(lexicon_path) as source:
    for raw_line in source:
      fields = raw_line.strip().split(' :- NP : ')
      phrase, entity = fields  # exactly two fields are required
      pairs.append((phrase, entity.replace(':', ':_')))
  lexicon.add_entries(pairs)
  return lexicon
Beispiel #4
0
def get_lexicon_from_raw_lexicon_then_write(basename, newname):
    """Build a Lexicon from a raw lexicon file and write it back out.

    Reads ``basename`` from LEXICON_DIR and passes every line through
    ``parse_entry``. The first element of the parsed tuple is used as the
    name and the second is run through ``normalize_entity``; lines whose
    normalized entity is the empty string are dropped. The remaining pairs
    are added to a new Lexicon (with ``False`` as the second argument of
    ``add_entries``), and the lexicon's entries are then written to
    ``newname`` in LEXICON_DIR, one per line, as
    ``<name> :- NP : <entity>``.

    Args:
        basename: File name, relative to LEXICON_DIR, of the raw lexicon.
        newname: File name, relative to LEXICON_DIR, of the file to write.
            An existing file of that name is overwritten.

    Returns:
        The populated Lexicon.
    """
    filename = os.path.join(LEXICON_DIR, basename)
    newfilename = os.path.join(LEXICON_DIR, newname)
    lex = Lexicon()
    entries = []
    with open(filename) as f:
        for line in f:
            lexicon_tuple = parse_entry(line)
            name = lexicon_tuple[0]
            entity = normalize_entity(lexicon_tuple[1])
            if entity == '':
                continue
            entries.append((name, entity))
    lex.add_entries(entries, False)
    with open(newfilename, 'w') as f:
        # NOTE(review): assumes lex.entries maps name -> entity, as the
        # loop variable names indicate -- confirm against Lexicon.
        for name, entity in lex.entries.items():
            # Opening with 'w' truncates the target, so the entries must
            # actually be written or the file is left empty. f.write is
            # used instead of print(..., file=f) so no print_function
            # import is needed.
            f.write('%s :- NP : %s\n' % (name, entity))
    return lex
def get_manual_lexicon():
    """Assemble a Lexicon from inline word lists and the .TAB tables.

    Entries are added in a fixed order: CITY.TAB, weekday names and their
    plural forms, AIRLINE.TAB, times (``handle_times``), INTERVAL.TAB,
    cardinal and ordinal day numbers, MONTH.TAB, AIRPORT.TAB, COMP_CLS.TAB,
    CLS_SVC.TAB, flight numbers (``handle_flight_numbers``), meals and
    dollar amounts (``handle_dollars``). The order is kept exactly because
    ``Lexicon.add_entries`` may treat earlier and later entries differently
    (not visible here).

    Returns:
        The populated Lexicon.
    """
    weekday_names = ('monday', 'tuesday', 'wednesday', 'thursday',
                     'friday', 'saturday', 'sunday')
    days_of_week = [(day, '%s:_da' % day) for day in weekday_names]
    # Plural weekday names share the singular tag; handles "on tuesdays".
    plural_days = [(day + 's', tag) for day, tag in days_of_week]

    # For dates: spelled-out cardinals 1..31, in numeric order.
    cardinal_words = (
        'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
        'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen',
        'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen',
        'twenty', 'twenty one', 'twenty two', 'twenty three',
        'twenty four', 'twenty five', 'twenty six', 'twenty seven',
        'twenty eight', 'twenty nine', 'thirty', 'thirty one',
    )
    word_numbers = [(word, '%d:_dn' % value)
                    for value, word in enumerate(cardinal_words, 1)]

    # Ordinals deliberately start at 2: 'first' is omitted so that
    # "first class" is preferred over "first = 1".
    ordinal_words = (
        'second', 'third', 'fourth', 'fifth', 'sixth', 'seventh',
        'eighth', 'ninth', 'tenth', 'eleventh', 'twelfth', 'thirteenth',
        'fourteenth', 'fifteenth', 'sixteenth', 'seventeenth',
        'eighteenth', 'nineteenth', 'twentieth', 'twenty first',
        'twenty second', 'twenty third', 'twenty fourth', 'twenty fifth',
        'twenty sixth', 'twenty seventh', 'twenty eighth', 'twenty ninth',
        'thirtieth', 'thirty first',
    )
    ordinal_numbers = [(word, '%d:_dn' % value)
                       for value, word in enumerate(ordinal_words, 2)]

    meal_names = ('breakfast', 'lunch', 'dinner', 'snack')
    meals = [(meal, '%s:_me' % meal) for meal in meal_names]

    lex = Lexicon()

    cities = read_db('CITY.TAB', 1, 1, '_ci', strip_id=['.'])
    lex.add_entries(cities)
    lex.add_entries(days_of_week)
    lex.add_entries(plural_days)

    airlines = read_db('AIRLINE.TAB', 0, 1, '_al',
                       strip_name=[', inc.', ', ltd.'])
    lex.add_entries(airlines)
    handle_times(lex)

    intervals = read_db('INTERVAL.TAB', 0, 0, '_pd')
    lex.add_entries(intervals)
    lex.add_entries(word_numbers)
    lex.add_entries(ordinal_numbers)

    months = read_db('MONTH.TAB', 1, 1, '_mn')
    lex.add_entries(months)

    airports = read_db('AIRPORT.TAB', 0, 1, '_ap',
                       strip_name=[], split_name=['/'])
    lex.add_entries(airports)

    compartment_classes = read_db('COMP_CLS.TAB', 1, 1, '_cl')
    lex.add_entries(compartment_classes)

    class_services = read_db('CLS_SVC.TAB', 0, 0, '_fb', prefix_name='code ')
    lex.add_entries(class_services)

    handle_flight_numbers(lex)
    lex.add_entries(meals)
    handle_dollars(lex)
    return lex
def get_manual_lexicon():
  """Build the manual Lexicon from literal word lists and .TAB tables.

  The sources are added in a fixed sequence (cities, weekdays, airlines,
  times, intervals, day numbers, months, airports, classes, class-of-service
  codes, flight numbers, meals, dollar amounts); the sequence is preserved
  exactly as written.

  Returns:
    The populated Lexicon.
  """
  weekdays = []
  for day in ('monday', 'tuesday', 'wednesday', 'thursday',
              'friday', 'saturday', 'sunday'):
    weekdays.append((day, day + ':_da'))

  # For dates
  cardinal_days = [
      ('one', '1:_dn'), ('two', '2:_dn'), ('three', '3:_dn'),
      ('four', '4:_dn'), ('five', '5:_dn'), ('six', '6:_dn'),
      ('seven', '7:_dn'), ('eight', '8:_dn'), ('nine', '9:_dn'),
      ('ten', '10:_dn'), ('eleven', '11:_dn'), ('twelve', '12:_dn'),
      ('thirteen', '13:_dn'), ('fourteen', '14:_dn'),
      ('fifteen', '15:_dn'), ('sixteen', '16:_dn'),
      ('seventeen', '17:_dn'), ('eighteen', '18:_dn'),
      ('nineteen', '19:_dn'), ('twenty', '20:_dn'),
      ('twenty one', '21:_dn'), ('twenty two', '22:_dn'),
      ('twenty three', '23:_dn'), ('twenty four', '24:_dn'),
      ('twenty five', '25:_dn'), ('twenty six', '26:_dn'),
      ('twenty seven', '27:_dn'), ('twenty eight', '28:_dn'),
      ('twenty nine', '29:_dn'), ('thirty', '30:_dn'),
      ('thirty one', '31:_dn'),
  ]
  # 'first' is intentionally absent: prefer first class to "first = 1".
  ordinal_days = [
      ('second', '2:_dn'), ('third', '3:_dn'), ('fourth', '4:_dn'),
      ('fifth', '5:_dn'), ('sixth', '6:_dn'), ('seventh', '7:_dn'),
      ('eighth', '8:_dn'), ('ninth', '9:_dn'), ('tenth', '10:_dn'),
      ('eleventh', '11:_dn'), ('twelfth', '12:_dn'),
      ('thirteenth', '13:_dn'), ('fourteenth', '14:_dn'),
      ('fifteenth', '15:_dn'), ('sixteenth', '16:_dn'),
      ('seventeenth', '17:_dn'), ('eighteenth', '18:_dn'),
      ('nineteenth', '19:_dn'), ('twentieth', '20:_dn'),
      ('twenty first', '21:_dn'), ('twenty second', '22:_dn'),
      ('twenty third', '23:_dn'), ('twenty fourth', '24:_dn'),
      ('twenty fifth', '25:_dn'), ('twenty sixth', '26:_dn'),
      ('twenty seventh', '27:_dn'), ('twenty eighth', '28:_dn'),
      ('twenty ninth', '29:_dn'), ('thirtieth', '30:_dn'),
      ('thirty first', '31:_dn'),
  ]
  meal_entries = []
  for meal in ('breakfast', 'lunch', 'dinner', 'snack'):
    meal_entries.append((meal, meal + ':_me'))

  lex = Lexicon()
  add = lex.add_entries  # short alias; every call below targets `lex`

  add(read_db('CITY.TAB', 1, 1, '_ci', strip_id=['.']))
  add(weekdays)
  # Handle "on tuesdays": plural names map to the singular day's tag.
  add([(day + 's', tag) for day, tag in weekdays])
  add(read_db('AIRLINE.TAB', 0, 1, '_al', strip_name=[', inc.', ', ltd.']))
  handle_times(lex)
  add(read_db('INTERVAL.TAB', 0, 0, '_pd'))
  add(cardinal_days)
  add(ordinal_days)
  add(read_db('MONTH.TAB', 1, 1, '_mn'))
  add(read_db('AIRPORT.TAB', 0, 1, '_ap', strip_name=[], split_name=['/']))
  add(read_db('COMP_CLS.TAB', 1, 1, '_cl'))
  add(read_db('CLS_SVC.TAB', 0, 0, '_fb', prefix_name='code '))
  handle_flight_numbers(lex)
  add(meal_entries)
  handle_dollars(lex)
  return lex