def get_ccg_lexicon():
    """Load the lexicon stored in ``geo-lexicon.txt`` under LEXICON_DIR.

    Every non-blank line of the file is expected to have the form
    ``<left> :- NP : <right>``; the two sides become one ``(left, right)``
    entry.  All entries are handed to ``Lexicon.add_entries`` in file order,
    with ``False`` as the second argument (its meaning is defined by
    ``Lexicon.add_entries``, which is not visible here).

    Returns:
        The populated ``Lexicon``.

    Raises:
        ValueError: if a non-blank line does not contain the
            `` :- NP : `` separator exactly once.
    """
    lexicon = Lexicon()
    filename = os.path.join(LEXICON_DIR, 'geo-lexicon.txt')
    entries = []
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing newline at end of file)
                # holds no entry; skipping it avoids an unpacking error.
                continue
            x, y = line.split(' :- NP : ')
            entries.append((x, y))
    lexicon.add_entries(entries, False)
    return lexicon
def get_ccg_lexicon():
    """Load the lexicon stored in ``lexicon.txt`` under DB_DIR.

    Every non-blank line of the file is expected to have the form
    ``<left> :- NP : <right>``.  In the right-hand side every ``:`` is
    rewritten to ``:_`` before the ``(left, right)`` pair is recorded.
    All entries are then added to a new ``Lexicon`` in file order.

    Returns:
        The populated ``Lexicon``.

    Raises:
        ValueError: if a non-blank line does not contain the
            `` :- NP : `` separator exactly once.
    """
    lexicon = Lexicon()
    filename = os.path.join(DB_DIR, 'lexicon.txt')
    entries = []
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing newline at end of file)
                # holds no entry; skipping it avoids an unpacking error.
                continue
            x, y = line.split(' :- NP : ')
            # NOTE(review): presumably converts "value:type" into the
            # "value:_type" spelling used by other entries -- confirm.
            y = y.replace(':', ':_')
            entries.append((x, y))
    lexicon.add_entries(entries)
    return lexicon
def get_lexicon_from_raw_lexicon_then_write(basename, newname):
    """Build a Lexicon from a raw lexicon file and write it back out.

    Each line of ``basename`` (under LEXICON_DIR) is parsed with
    ``parse_entry``; element 0 of the result is used as the name and
    element 1 is passed through ``normalize_entity``.  Entries whose
    normalized entity is the empty string are dropped.  The remaining
    pairs are added to a new ``Lexicon`` (with ``False`` as the second
    argument of ``add_entries``), and the lexicon's entries are written to
    ``newname`` (under LEXICON_DIR), one per line, as
    ``<name> :- NP : <entity>``.

    Args:
        basename: File name of the raw lexicon, relative to LEXICON_DIR.
        newname: File name of the output lexicon, relative to LEXICON_DIR.
            An existing file is overwritten.

    Returns:
        The populated ``Lexicon``.
    """
    filename = os.path.join(LEXICON_DIR, basename)
    newfilename = os.path.join(LEXICON_DIR, newname)
    lex = Lexicon()
    entries = []
    with open(filename) as f:
        for line in f:
            lexicon_tuple = parse_entry(line)
            name = lexicon_tuple[0]
            entity = normalize_entity(lexicon_tuple[1])
            if entity == '':
                continue
            entries.append((name, entity))
    lex.add_entries(entries, False)
    with open(newfilename, 'w') as f:
        # Previously the write was commented out, so the output file was
        # truncated and left empty.  f.write is used rather than
        # print(..., file=f) so no Python 3 print syntax is required.
        # NOTE(review): assumes lex.entries maps name -> entity -- confirm.
        for name, entity in lex.entries.items():
            f.write('%s :- NP : %s\n' % (name, entity))
    return lex
def get_manual_lexicon():
    """Assemble the hand-built lexicon.

    Combines hard-coded word lists (days of the week, number words,
    ordinal words, meals) with entries read from the ``*.TAB`` database
    files via ``read_db`` and with whatever the ``handle_times``,
    ``handle_flight_numbers`` and ``handle_dollars`` helpers register.
    The order of the calls below is kept exactly as written, since the
    lexicon may be sensitive to insertion order.

    Returns:
        The populated ``Lexicon``.
    """
    day_names = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday',
                 'saturday', 'sunday')
    days_of_week = [(day, '%s:_da' % day) for day in day_names]
    # Handle "on tuesdays"
    plural_days = [(day + 's', tag) for day, tag in days_of_week]

    # For dates: the i-th word (counting from 1) maps to "<i>:_dn".
    cardinal_words = (
        'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
        'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen',
        'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen',
        'twenty', 'twenty one', 'twenty two', 'twenty three',
        'twenty four', 'twenty five', 'twenty six', 'twenty seven',
        'twenty eight', 'twenty nine', 'thirty', 'thirty one',
    )
    word_numbers = [(word, '%d:_dn' % value)
                    for value, word in enumerate(cardinal_words, 1)]

    # Ordinals start at "second": prefer first class to "first = 1".
    ordinal_words = (
        'second', 'third', 'fourth', 'fifth', 'sixth', 'seventh', 'eighth',
        'ninth', 'tenth', 'eleventh', 'twelfth', 'thirteenth',
        'fourteenth', 'fifteenth', 'sixteenth', 'seventeenth',
        'eighteenth', 'nineteenth', 'twentieth', 'twenty first',
        'twenty second', 'twenty third', 'twenty fourth', 'twenty fifth',
        'twenty sixth', 'twenty seventh', 'twenty eighth', 'twenty ninth',
        'thirtieth', 'thirty first',
    )
    ordinal_numbers = [(word, '%d:_dn' % value)
                       for value, word in enumerate(ordinal_words, 2)]

    meal_names = ('breakfast', 'lunch', 'dinner', 'snack')
    meals = [(meal, '%s:_me' % meal) for meal in meal_names]

    lex = Lexicon()
    lex.add_entries(read_db('CITY.TAB', 1, 1, '_ci', strip_id=['.']))
    lex.add_entries(days_of_week)
    lex.add_entries(plural_days)
    lex.add_entries(
        read_db('AIRLINE.TAB', 0, 1, '_al', strip_name=[', inc.', ', ltd.']))
    handle_times(lex)
    lex.add_entries(read_db('INTERVAL.TAB', 0, 0, '_pd'))
    lex.add_entries(word_numbers)
    lex.add_entries(ordinal_numbers)
    lex.add_entries(read_db('MONTH.TAB', 1, 1, '_mn'))
    lex.add_entries(
        read_db('AIRPORT.TAB', 0, 1, '_ap', strip_name=[], split_name=['/']))
    lex.add_entries(read_db('COMP_CLS.TAB', 1, 1, '_cl'))
    lex.add_entries(read_db('CLS_SVC.TAB', 0, 0, '_fb', prefix_name='code '))
    handle_flight_numbers(lex)
    lex.add_entries(meals)
    handle_dollars(lex)
    return lex
def get_manual_lexicon():
    """Create the manually specified lexicon.

    Entries come from three kinds of sources, added in a fixed order:
    literal word tables built in this function (days, number words,
    ordinals, meals), rows loaded from the ``*.TAB`` files with
    ``read_db``, and the ``handle_times`` / ``handle_flight_numbers`` /
    ``handle_dollars`` helpers, each of which receives the lexicon.

    Returns:
        The populated ``Lexicon``.
    """
    weekdays = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday',
                'saturday', 'sunday']
    day_entries = [(name, '%s:_da' % name) for name in weekdays]

    # For dates.  Number words 1..31 are composed from smaller pieces.
    units = ['one', 'two', 'three', 'four', 'five', 'six', 'seven',
             'eight', 'nine']
    teens = ['ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
             'sixteen', 'seventeen', 'eighteen', 'nineteen']
    cardinals = (units + teens + ['twenty'] +
                 ['twenty ' + unit for unit in units] +
                 ['thirty', 'thirty one'])
    number_entries = [(word, '%d:_dn' % n)
                      for word, n in zip(cardinals, range(1, 32))]

    # Ordinal words 2..31.  A bare "first" is left out on purpose:
    # prefer first class to "first = 1".
    unit_ordinals = ['first', 'second', 'third', 'fourth', 'fifth', 'sixth',
                     'seventh', 'eighth', 'ninth']
    teen_ordinals = ['tenth', 'eleventh', 'twelfth', 'thirteenth',
                     'fourteenth', 'fifteenth', 'sixteenth', 'seventeenth',
                     'eighteenth', 'nineteenth']
    ordinals = (unit_ordinals[1:] + teen_ordinals + ['twentieth'] +
                ['twenty ' + ordinal for ordinal in unit_ordinals] +
                ['thirtieth', 'thirty first'])
    ordinal_entries = [(word, '%d:_dn' % n)
                       for word, n in zip(ordinals, range(2, 32))]

    meal_entries = [(name, '%s:_me' % name)
                    for name in ('breakfast', 'lunch', 'dinner', 'snack')]

    lex = Lexicon()

    cities = read_db('CITY.TAB', 1, 1, '_ci', strip_id=['.'])
    lex.add_entries(cities)

    lex.add_entries(day_entries)
    # Handle "on tuesdays"
    lex.add_entries([(name + 's', tag) for name, tag in day_entries])

    airlines = read_db('AIRLINE.TAB', 0, 1, '_al',
                       strip_name=[', inc.', ', ltd.'])
    lex.add_entries(airlines)

    handle_times(lex)

    intervals = read_db('INTERVAL.TAB', 0, 0, '_pd')
    lex.add_entries(intervals)

    lex.add_entries(number_entries)
    lex.add_entries(ordinal_entries)

    months = read_db('MONTH.TAB', 1, 1, '_mn')
    lex.add_entries(months)

    airports = read_db('AIRPORT.TAB', 0, 1, '_ap', strip_name=[],
                       split_name=['/'])
    lex.add_entries(airports)

    classes = read_db('COMP_CLS.TAB', 1, 1, '_cl')
    lex.add_entries(classes)

    fare_codes = read_db('CLS_SVC.TAB', 0, 0, '_fb', prefix_name='code ')
    lex.add_entries(fare_codes)

    handle_flight_numbers(lex)
    lex.add_entries(meal_entries)
    handle_dollars(lex)
    return lex