def lexemes_from_file(filename):
    """Build a list of lexemes from a category-grouped word file.

    The file is read line by line:

    * blank (whitespace-only) lines are ignored;
    * a line whose first whitespace-separated token is ``--`` is a header,
      and its second token becomes the current category;
    * any other line is a word (the whole stripped line) that is recorded
      under the current category.

    A word listed under several headers collects every category in file
    order; repeated entries are kept.

    Args:
        filename: Path of the lexeme file to read.

    Returns:
        A list with one ``lexeme(word, categories)`` per distinct word.

    Raises:
        BadFileError: If a ``--`` header line does not name a category.
    """
    words = {}
    curr_cat = None

    # The context manager closes the file even when parsing fails.
    with open(filename, "r") as lexeme_file:
        # Count every physical line (blank ones included) so the number in
        # an error message matches what a text editor shows.
        for line_num, line in enumerate(lexeme_file, 1):
            tokens = line.split()
            if not tokens:
                continue

            if tokens[0] == "--":
                # A header without a category name is the one way this
                # format can be malformed; report it as a file error rather
                # than letting an IndexError escape.
                if len(tokens) < 2:
                    raise BadFileError(
                        "Lexeme file is invalid (line %d) '%s'"
                        % (line_num, line)
                    )
                curr_cat = tokens[1]
            else:
                # NOTE: a word that precedes the first header is recorded
                # with category None, exactly as the original code did.
                words.setdefault(line.strip(), []).append(curr_cat)

    return [lexeme(word, categories) for word, categories in words.items()]
if __name__ == "__main__":
    # Small manual demo: build a lexicon from a handful of hand-written
    # lexemes and print its by_lex and by_cat attributes.
    lexemes = [
        lexeme("a", ["Art"]),
        lexeme("the", ["Art"]),
        lexeme("child", ["N"]),
        lexeme("man", ["N"]),
        lexeme("laughs", ["V"]),
        lexeme("eats", ["V"]),
    ]
    lex = lexicon(lexemes)
    # Parenthesised single-argument print produces the same output under
    # the Python 2 print statement and the Python 3 print function.
    print(lex.by_lex)
    print(lex.by_cat)