Beispiel #1
0
def lexemes_from_file(filename):
    """Load lexemes from a category-sectioned word list file.

    The file is read line by line.  Blank (whitespace-only) lines are
    skipped.  A line whose first whitespace-separated token is ``--`` starts
    a new category, named by the second token; any further tokens on that
    line are ignored.  Every other line is taken, stripped of surrounding
    whitespace, as a word belonging to the current category.  A word listed
    more than once collects one category entry per occurrence.  Words that
    appear before the first header are recorded with a category of ``None``.

    Args:
        filename: Path of the lexeme file to read.

    Returns:
        A list with one ``lexeme(word, categories)`` object per distinct
        word, where ``categories`` is the list of categories the word was
        listed under, in file order.

    Raises:
        BadFileError: If a ``--`` header line does not name a category.
    """
    words = {}
    curr_cat = None  # stays None until the first "--" header is seen

    # The context manager closes the file even if parsing fails part-way.
    with open(filename, "r") as lexeme_file:
        # Count physical lines (blank ones included) so that the number in
        # an error message matches what an editor shows.
        for line_num, line in enumerate(lexeme_file, 1):
            entry = line.strip()
            if not entry:
                continue

            tokens = entry.split()
            if tokens[0] == "--":
                # A header without a category name used to escape as a raw
                # IndexError; report it as a malformed file instead.
                if len(tokens) < 2:
                    raise BadFileError("Lexeme file is invalid (line %d) '%s'" % (line_num, line))
                curr_cat = tokens[1]
            else:
                words.setdefault(entry, []).append(curr_cat)

    return [lexeme(word, categories) for word, categories in words.items()]
Beispiel #2
0
            curr_cat = l[1]
        else:
            try:
                if line.strip() not in words.keys():
                    words[line.strip()] = []
                words[line.strip()].append(curr_cat)
            except:
                raise BadFileError("Lexeme file is invalid (line %d) '%s'" % (line_num, line))
            
    for word, categories in words.items():
        lexemes.append(lexeme(word, categories))
        
    return lexemes
        

if __name__ == "__main__":
    # Small manual demo: build a lexicon from six hand-written lexemes
    # (two each in the "Art", "N" and "V" categories) and dump two of its
    # attributes.
    lexemes = [
        lexeme("a", ["Art"]),
        lexeme("the", ["Art"]),

        lexeme("child", ["N"]),
        lexeme("man", ["N"]),

        lexeme("laughs", ["V"]),
        lexeme("eats", ["V"]),
    ]

    lex = lexicon(lexemes)

    # Single-argument parenthesised print produces the same output under
    # Python 2 and is also valid Python 3.
    # NOTE(review): by_lex / by_cat are presumably lookups keyed by word and
    # by category -- confirm against the lexicon class.
    print(lex.by_lex)
    print(lex.by_cat)