def generate(self, from_source, to_path, name, delimiter=',', verbose=False):
    """Create a PyDic index by delegating to ``PyDic.make_pydic_index``.

    Every argument is forwarded unchanged, by keyword, to
    ``PyDic.make_pydic_index``. The result of that call is discarded, so
    this method always returns ``None``.

    :param from_source: forwarded as ``from_source``
    :param to_path: forwarded as ``to_path``
    :param name: forwarded as ``name``
    :param delimiter: forwarded as ``delimiter`` (defaults to ``','``)
    :param verbose: forwarded as ``verbose`` (defaults to ``False``)
    """
    index_options = dict(
        from_source=from_source,
        to_path=to_path,
        name=name,
        delimiter=delimiter,
        verbose=verbose,
    )
    PyDic.make_pydic_index(**index_options)
def __init__(self, methodName='runTest'):
    """Create the two dictionaries used by the test methods.

    ``self.dict1`` is a ``PyDic`` opened on an index generated into a fresh
    temporary directory from the ``dict1.txt`` file located next to this
    module; ``self.dict1m`` is a ``PyDic`` constructed from the relative
    path ``'dict1.txt'``.

    :param methodName: name of the test method this instance will run
    """
    import shutil

    # Initialise the TestCase first: addCleanup() needs it, and an invalid
    # methodName now fails before any fixture has been created.
    super(TestPyDicBase, self).__init__(methodName)

    self.current_dir = os.path.dirname(os.path.realpath(__file__))

    self.temp_dict1_path = tempfile.mkdtemp()
    # Remove the generated index once the test has run instead of leaving
    # one temporary directory behind per test instance.
    self.addCleanup(shutil.rmtree, self.temp_dict1_path, ignore_errors=True)

    self.dict1_file = open(os.path.join(self.current_dir, 'dict1.txt'))
    # The handle stays open for the duration of the test and is closed
    # afterwards rather than being leaked.
    self.addCleanup(self.dict1_file.close)

    PyDicCreator().generate(self.dict1_file, self.temp_dict1_path, 'dict1',
                            verbose=False)
    self.dict1 = PyDic(self.temp_dict1_path)
    # NOTE(review): this path is relative to the working directory, unlike
    # the file opened above -- confirm tests are always run from this folder.
    self.dict1m = PyDic('dict1.txt')
def test_file_load(self):
    """Check ``id()`` lookups on a ``PyDic`` built from ``'dict1.txt'``."""
    # NOTE(review): the expected identifiers below look redacted
    # ('*****@*****.**'); confirm the intended values against the fixture.
    dict1 = PyDic('dict1.txt')
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(dict1.id(u'kotem'), ['*****@*****.**'])
    self.assertEqual(dict1.id(u'utrafieniu'), ['*****@*****.**'])
    self.assertEqual(dict1.id(u'pszczoły'), ['*****@*****.**'])
    self.assertEqual(dict1.id(u'spodniami'), ['*****@*****.**'])
    self.assertEqual(dict1.id(u'piloty'),
                     ['*****@*****.**', '*****@*****.**'])
    self.assertEqual(dict1.id(u'piloci'), ['*****@*****.**'])
def test_common_prefix(self):
    """Check ``PyDic.common_prefix`` on the inflected forms of "abakus".

    The expected value is the shared prefix followed by one suffix per
    input word, in input order.
    """
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        PyDic.common_prefix(
            ['abakus', 'abakusa', 'abakusowi', 'abakus', 'abakusem',
             'abakusie', 'abakusie', 'abakusy', 'abakusów', 'abakusom',
             'abakusy', 'abakusami', 'abakusach', 'abakusy']),
        ['abakus', '', 'a', 'owi', '', 'em', 'ie', 'ie', 'y', 'ów', 'om',
         'y', 'ami', 'ach', 'y']
    )
def test_common_prefix(self):
    """Check ``PyDic.common_prefix`` on the inflected forms of "abakus".

    The expected value is the shared prefix followed by one suffix per
    input word, in input order.
    """
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        PyDic.common_prefix([
            'abakus', 'abakusa', 'abakusowi', 'abakus', 'abakusem',
            'abakusie', 'abakusie', 'abakusy', 'abakusów', 'abakusom',
            'abakusy', 'abakusami', 'abakusach', 'abakusy'
        ]), [
            'abakus', '', 'a', 'owi', '', 'em', 'ie', 'ie', 'y', 'ów', 'om',
            'y', 'ami', 'ach', 'y'
        ])
def run(self):
    """
    Runs as a command line tool

    Parses the command line, loads the dictionary given with
    ``-f/--dictionary-file`` into ``self.dictionary``, builds
    ``self.index`` with ``self.load_index`` and then handles the input
    line by line. Input comes from the optional ``FILE`` argument or from
    standard input; output goes to the file given with ``-t/--output`` or
    to standard output.

    Lines that are empty after stripping, or that start with ``#``, are
    echoed unchanged. Every other line is passed to ``self.process`` and
    the returned items are written joined with the ``-d/--delimiter``
    value.

    The ``-b/--base-forms`` flag is parsed but not consulted in this
    method.
    """
    parser = argparse.ArgumentParser(
        description='Makes inflection of a flat text file with words.')
    parser.add_argument('-d', '--delimiter', default=u',')
    parser.add_argument('-f', '--dictionary-file',
                        help="path to file with text dictionary",
                        required=True)
    parser.add_argument('-t', '--output', help="output file name")
    parser.add_argument('-b', '--base-forms', action="store_true",
                        help="only base forms")
    parser.add_argument('-v', '--verbose', action="store_true",
                        help="debug verbose mode")
    parser.add_argument('input', metavar='FILE',
                        help="filename to process", nargs='?')
    args = parser.parse_args()

    input_stream = open(args.input) if args.input else sys.stdin
    try:
        output_stream = open(args.output, 'w') if args.output else sys.stdout
        try:
            self.dictionary = PyDic(args.dictionary_file)
            self.index = self.load_index(self.dictionary)

            # NOTE(review): ``line.decode`` and ``print >>`` are Python 2
            # constructs; they are kept unchanged on purpose.
            for line in input_stream:
                line = line.decode('utf-8').strip()
                if line and line[0] != '#':
                    print >> output_stream, args.delimiter.join(
                        self.process(self.dictionary, self.index, line,
                                     debug=args.verbose)).encode('utf-8')
                else:
                    print >> output_stream, line.encode('utf-8')
        finally:
            # Close only what this method opened; never the standard
            # streams.
            if output_stream is not sys.stdout:
                output_stream.close()
    finally:
        if input_stream is not sys.stdin:
            input_stream.close()
class TestPyDicBase(unittest.TestCase):
    """Tests of the ``PyDic`` API against the ``dict1.txt`` fixture.

    Two dictionaries are exercised side by side: ``self.dict1`` (opened on
    an index generated into a temporary directory) and ``self.dict1m``
    (constructed from the path ``'dict1.txt'``).

    NOTE(review): several expected identifiers are the literal
    ``'*****@*****.**'``, which looks redacted; confirm the intended values
    against the fixture.
    """

    def __init__(self, methodName='runTest'):
        """Create the two dictionaries used by the test methods.

        :param methodName: name of the test method this instance will run
        """
        import shutil

        # Initialise the TestCase first: addCleanup() needs it, and an
        # invalid methodName now fails before any fixture has been created.
        super(TestPyDicBase, self).__init__(methodName)

        self.current_dir = os.path.dirname(os.path.realpath(__file__))

        self.temp_dict1_path = tempfile.mkdtemp()
        # Remove the generated index once the test has run instead of
        # leaving one temporary directory behind per test instance.
        self.addCleanup(shutil.rmtree, self.temp_dict1_path,
                        ignore_errors=True)

        self.dict1_file = open(os.path.join(self.current_dir, 'dict1.txt'))
        # The handle stays open for the duration of the test and is closed
        # afterwards rather than being leaked.
        self.addCleanup(self.dict1_file.close)

        PyDicCreator().generate(self.dict1_file, self.temp_dict1_path,
                                'dict1', verbose=False)
        self.dict1 = PyDic(self.temp_dict1_path)
        # NOTE(review): this path is relative to the working directory,
        # unlike the file opened above -- confirm tests are always run from
        # this folder.
        self.dict1m = PyDic('dict1.txt')

    # All assertions use assertEqual: the assertEquals alias is deprecated
    # and was removed from unittest in Python 3.12.

    def test_file_load(self):
        """``id()`` lookups on a ``PyDic`` built from ``'dict1.txt'``."""
        dict1 = PyDic('dict1.txt')
        self.assertEqual(dict1.id(u'kotem'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'utrafieniu'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'pszczoły'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'spodniami'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'piloty'),
                         ['*****@*****.**', '*****@*****.**'])
        self.assertEqual(dict1.id(u'piloci'), ['*****@*****.**'])

    def test_name(self):
        """Each dictionary reports the expected ``name``."""
        self.assertEqual(self.dict1.name, 'dict1')
        self.assertEqual(self.dict1m.name, 'dict1.txt')

    def test_id(self):
        """``id()`` returns the identifiers for an exact word form."""
        self.assertEqual(self.dict1.id(u'kotem'), ['1@dict1'])
        self.assertEqual(self.dict1.id(u'utrafieniu'), ['7@dict1'])
        self.assertEqual(self.dict1.id(u'pszczoły'), ['4@dict1'])
        self.assertEqual(self.dict1.id(u'spodniami'), ['3@dict1'])
        self.assertEqual(self.dict1.id(u'piloty'), ['11@dict1', '12@dict1'])
        self.assertEqual(self.dict1.id(u'piloci'), ['10@dict1'])

        self.assertEqual(self.dict1m.id(u'kotem'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'utrafieniu'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'pszczoły'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'spodniami'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'piloty'),
                         ['*****@*****.**', '*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'piloci'), ['*****@*****.**'])

    def test_a_id(self):
        """``a_id()`` lookups with and without Polish diacritics."""
        self.assertEqual(self.dict1.a_id(u'pszczoly'), ['4@dict1'])
        self.assertEqual(self.dict1.a_id(u'pszczoły'), ['4@dict1'])
        self.assertEqual(self.dict1m.a_id(u'pszczoly'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.a_id(u'pszczóly'), [])

    def test_id_forms(self):
        """``id_forms()`` lists all forms of an id, or ``[]`` if unknown."""
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')), [
            u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę", u"pszczołą",
            u"pszczoło", u"pszczół", u"pszczołom", u"pszczołami",
            u"pszczołach",
        ])
        self.assertEqual(self.dict1.id_forms(PyDicId('3@dict1')), [
            u"spodnie", u"spodni", u"spodniom", u"spodniami", u"spodniach",
        ])
        self.assertEqual(self.dict1.id_forms(PyDicId('30000@dict1')), [])

        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')), [
            u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę", u"pszczołą",
            u"pszczoło", u"pszczół", u"pszczołom", u"pszczołami",
            u"pszczołach",
        ])
        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')), [
            u"spodnie", u"spodni", u"spodniom", u"spodniami", u"spodniach",
        ])
        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')), [])

    def test_word_forms(self):
        """``word_forms()`` returns one list of forms per matching entry."""
        self.assertEqual(self.dict1.word_forms(u"pszczołę"), [[
            u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę", u"pszczołą",
            u"pszczoło", u"pszczół", u"pszczołom", u"pszczołami",
            u"pszczołach",
        ]])
        self.assertEqual(self.dict1.word_forms(u"spodniach"), [[
            u"spodnie", u"spodni", u"spodniom", u"spodniami", u"spodniach",
        ]])
        self.assertEqual(self.dict1.word_forms(u"spodniachhhhhhhh"), [])

        self.assertEqual(self.dict1m.word_forms(u"pszczołę"), [[
            u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę", u"pszczołą",
            u"pszczoło", u"pszczół", u"pszczołom", u"pszczołami",
            u"pszczołach",
        ]])
        self.assertEqual(self.dict1m.word_forms(u"spodniach"), [[
            u"spodnie", u"spodni", u"spodniom", u"spodniami", u"spodniach",
        ]])
        self.assertEqual(self.dict1m.word_forms(u"spodniachhhhhhhh"), [])

    def test_a_word_forms(self):
        """``a_word_forms()`` lookups, including diacritic-free input."""
        self.assertEqual(self.dict1.a_word_forms(u"pszczole"), [[
            u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę", u"pszczołą",
            u"pszczoło", u"pszczół", u"pszczołom", u"pszczołami",
            u"pszczołach",
        ]])
        self.assertEqual(self.dict1.a_word_forms(u"spodniach"), [[
            u"spodnie", u"spodni", u"spodniom", u"spodniami", u"spodniach",
        ]])
        self.assertEqual(self.dict1.a_word_forms(u"spodniachhhhhhhh"), [])
        self.assertEqual(self.dict1.a_word_forms(u"psżczola"), [])

        self.assertEqual(self.dict1m.a_word_forms(u"pszczole"), [[
            u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę", u"pszczołą",
            u"pszczoło", u"pszczół", u"pszczołom", u"pszczołami",
            u"pszczołach",
        ]])
        self.assertEqual(self.dict1m.a_word_forms(u"spodniach"), [[
            u"spodnie", u"spodni", u"spodniom", u"spodniami", u"spodniach",
        ]])
        self.assertEqual(self.dict1m.a_word_forms(u"spodniachhhhhhhh"), [])

        self.assertEqual(self.dict1.a_word_forms(u"pszczól"), [[
            u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę", u"pszczołą",
            u"pszczoło", u"pszczół", u"pszczołom", u"pszczołami",
            u"pszczołach",
        ]])

    def test_empty_label_word_forms(self):
        """``word_forms()`` for the "abakus" entry in both dictionaries."""
        self.assertEqual(self.dict1.word_forms(u"abakusem"), [[
            u"abakus", u"abakusa", u"abakusach", u"abakusami", u"abakusem",
            u"abakusie", u"abakusom", u"abakusowi", u"abakusów", u"abakusy"
        ]])
        self.assertEqual(self.dict1m.word_forms(u"abakusem"), [[
            u"abakus", u"abakusa", u"abakusach", u"abakusami", u"abakusem",
            u"abakusie", u"abakusom", u"abakusowi", u"abakusów", u"abakusy"
        ]])

    def test_dic_name(self):
        """Each dictionary reports the expected unicode ``name``."""
        self.assertEqual(self.dict1.name, u"dict1")
        self.assertEqual(self.dict1m.name, u"dict1.txt")

    def test_dic_len(self):
        """``len()`` of both dictionaries is 17."""
        self.assertEqual(len(self.dict1), 17)
        self.assertEqual(len(self.dict1m), 17)

    def test_id_base(self):
        """``id_base()`` returns the base form, or ``None`` if unknown."""
        self.assertEqual(self.dict1.id_base(PyDicId('2@dict1')), u"pies")
        self.assertEqual(self.dict1.id_base(PyDicId('2000@dict1')), None)

    def test_word_base(self):
        """``word_base()`` maps an inflected form to its base forms."""
        self.assertEqual(self.dict1.word_base(u"psów"), [u"pies"])
        self.assertEqual(self.dict1.word_base(u"spodniami"), [u"spodnie"])
        self.assertEqual(self.dict1.word_base(u"#"), [])
        self.assertEqual(self.dict1.word_base(u"pilotowi"), [u"pilot"])

    def test_a_word_base(self):
        """``a_word_base()`` lookups, including diacritic-free input."""
        self.assertEqual(self.dict1.a_word_base(u"psow"), [u"pies"])
        self.assertEqual(self.dict1.a_word_base(u"spodniami"), [u"spodnie"])
        self.assertEqual(self.dict1.a_word_base(u"#"), [])
        self.assertEqual(self.dict1.a_word_base(u"pilotowi"), [u"pilot"])
        self.assertEqual(self.dict1.a_word_base(u"psów"), [u"pies"])
        self.assertEqual(self.dict1.a_word_base(u"pśow"), [])

    def test_lowercase_hash(self):
        """A lowercase query finds an entry whose base is capitalised."""
        self.assertEqual(self.dict1.word_base(u'żoliborzowi'),
                         [u"Żoliborz"])

    def test_iter(self):
        """Iterating the dictionary yields ids 1..17 in order."""
        self.assertEqual(list(self.dict1), [
            PyDicId(u'1@dict1'), PyDicId(u'2@dict1'), PyDicId(u'3@dict1'),
            PyDicId(u'4@dict1'), PyDicId(u'5@dict1'), PyDicId(u'6@dict1'),
            PyDicId(u'7@dict1'), PyDicId(u'8@dict1'), PyDicId(u'9@dict1'),
            PyDicId(u'10@dict1'), PyDicId(u'11@dict1'), PyDicId(u'12@dict1'),
            PyDicId(u'13@dict1'), PyDicId(u'14@dict1'), PyDicId(u'15@dict1'),
            PyDicId(u'16@dict1'), PyDicId(u'17@dict1')
        ])

    def test_common_prefix(self):
        """``PyDic.common_prefix`` returns the prefix, then the suffixes."""
        self.assertEqual(
            PyDic.common_prefix([
                'abakus', 'abakusa', 'abakusowi', 'abakus', 'abakusem',
                'abakusie', 'abakusie', 'abakusy', 'abakusów', 'abakusom',
                'abakusy', 'abakusami', 'abakusach', 'abakusy'
            ]), [
                'abakus', '', 'a', 'owi', '', 'em', 'ie', 'ie', 'y', 'ów',
                'om', 'y', 'ami', 'ach', 'y'
            ])

    def test_different_id_types(self):
        """``id_forms()`` accepts a ``PyDicId``, a string id and an int."""
        self.assertNotEqual(self.dict1.id_forms(PyDicId('4@dict1')), [])
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')),
                         self.dict1.id_forms('4@dict1'))
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')),
                         self.dict1.id_forms(4))
def setUp(self):
    """Prepare the stemmer, the dictionary and the index for each test.

    Creates a ``PydicStemmer``, constructs a ``PyDic`` from the path
    ``'dict1.txt'`` and stores the result of the stemmer's
    ``build_index`` for that dictionary in ``self.index``.
    """
    self.stemmer = PydicStemmer()
    self.dictionary = PyDic('dict1.txt')

    build_index = self.stemmer.build_index
    self.index = build_index(self.dictionary)
def load_dictionary(self, path):
    """Construct a ``PyDic`` from *path* and register it by its name.

    The new dictionary is stored in ``self.dictionaries`` under its
    ``name`` attribute, replacing any entry already stored under that key.

    :param path: value passed to the ``PyDic`` constructor
    """
    loaded = PyDic(path)
    registry = self.dictionaries
    registry[loaded.name] = loaded
class TestPyDicBase(unittest.TestCase):
    """Tests of the ``PyDic`` API against the ``dict1.txt`` fixture.

    Two dictionaries are exercised side by side: ``self.dict1`` (opened on
    an index generated into a temporary directory) and ``self.dict1m``
    (constructed from the path ``'dict1.txt'``).

    NOTE(review): several expected identifiers are the literal
    ``'*****@*****.**'``, which looks redacted; confirm the intended values
    against the fixture.
    """

    def __init__(self, methodName='runTest'):
        """Create the two dictionaries used by the test methods.

        :param methodName: name of the test method this instance will run
        """
        import shutil

        # Initialise the TestCase first: addCleanup() needs it, and an
        # invalid methodName now fails before any fixture has been created.
        super(TestPyDicBase, self).__init__(methodName)

        self.current_dir = os.path.dirname(os.path.realpath(__file__))

        self.temp_dict1_path = tempfile.mkdtemp()
        # Remove the generated index once the test has run instead of
        # leaving one temporary directory behind per test instance.
        self.addCleanup(shutil.rmtree, self.temp_dict1_path,
                        ignore_errors=True)

        self.dict1_file = open(os.path.join(self.current_dir, 'dict1.txt'))
        # The handle stays open for the duration of the test and is closed
        # afterwards rather than being leaked.
        self.addCleanup(self.dict1_file.close)

        PyDicCreator().generate(self.dict1_file, self.temp_dict1_path,
                                'dict1', verbose=False)
        self.dict1 = PyDic(self.temp_dict1_path)
        # NOTE(review): this path is relative to the working directory,
        # unlike the file opened above -- confirm tests are always run from
        # this folder.
        self.dict1m = PyDic('dict1.txt')

    # All assertions use assertEqual: the assertEquals alias is deprecated
    # and was removed from unittest in Python 3.12.

    def test_file_load(self):
        """``id()`` lookups on a ``PyDic`` built from ``'dict1.txt'``."""
        dict1 = PyDic('dict1.txt')
        self.assertEqual(dict1.id(u'kotem'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'utrafieniu'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'pszczoły'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'spodniami'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'piloty'),
                         ['*****@*****.**', '*****@*****.**'])
        self.assertEqual(dict1.id(u'piloci'), ['*****@*****.**'])

    def test_name(self):
        """Each dictionary reports the expected ``name``."""
        self.assertEqual(self.dict1.name, 'dict1')
        self.assertEqual(self.dict1m.name, 'dict1.txt')

    def test_id(self):
        """``id()`` returns the identifiers for an exact word form."""
        self.assertEqual(self.dict1.id(u'kotem'), ['1@dict1'])
        self.assertEqual(self.dict1.id(u'utrafieniu'), ['7@dict1'])
        self.assertEqual(self.dict1.id(u'pszczoły'), ['4@dict1'])
        self.assertEqual(self.dict1.id(u'spodniami'), ['3@dict1'])
        self.assertEqual(self.dict1.id(u'piloty'), ['11@dict1', '12@dict1'])
        self.assertEqual(self.dict1.id(u'piloci'), ['10@dict1'])

        self.assertEqual(self.dict1m.id(u'kotem'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'utrafieniu'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'pszczoły'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'spodniami'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'piloty'),
                         ['*****@*****.**', '*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'piloci'), ['*****@*****.**'])

    def test_a_id(self):
        """``a_id()`` finds an entry from diacritic-free input."""
        self.assertEqual(self.dict1.a_id(u'pszczoly'), ['4@dict1'])
        self.assertEqual(self.dict1m.a_id(u'pszczoly'), ['*****@*****.**'])

    def test_id_forms(self):
        """``id_forms()`` lists all forms of an id, or ``[]`` if unknown."""
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')),
                         [u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę",
                          u"pszczołą", u"pszczoło", u"pszczół", u"pszczołom",
                          u"pszczołami", u"pszczołach", ])
        self.assertEqual(self.dict1.id_forms(PyDicId('3@dict1')),
                         [u"spodnie", u"spodni", u"spodniom", u"spodniami",
                          u"spodniach", ])
        self.assertEqual(self.dict1.id_forms(PyDicId('30000@dict1')), [])

        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')),
                         [u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę",
                          u"pszczołą", u"pszczoło", u"pszczół", u"pszczołom",
                          u"pszczołami", u"pszczołach", ])
        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')),
                         [u"spodnie", u"spodni", u"spodniom", u"spodniami",
                          u"spodniach", ])
        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')), [])

    def test_word_forms(self):
        """``word_forms()`` returns one list of forms per matching entry."""
        self.assertEqual(self.dict1.word_forms(u"pszczołę"), [
            [u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę", u"pszczołą",
             u"pszczoło", u"pszczół", u"pszczołom", u"pszczołami",
             u"pszczołach", ]])
        self.assertEqual(self.dict1.word_forms(u"spodniach"), [
            [u"spodnie", u"spodni", u"spodniom", u"spodniami",
             u"spodniach", ]])
        self.assertEqual(self.dict1.word_forms(u"spodniachhhhhhhh"), [])

        self.assertEqual(self.dict1m.word_forms(u"pszczołę"), [
            [u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę", u"pszczołą",
             u"pszczoło", u"pszczół", u"pszczołom", u"pszczołami",
             u"pszczołach", ]])
        self.assertEqual(self.dict1m.word_forms(u"spodniach"), [
            [u"spodnie", u"spodni", u"spodniom", u"spodniami",
             u"spodniach", ]])
        self.assertEqual(self.dict1m.word_forms(u"spodniachhhhhhhh"), [])

    def test_a_word_forms(self):
        """``a_word_forms()`` lookups on both dictionaries."""
        self.assertEqual(self.dict1.a_word_forms(u"pszczole"), [
            [u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę", u"pszczołą",
             u"pszczoło", u"pszczół", u"pszczołom", u"pszczołami",
             u"pszczołach", ]])
        self.assertEqual(self.dict1.a_word_forms(u"spodniach"), [
            [u"spodnie", u"spodni", u"spodniom", u"spodniami",
             u"spodniach", ]])
        self.assertEqual(self.dict1.a_word_forms(u"spodniachhhhhhhh"), [])

        self.assertEqual(self.dict1m.a_word_forms(u"pszczole"), [
            [u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę", u"pszczołą",
             u"pszczoło", u"pszczół", u"pszczołom", u"pszczołami",
             u"pszczołach", ]])
        self.assertEqual(self.dict1m.a_word_forms(u"spodniach"), [
            [u"spodnie", u"spodni", u"spodniom", u"spodniami",
             u"spodniach", ]])
        self.assertEqual(self.dict1m.a_word_forms(u"spodniachhhhhhhh"), [])

    def test_words_for_prefix(self):
        """``words_for_prefix()`` returns every word starting with a prefix.

        Result order is not asserted: both lists are sorted before being
        compared.
        """
        def assertListsSame(a, b):
            # Sorts both lists in place, then compares them.
            a.sort()
            b.sort()
            return self.assertEqual(a, b)

        assertListsSame(
            self.dict1m.words_for_prefix(u"pszczo"),
            [u"pszczoła", u"pszczoły", u"pszczole", u"pszczołą", u"pszczoło",
             u"pszczołę", u"pszczołami", u"pszczołach", u"pszczołom"]
        )
        self.assertEqual(self.dict1m.words_for_prefix(u"Xpszczo"), [])
        self.assertEqual(self.dict1m.words_for_prefix(u"pszczoX"), [])
        self.assertEqual(self.dict1m.words_for_prefix(u"pszczox"), [])
        self.assertEqual(self.dict1m.words_for_prefix(u" pszczo"), [])
        self.assertEqual(self.dict1m.words_for_prefix(u"pszczo "), [])

        assertListsSame(self.dict1m.words_for_prefix(u"pi"), [
            u"pies", u"piloci",
            u"pilocie", u"pilocie", u"pilocie",
            u"pilot", u"pilot", u"pilot",
            u"pilota", u"pilota", u"pilota",
            u"pilotach", u"pilotach", u"pilotach",
            u"pilotami", u"pilotami", u"pilotami",
            u"pilotem", u"pilotem", u"pilotem",
            u"pilotom", u"pilotom", u"pilotom",
            u"pilotowi", u"pilotowi", u"pilotowi",
            u"piloty", u"piloty",
            u"pilot\xf3w", u"pilot\xf3w", u"pilot\xf3w"]
        )
        assertListsSame(self.dict1m.words_for_prefix(u"pie"), [u"pies"])

    def test_empty_label_word_forms(self):
        """``word_forms()`` for the "abakus" entry in both dictionaries."""
        self.assertEqual(self.dict1.word_forms(u"abakusem"), [
            [u"abakus", u"abakusa", u"abakusach", u"abakusami", u"abakusem",
             u"abakusie", u"abakusom", u"abakusowi", u"abakusów",
             u"abakusy"]])
        self.assertEqual(self.dict1m.word_forms(u"abakusem"), [
            [u"abakus", u"abakusa", u"abakusach", u"abakusami", u"abakusem",
             u"abakusie", u"abakusom", u"abakusowi", u"abakusów",
             u"abakusy"]])

    def test_dic_name(self):
        """Each dictionary reports the expected unicode ``name``."""
        self.assertEqual(self.dict1.name, u"dict1")
        self.assertEqual(self.dict1m.name, u"dict1.txt")

    def test_dic_len(self):
        """``len()`` of both dictionaries is 17."""
        self.assertEqual(len(self.dict1), 17)
        self.assertEqual(len(self.dict1m), 17)

    def test_id_base(self):
        """``id_base()`` returns the base form, or ``None`` if unknown."""
        self.assertEqual(self.dict1.id_base(PyDicId('2@dict1')), u"pies")
        self.assertEqual(self.dict1.id_base(PyDicId('2000@dict1')), None)

    def test_word_base(self):
        """``word_base()`` maps an inflected form to its base forms."""
        self.assertEqual(self.dict1.word_base(u"psów"), [u"pies"])
        self.assertEqual(self.dict1.word_base(u"spodniami"), [u"spodnie"])
        self.assertEqual(self.dict1.word_base(u"#"), [])
        self.assertEqual(self.dict1.word_base(u"pilotowi"), [u"pilot"])

    def test_a_word_base(self):
        """``a_word_base()`` lookups, including diacritic-free input."""
        self.assertEqual(self.dict1.a_word_base(u"psow"), [u"pies"])
        self.assertEqual(self.dict1.a_word_base(u"spodniami"), [u"spodnie"])
        self.assertEqual(self.dict1.a_word_base(u"#"), [])
        self.assertEqual(self.dict1.a_word_base(u"pilotowi"), [u"pilot"])

    def test_lowercase_hash(self):
        """A lowercase query finds an entry whose base is capitalised."""
        self.assertEqual(self.dict1.word_base(u'żoliborzowi'),
                         [u"Żoliborz"])

    def test_iter(self):
        """Iterating the dictionary yields ids 1..17 in order."""
        self.assertEqual(list(self.dict1),
                         [PyDicId(u'1@dict1'), PyDicId(u'2@dict1'),
                          PyDicId(u'3@dict1'), PyDicId(u'4@dict1'),
                          PyDicId(u'5@dict1'), PyDicId(u'6@dict1'),
                          PyDicId(u'7@dict1'), PyDicId(u'8@dict1'),
                          PyDicId(u'9@dict1'), PyDicId(u'10@dict1'),
                          PyDicId(u'11@dict1'), PyDicId(u'12@dict1'),
                          PyDicId(u'13@dict1'), PyDicId(u'14@dict1'),
                          PyDicId(u'15@dict1'), PyDicId(u'16@dict1'),
                          PyDicId(u'17@dict1')]
                         )

    def test_common_prefix(self):
        """``PyDic.common_prefix`` returns the prefix, then the suffixes."""
        self.assertEqual(
            PyDic.common_prefix(
                ['abakus', 'abakusa', 'abakusowi', 'abakus', 'abakusem',
                 'abakusie', 'abakusie', 'abakusy', 'abakusów', 'abakusom',
                 'abakusy', 'abakusami', 'abakusach', 'abakusy']),
            ['abakus', '', 'a', 'owi', '', 'em', 'ie', 'ie', 'y', 'ów', 'om',
             'y', 'ami', 'ach', 'y']
        )

    def test_different_id_types(self):
        """``id_forms()`` accepts a ``PyDicId``, a string id and an int."""
        self.assertNotEqual(self.dict1.id_forms(PyDicId('4@dict1')), [])
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')),
                         self.dict1.id_forms('4@dict1'))
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')),
                         self.dict1.id_forms(4))
### LEMMATISING ### from nltk.stem import WordNetLemmatizer from nltk.corpus import wordnet as wn from pattern3.fr import parse as frparse from pattern3.nl import parse as nlparse from pattern3.de import parse as deparse from pattern3.it import parse as itparse from pydic import PyDic from pymystem3 import Mystem if __name__ == "__main__": # Initialising Lemmatisers with logs print("Initialising lemmatiser for Polish... ", end='\r') pl_dict = PyDic('pydic/odm.txt') print("Initialising lemmatiser for Russian... ", end='\r') ru_lemmatiser = Mystem() print("Initialising lemmatiser for English... ", end='\r') en_lemmatiser = WordNetLemmatizer() print("Done initialising lemmatisers. ") def pl_lemmatise(word): """ Lemmatiser for Polish :param word: string :return: string """ word_forms = pl_dict.word_base(word) if word_forms: