Example #1
0
    def generate(self, from_source, to_path, name, delimiter=',', verbose=False):
        """Delegate index creation to ``PyDic.make_pydic_index``.

        Every argument is forwarded unchanged, by keyword, under the same
        name. The delegate's return value is discarded, so this method
        always returns ``None``.

        :param from_source: forwarded as ``from_source``
        :param to_path: forwarded as ``to_path``
        :param name: forwarded as ``name``
        :param delimiter: forwarded as ``delimiter`` (defaults to ``','``)
        :param verbose: forwarded as ``verbose`` (defaults to ``False``)
        """
        index_options = {
            'from_source': from_source,
            'to_path': to_path,
            'name': name,
            'delimiter': delimiter,
            'verbose': verbose,
        }
        PyDic.make_pydic_index(**index_options)
Example #2
0
    def __init__(self, methodName='runTest'):
        """Build the dictionary fixtures, then initialise the test case.

        Two dictionaries are prepared from the same ``dict1.txt`` data:
        ``self.dict1`` is opened from an index generated into a fresh
        temporary directory, ``self.dict1m`` is opened straight from the
        relative path ``'dict1.txt'``.

        :param methodName: passed through to the parent initialiser
        """
        this_file = os.path.realpath(__file__)
        self.current_dir = os.path.dirname(this_file)
        self.temp_dict1_path = tempfile.mkdtemp()

        source_path = os.path.join(self.current_dir, 'dict1.txt')
        # NOTE(review): this handle stays open for the life of the instance.
        self.dict1_file = open(source_path)

        creator = PyDicCreator()
        creator.generate(self.dict1_file, self.temp_dict1_path, 'dict1',
                         verbose=False)

        self.dict1 = PyDic(self.temp_dict1_path)
        self.dict1m = PyDic('dict1.txt')

        return super(TestPyDicBase, self).__init__(methodName)
Example #3
0
    def __init__(self, methodName='runTest'):
        """Build the dictionary fixtures, then initialise the test case.

        Two dictionaries are prepared from the same ``dict1.txt`` data:
        ``self.dict1`` is opened from an index generated into a fresh
        temporary directory, ``self.dict1m`` is opened straight from the
        relative path ``'dict1.txt'``.

        :param methodName: passed through to the parent initialiser
        """
        this_file = os.path.realpath(__file__)
        self.current_dir = os.path.dirname(this_file)
        self.temp_dict1_path = tempfile.mkdtemp()

        source_path = os.path.join(self.current_dir, 'dict1.txt')
        # NOTE(review): this handle stays open for the life of the instance.
        self.dict1_file = open(source_path)

        creator = PyDicCreator()
        creator.generate(self.dict1_file, self.temp_dict1_path, 'dict1',
                         verbose=False)

        self.dict1 = PyDic(self.temp_dict1_path)
        self.dict1m = PyDic('dict1.txt')

        return super(TestPyDicBase, self).__init__(methodName)
Example #4
0
 def test_file_load(self):
     """A dictionary opened directly from 'dict1.txt' resolves word ids.

     Uses ``assertEqual``; the older spelling of this assertion is a
     deprecated alias that was removed in Python 3.12.

     NOTE(review): the '*****@*****.**' literals look like ids mangled by
     an e-mail scrubber -- confirm the real expected values.
     """
     dict1 = PyDic('dict1.txt')
     self.assertEqual(dict1.id(u'kotem'), ['*****@*****.**'])
     self.assertEqual(dict1.id(u'utrafieniu'), ['*****@*****.**'])
     self.assertEqual(dict1.id(u'pszczoły'), ['*****@*****.**'])
     self.assertEqual(dict1.id(u'spodniami'), ['*****@*****.**'])
     # 'piloty' is expected to resolve to two entries.
     self.assertEqual(dict1.id(u'piloty'),
                      ['*****@*****.**', '*****@*****.**'])
     self.assertEqual(dict1.id(u'piloci'), ['*****@*****.**'])
Example #5
0
    def test_common_prefix(self):
        """Check ``PyDic.common_prefix`` on the inflected forms of 'abakus'.

        The expected list is the shared prefix 'abakus' followed, for each
        input word in order, by what remains of that word after the prefix.

        Uses ``assertEqual``; the older spelling of this assertion is a
        deprecated alias that was removed in Python 3.12.
        """
        words = [
            'abakus', 'abakusa', 'abakusowi', 'abakus', 'abakusem', 'abakusie',
            'abakusie', 'abakusy', 'abakusów', 'abakusom', 'abakusy', 'abakusami',
            'abakusach', 'abakusy',
        ]
        expected = [
            'abakus', '', 'a', 'owi', '', 'em', 'ie', 'ie', 'y', 'ów',
            'om', 'y', 'ami', 'ach', 'y',
        ]
        self.assertEqual(PyDic.common_prefix(words), expected)
Example #6
0
 def test_common_prefix(self):
     """Check ``PyDic.common_prefix`` on the inflected forms of 'abakus'.

     The expected list is the shared prefix 'abakus' followed, for each
     input word in order, by what remains of that word after the prefix.

     Uses ``assertEqual``; the older spelling of this assertion is a
     deprecated alias that was removed in Python 3.12.
     """
     words = [
         'abakus', 'abakusa', 'abakusowi', 'abakus', 'abakusem',
         'abakusie', 'abakusie', 'abakusy', 'abakusów', 'abakusom',
         'abakusy', 'abakusami', 'abakusach', 'abakusy',
     ]
     expected = [
         'abakus', '', 'a', 'owi', '', 'em', 'ie', 'ie', 'y', 'ów',
         'om', 'y', 'ami', 'ach', 'y',
     ]
     self.assertEqual(PyDic.common_prefix(words), expected)
Example #7
0
 def test_file_load(self):
     """A dictionary opened directly from 'dict1.txt' resolves word ids.

     Uses ``assertEqual``; the older spelling of this assertion is a
     deprecated alias that was removed in Python 3.12.

     NOTE(review): the '*****@*****.**' literals look like ids mangled by
     an e-mail scrubber -- confirm the real expected values.
     """
     dict1 = PyDic('dict1.txt')
     self.assertEqual(dict1.id(u'kotem'), ['*****@*****.**'])
     self.assertEqual(dict1.id(u'utrafieniu'), ['*****@*****.**'])
     self.assertEqual(dict1.id(u'pszczoły'), ['*****@*****.**'])
     self.assertEqual(dict1.id(u'spodniami'), ['*****@*****.**'])
     # 'piloty' is expected to resolve to two entries.
     self.assertEqual(dict1.id(u'piloty'), ['*****@*****.**', '*****@*****.**'])
     self.assertEqual(dict1.id(u'piloci'), ['*****@*****.**'])
Example #8
0
    def run(self):
        """
        Runs as a command line tool.

        Parses the command line, loads the dictionary named by
        ``--dictionary-file`` and builds an index for it with
        ``self.load_index``. The input (the optional ``FILE`` argument,
        otherwise stdin) is then read line by line; each line is decoded
        as UTF-8 and stripped.

        * Empty lines and lines starting with ``#`` are echoed (stripped)
          to the output unchanged.
        * Every other line is passed to ``self.process``; the returned
          items are joined with ``--delimiter`` and written out.

        Output goes to the ``--output`` file when given, otherwise to
        stdout, encoded as UTF-8. Files opened here are closed before the
        method returns, also when processing fails; stdin and stdout are
        never closed.

        NOTE(review): the body relies on Python 2 semantics (``print >>``
        and ``str.decode``).
        """
        parser = argparse.ArgumentParser(
            description='Makes inflection of a flat text file with words.')

        parser.add_argument('-d', '--delimiter', default=u',')
        parser.add_argument('-f',
                            '--dictionary-file',
                            help="path to file with text dictionary",
                            required=True)
        parser.add_argument('-t', '--output', help="output file name")

        parser.add_argument('-b',
                            '--base-forms',
                            action="store_true",
                            help="only base forms")
        parser.add_argument('-v',
                            '--verbose',
                            action="store_true",
                            help="debug verbose mode")

        parser.add_argument('input',
                            metavar='FILE',
                            help="filename to process",
                            nargs='?')
        args = parser.parse_args()

        input_stream = sys.stdin
        output_stream = sys.stdout
        try:
            if args.input:
                input_stream = open(args.input)
            if args.output:
                output_stream = open(args.output, 'w')

            self.dictionary = PyDic(args.dictionary_file)
            self.index = self.load_index(self.dictionary)

            for line in input_stream:
                line = line.decode('utf-8').strip()
                if line and line[0] != '#':
                    print >> output_stream, args.delimiter.join(
                        self.process(self.dictionary,
                                     self.index,
                                     line,
                                     debug=args.verbose)).encode('utf-8')
                else:
                    # Blank lines and '#' comments pass through untouched.
                    print >> output_stream, line.encode('utf-8')
        finally:
            # Close only what this method opened; the standard streams
            # belong to the interpreter.
            if input_stream is not sys.stdin:
                input_stream.close()
            if output_stream is not sys.stdout:
                output_stream.close()
Example #9
0
class TestPyDicBase(unittest.TestCase):
    """Tests for ``PyDic`` lookups against the ``dict1.txt`` fixture.

    Each test instance holds two dictionaries built from the same data:
    ``self.dict1`` (opened from a generated index in a temporary
    directory) and ``self.dict1m`` (opened directly from 'dict1.txt').

    NOTE(review): the '*****@*****.**' literals look like ids mangled by
    an e-mail scrubber -- confirm the real expected values.
    """

    # Expected inflected forms shared by several tests below.
    _PSZCZOLA_FORMS = [
        u"pszczoła",
        u"pszczoły",
        u"pszczole",
        u"pszczołę",
        u"pszczołą",
        u"pszczoło",
        u"pszczół",
        u"pszczołom",
        u"pszczołami",
        u"pszczołach",
    ]
    _SPODNIE_FORMS = [
        u"spodnie",
        u"spodni",
        u"spodniom",
        u"spodniami",
        u"spodniach",
    ]
    _ABAKUS_FORMS = [
        u"abakus", u"abakusa", u"abakusach", u"abakusami", u"abakusem",
        u"abakusie", u"abakusom", u"abakusowi", u"abakusów", u"abakusy",
    ]

    def __init__(self, methodName='runTest'):
        """Build both dictionary fixtures, then initialise the test case.

        :param methodName: passed through to ``unittest.TestCase``
        """
        self.current_dir = os.path.dirname(os.path.realpath(__file__))
        self.temp_dict1_path = tempfile.mkdtemp()
        # NOTE(review): this handle is never closed in this class; confirm
        # nothing else relies on it staying open before closing it here.
        self.dict1_file = open(os.path.join(self.current_dir, 'dict1.txt'))
        PyDicCreator().generate(self.dict1_file,
                                self.temp_dict1_path,
                                'dict1',
                                verbose=False)

        self.dict1 = PyDic(self.temp_dict1_path)
        self.dict1m = PyDic('dict1.txt')

        return super(TestPyDicBase, self).__init__(methodName)

    def test_file_load(self):
        dict1 = PyDic('dict1.txt')
        self.assertEqual(dict1.id(u'kotem'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'utrafieniu'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'pszczoły'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'spodniami'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'piloty'),
                         ['*****@*****.**', '*****@*****.**'])
        self.assertEqual(dict1.id(u'piloci'), ['*****@*****.**'])

    def test_name(self):
        self.assertEqual(self.dict1.name, 'dict1')
        self.assertEqual(self.dict1m.name, 'dict1.txt')

    def test_id(self):
        self.assertEqual(self.dict1.id(u'kotem'), ['1@dict1'])
        self.assertEqual(self.dict1.id(u'utrafieniu'), ['7@dict1'])
        self.assertEqual(self.dict1.id(u'pszczoły'), ['4@dict1'])
        self.assertEqual(self.dict1.id(u'spodniami'), ['3@dict1'])
        self.assertEqual(self.dict1.id(u'piloty'), ['11@dict1', '12@dict1'])
        self.assertEqual(self.dict1.id(u'piloci'), ['10@dict1'])

        self.assertEqual(self.dict1m.id(u'kotem'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'utrafieniu'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'pszczoły'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'spodniami'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'piloty'),
                         ['*****@*****.**', '*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'piloci'), ['*****@*****.**'])

    def test_a_id(self):
        self.assertEqual(self.dict1.a_id(u'pszczoly'), ['4@dict1'])
        self.assertEqual(self.dict1.a_id(u'pszczoły'), ['4@dict1'])
        self.assertEqual(self.dict1m.a_id(u'pszczoly'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.a_id(u'pszczóly'), [])

    def test_id_forms(self):
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')),
                         self._PSZCZOLA_FORMS)
        self.assertEqual(self.dict1.id_forms(PyDicId('3@dict1')),
                         self._SPODNIE_FORMS)
        self.assertEqual(self.dict1.id_forms(PyDicId('30000@dict1')), [])

        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')),
                         self._PSZCZOLA_FORMS)
        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')),
                         self._SPODNIE_FORMS)
        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')), [])

    def test_word_forms(self):
        self.assertEqual(self.dict1.word_forms(u"pszczołę"),
                         [self._PSZCZOLA_FORMS])
        self.assertEqual(self.dict1.word_forms(u"spodniach"),
                         [self._SPODNIE_FORMS])
        self.assertEqual(self.dict1.word_forms(u"spodniachhhhhhhh"), [])

        self.assertEqual(self.dict1m.word_forms(u"pszczołę"),
                         [self._PSZCZOLA_FORMS])
        self.assertEqual(self.dict1m.word_forms(u"spodniach"),
                         [self._SPODNIE_FORMS])
        self.assertEqual(self.dict1m.word_forms(u"spodniachhhhhhhh"), [])

    def test_a_word_forms(self):
        self.assertEqual(self.dict1.a_word_forms(u"pszczole"),
                         [self._PSZCZOLA_FORMS])
        self.assertEqual(self.dict1.a_word_forms(u"spodniach"),
                         [self._SPODNIE_FORMS])
        self.assertEqual(self.dict1.a_word_forms(u"spodniachhhhhhhh"), [])
        self.assertEqual(self.dict1.a_word_forms(u"psżczola"), [])

        self.assertEqual(self.dict1m.a_word_forms(u"pszczole"),
                         [self._PSZCZOLA_FORMS])
        self.assertEqual(self.dict1m.a_word_forms(u"spodniach"),
                         [self._SPODNIE_FORMS])
        self.assertEqual(self.dict1m.a_word_forms(u"spodniachhhhhhhh"), [])
        self.assertEqual(self.dict1.a_word_forms(u"pszczól"),
                         [self._PSZCZOLA_FORMS])

    def test_empty_label_word_forms(self):
        self.assertEqual(self.dict1.word_forms(u"abakusem"),
                         [self._ABAKUS_FORMS])

        self.assertEqual(self.dict1m.word_forms(u"abakusem"),
                         [self._ABAKUS_FORMS])

    def test_dic_name(self):
        self.assertEqual(self.dict1.name, u"dict1")
        self.assertEqual(self.dict1m.name, u"dict1.txt")

    def test_dic_len(self):
        self.assertEqual(len(self.dict1), 17)
        self.assertEqual(len(self.dict1m), 17)

    def test_id_base(self):
        self.assertEqual(self.dict1.id_base(PyDicId('2@dict1')), u"pies")
        self.assertEqual(self.dict1.id_base(PyDicId('2000@dict1')), None)

    def test_word_base(self):
        self.assertEqual(self.dict1.word_base(u"psów"), [u"pies"])
        self.assertEqual(self.dict1.word_base(u"spodniami"), [u"spodnie"])
        self.assertEqual(self.dict1.word_base(u"#"), [])
        self.assertEqual(self.dict1.word_base(u"pilotowi"), [u"pilot"])

    def test_a_word_base(self):
        self.assertEqual(self.dict1.a_word_base(u"psow"), [u"pies"])
        self.assertEqual(self.dict1.a_word_base(u"spodniami"), [u"spodnie"])
        self.assertEqual(self.dict1.a_word_base(u"#"), [])
        self.assertEqual(self.dict1.a_word_base(u"pilotowi"), [u"pilot"])
        self.assertEqual(self.dict1.a_word_base(u"psów"), [u"pies"])
        self.assertEqual(self.dict1.a_word_base(u"pśow"), [])

    def test_lowercase_hash(self):
        self.assertEqual(self.dict1.word_base(u'żoliborzowi'), [u"Żoliborz"])

    def test_iter(self):
        # Iteration is expected to yield ids 1..17 of 'dict1', in order.
        self.assertEqual(
            list(self.dict1),
            [PyDicId(u'%d@dict1' % number) for number in range(1, 18)])

    def test_common_prefix(self):
        # Expected: the shared prefix, then each word's remainder in order.
        self.assertEqual(
            PyDic.common_prefix([
                'abakus', 'abakusa', 'abakusowi', 'abakus', 'abakusem',
                'abakusie', 'abakusie', 'abakusy', 'abakusów', 'abakusom',
                'abakusy', 'abakusami', 'abakusach', 'abakusy'
            ]), [
                'abakus', '', 'a', 'owi', '', 'em', 'ie', 'ie', 'y', 'ów',
                'om', 'y', 'ami', 'ach', 'y'
            ])

    def test_different_id_types(self):
        # An id may be given as a PyDicId, as its string form, or as an int.
        self.assertNotEqual(self.dict1.id_forms(PyDicId('4@dict1')), [])
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')),
                         self.dict1.id_forms('4@dict1'))
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')),
                         self.dict1.id_forms(4))
Example #10
0
 def setUp(self):
     """Create a stemmer, load 'dict1.txt' and build the stemmer's index."""
     stemmer = PydicStemmer()
     dictionary = PyDic('dict1.txt')

     self.stemmer = stemmer
     self.dictionary = dictionary
     self.index = stemmer.build_index(dictionary)
Example #11
0
 def load_dictionary(self, path):
     """Open the dictionary at ``path`` and register it under its name.

     The new ``PyDic`` is stored in ``self.dictionaries`` keyed by its
     ``name`` attribute, replacing any entry already stored under that key.

     :param path: passed unchanged to ``PyDic``
     """
     loaded = PyDic(path)
     self.dictionaries[loaded.name] = loaded
Example #12
0
class TestPyDicBase(unittest.TestCase):
    """Tests for ``PyDic`` lookups against the ``dict1.txt`` fixture.

    Each test instance holds two dictionaries built from the same data:
    ``self.dict1`` (opened from a generated index in a temporary
    directory) and ``self.dict1m`` (opened directly from 'dict1.txt').

    NOTE(review): the '*****@*****.**' literals look like ids mangled by
    an e-mail scrubber -- confirm the real expected values.
    """

    # Expected inflected forms shared by several tests below.
    _PSZCZOLA_FORMS = [
        u"pszczoła", u"pszczoły", u"pszczole", u"pszczołę",
        u"pszczołą", u"pszczoło",
        u"pszczół", u"pszczołom", u"pszczołami",
        u"pszczołach",
    ]
    _SPODNIE_FORMS = [
        u"spodnie", u"spodni", u"spodniom", u"spodniami", u"spodniach",
    ]
    _ABAKUS_FORMS = [
        u"abakus", u"abakusa", u"abakusach", u"abakusami", u"abakusem",
        u"abakusie", u"abakusom", u"abakusowi", u"abakusów", u"abakusy",
    ]

    def __init__(self, methodName='runTest'):
        """Build both dictionary fixtures, then initialise the test case.

        :param methodName: passed through to ``unittest.TestCase``
        """
        self.current_dir = os.path.dirname(os.path.realpath(__file__))
        self.temp_dict1_path = tempfile.mkdtemp()
        # NOTE(review): this handle is never closed in this class; confirm
        # nothing else relies on it staying open before closing it here.
        self.dict1_file = open(os.path.join(self.current_dir, 'dict1.txt'))
        PyDicCreator().generate(self.dict1_file, self.temp_dict1_path, 'dict1',
                                verbose=False)

        self.dict1 = PyDic(self.temp_dict1_path)
        self.dict1m = PyDic('dict1.txt')

        return super(TestPyDicBase, self).__init__(methodName)

    def test_file_load(self):
        dict1 = PyDic('dict1.txt')
        self.assertEqual(dict1.id(u'kotem'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'utrafieniu'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'pszczoły'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'spodniami'), ['*****@*****.**'])
        self.assertEqual(dict1.id(u'piloty'), ['*****@*****.**', '*****@*****.**'])
        self.assertEqual(dict1.id(u'piloci'), ['*****@*****.**'])

    def test_name(self):
        self.assertEqual(self.dict1.name, 'dict1')
        self.assertEqual(self.dict1m.name, 'dict1.txt')

    def test_id(self):
        self.assertEqual(self.dict1.id(u'kotem'), ['1@dict1'])
        self.assertEqual(self.dict1.id(u'utrafieniu'), ['7@dict1'])
        self.assertEqual(self.dict1.id(u'pszczoły'), ['4@dict1'])
        self.assertEqual(self.dict1.id(u'spodniami'), ['3@dict1'])
        self.assertEqual(self.dict1.id(u'piloty'), ['11@dict1', '12@dict1'])
        self.assertEqual(self.dict1.id(u'piloci'), ['10@dict1'])

        self.assertEqual(self.dict1m.id(u'kotem'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'utrafieniu'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'pszczoły'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'spodniami'), ['*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'piloty'), ['*****@*****.**', '*****@*****.**'])
        self.assertEqual(self.dict1m.id(u'piloci'), ['*****@*****.**'])

    def test_a_id(self):
        self.assertEqual(self.dict1.a_id(u'pszczoly'), ['4@dict1'])

        self.assertEqual(self.dict1m.a_id(u'pszczoly'), ['*****@*****.**'])

    def test_id_forms(self):
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')),
                         self._PSZCZOLA_FORMS)
        self.assertEqual(self.dict1.id_forms(PyDicId('3@dict1')),
                         self._SPODNIE_FORMS)
        self.assertEqual(self.dict1.id_forms(PyDicId('30000@dict1')), [])

        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')),
                         self._PSZCZOLA_FORMS)
        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')),
                         self._SPODNIE_FORMS)
        self.assertEqual(self.dict1m.id_forms(PyDicId('*****@*****.**')), [])

    def test_word_forms(self):
        self.assertEqual(self.dict1.word_forms(u"pszczołę"),
                         [self._PSZCZOLA_FORMS])
        self.assertEqual(self.dict1.word_forms(u"spodniach"),
                         [self._SPODNIE_FORMS])
        self.assertEqual(self.dict1.word_forms(u"spodniachhhhhhhh"), [])

        self.assertEqual(self.dict1m.word_forms(u"pszczołę"),
                         [self._PSZCZOLA_FORMS])
        self.assertEqual(self.dict1m.word_forms(u"spodniach"),
                         [self._SPODNIE_FORMS])
        self.assertEqual(self.dict1m.word_forms(u"spodniachhhhhhhh"), [])

    def test_a_word_forms(self):
        self.assertEqual(self.dict1.a_word_forms(u"pszczole"),
                         [self._PSZCZOLA_FORMS])
        self.assertEqual(self.dict1.a_word_forms(u"spodniach"),
                         [self._SPODNIE_FORMS])
        self.assertEqual(self.dict1.a_word_forms(u"spodniachhhhhhhh"), [])

        self.assertEqual(self.dict1m.a_word_forms(u"pszczole"),
                         [self._PSZCZOLA_FORMS])
        self.assertEqual(self.dict1m.a_word_forms(u"spodniach"),
                         [self._SPODNIE_FORMS])
        self.assertEqual(self.dict1m.a_word_forms(u"spodniachhhhhhhh"), [])

    def test_words_for_prefix(self):
        def assertListsSame(a, b):
            # Compare sorted copies so that neither the list returned by
            # the dictionary nor the expected list is reordered in place.
            return self.assertEqual(sorted(a), sorted(b))

        assertListsSame(
            self.dict1m.words_for_prefix(u"pszczo"),
            [u"pszczoła", u"pszczoły", u"pszczole", u"pszczołą", u"pszczoło",
             u"pszczołę", u"pszczołami", u"pszczołach", u"pszczołom"]
        )

        self.assertEqual(self.dict1m.words_for_prefix(u"Xpszczo"), [])
        self.assertEqual(self.dict1m.words_for_prefix(u"pszczoX"), [])
        self.assertEqual(self.dict1m.words_for_prefix(u"pszczox"), [])
        self.assertEqual(self.dict1m.words_for_prefix(u"   pszczo"), [])
        self.assertEqual(self.dict1m.words_for_prefix(u"pszczo "), [])

        assertListsSame(self.dict1m.words_for_prefix(u"pi"), [
            u"pies", u"piloci", u"pilocie", u"pilocie", u"pilocie", u"pilot",
            u"pilot", u"pilot", u"pilota", u"pilota", u"pilota", u"pilotach",
            u"pilotach", u"pilotach", u"pilotami", u"pilotami", u"pilotami",
            u"pilotem", u"pilotem", u"pilotem", u"pilotom", u"pilotom",
            u"pilotom", u"pilotowi", u"pilotowi", u"pilotowi", u"piloty",
            u"piloty", u"pilot\xf3w", u"pilot\xf3w", u"pilot\xf3w"]
        )
        assertListsSame(self.dict1m.words_for_prefix(u"pie"), [u"pies"])

    def test_empty_label_word_forms(self):
        self.assertEqual(self.dict1.word_forms(u"abakusem"),
                         [self._ABAKUS_FORMS])

        self.assertEqual(self.dict1m.word_forms(u"abakusem"),
                         [self._ABAKUS_FORMS])

    def test_dic_name(self):
        self.assertEqual(self.dict1.name, u"dict1")
        self.assertEqual(self.dict1m.name, u"dict1.txt")

    def test_dic_len(self):
        self.assertEqual(len(self.dict1), 17)
        self.assertEqual(len(self.dict1m), 17)

    def test_id_base(self):
        self.assertEqual(self.dict1.id_base(PyDicId('2@dict1')), u"pies")
        self.assertEqual(self.dict1.id_base(PyDicId('2000@dict1')), None)

    def test_word_base(self):
        self.assertEqual(self.dict1.word_base(u"psów"), [u"pies"])
        self.assertEqual(self.dict1.word_base(u"spodniami"), [u"spodnie"])
        self.assertEqual(self.dict1.word_base(u"#"), [])
        self.assertEqual(self.dict1.word_base(u"pilotowi"), [u"pilot"])

    def test_a_word_base(self):
        self.assertEqual(self.dict1.a_word_base(u"psow"), [u"pies"])
        self.assertEqual(self.dict1.a_word_base(u"spodniami"), [u"spodnie"])
        self.assertEqual(self.dict1.a_word_base(u"#"), [])
        self.assertEqual(self.dict1.a_word_base(u"pilotowi"), [u"pilot"])

    def test_lowercase_hash(self):
        self.assertEqual(self.dict1.word_base(u'żoliborzowi'), [u"Żoliborz"])

    def test_iter(self):
        # Iteration is expected to yield ids 1..17 of 'dict1', in order.
        self.assertEqual(
            list(self.dict1),
            [PyDicId(u'%d@dict1' % number) for number in range(1, 18)])

    def test_common_prefix(self):
        # Expected: the shared prefix, then each word's remainder in order.
        self.assertEqual(PyDic.common_prefix(
            ['abakus', 'abakusa', 'abakusowi', 'abakus', 'abakusem', 'abakusie',
             'abakusie', 'abakusy', 'abakusów', 'abakusom', 'abakusy', 'abakusami',
             'abakusach', 'abakusy']),

                         ['abakus', '', 'a', 'owi', '', 'em', 'ie', 'ie', 'y', 'ów',
                          'om', 'y', 'ami', 'ach', 'y']
        )

    def test_different_id_types(self):
        # An id may be given as a PyDicId, as its string form, or as an int.
        self.assertNotEqual(self.dict1.id_forms(PyDicId('4@dict1')), [])
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')),
                         self.dict1.id_forms('4@dict1'))
        self.assertEqual(self.dict1.id_forms(PyDicId('4@dict1')), self.dict1.id_forms(4))
### LEMMATISING ###

from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet as wn
from pattern3.fr import parse as frparse
from pattern3.nl import parse as nlparse
from pattern3.de import parse as deparse
from pattern3.it import parse as itparse
from pydic import PyDic
from pymystem3 import Mystem

if __name__ == "__main__":
    # Initialise the lemmatisers, reporting progress on stdout.
    # Each status message ends with '\r' instead of a newline, so the next
    # message is printed over it on the same terminal line; the trailing
    # spaces pad every message to the same width so no stale characters
    # from a longer message remain visible.
    # NOTE(review): these names are bound only when the file is run as a
    # script, yet pl_lemmatise() below reads pl_dict at call time -- confirm
    # this module is never imported and used as a library.
    print("Initialising lemmatiser for Polish...  ", end='\r')
    pl_dict = PyDic('pydic/odm.txt')
    print("Initialising lemmatiser for Russian... ", end='\r')
    ru_lemmatiser = Mystem()
    print("Initialising lemmatiser for English... ", end='\r')
    en_lemmatiser = WordNetLemmatizer()
    # Final message ends with the default newline to finish the status line.
    print("Done initialising lemmatisers.         ")


def pl_lemmatise(word):
    """
    Lemmatiser for Polish
    :param word: string
    :return: string
    """
    word_forms = pl_dict.word_base(word)
    if word_forms: