Exemple #1
0
 def lemmatize(self, query):
     """Yield lemmas for ``query``, with stemming fallbacks if none are found.

     ``query`` is split into a non-empty stem and a (possibly empty)
     flexion at every position, from the longest stem to the shortest.
     For each split present in both ``self.db_stems`` and ``self.db_flex``
     the lemmas stored under the shared keys are yielded.

     When no split produced a lemma, the non-empty results of
     ``getStem(query, morphs)`` are yielded instead; if that yields nothing
     either, the output of ``stem_2_1.stemmer(query)`` is yielded.

     A marker ('LEMMA', 'MACHINE' or 'STEMMA') is printed to tell which
     strategy produced the output.
     """
     matched = False
     for cut in range(len(query), 0, -1):
         head, tail = query[:cut], query[cut:]
         if head not in self.db_stems or tail not in self.db_flex:
             continue
         # Intersect the keys of the inner dict stored for this stem with
         # the values stored for this flexion; each shared key maps to a
         # lemma in the inner dict.
         found = {
             self.db_stems[head][tag]
             for tag in self.db_stems[head].keys() & self.db_flex[tail]
         }
         if not found:
             continue
         matched = True
         for lemma in found:
             yield lemma

     if matched:
         print('LEMMA')
         return

     # No lemma was found: return to the previous algorithm.
     machine_hit = False
     for candidate in getStem(query, morphs):
         if candidate != '':
             machine_hit = True
             yield candidate

     if machine_hit:
         print('MACHINE')
     else:
         print('STEMMA')
         yield from stem_2_1.stemmer(query)
Exemple #2
0
def stemmer(query, db_stems_name=config.DATABASE_STEMS_NAME,
            db_flex_name=config.DATABASE_FLEX_NAME):
    """Yield the stems of ``query`` that are confirmed by the two shelves.

    ``query`` is split into a non-empty stem and a (possibly empty) flexion
    at every position, from the longest stem to the shortest. A stem is
    yielded when it is a key of the stems shelf, the flexion is a key of
    the flexions shelf, and ``set(db_stems[stem]) & db_flex[flex]`` is
    non-empty.

    If no split qualifies, the results of ``stem_2_1.stemmer(query)`` are
    yielded instead.

    Args:
        query: The word to stem.
        db_stems_name: Filename passed to ``shelve.open`` for the stems.
        db_flex_name: Filename passed to ``shelve.open`` for the flexions.

    Yields:
        Matching stems, longest first, or the fallback stemmer's output.
    """
    found = False
    db_stems = shelve.open(db_stems_name)
    try:
        db_flex = shelve.open(db_flex_name)
        try:
            for i in range(len(query), 0, -1):
                stem = query[:i]
                flex = query[i:]
                if (stem in db_stems and flex in db_flex
                        and set(db_stems[stem]) & db_flex[flex]):
                    found = True
                    yield stem
        finally:
            # Runs on normal completion, on an exception, and when the
            # consumer abandons the generator early (GeneratorExit), so
            # the shelves are never left open.
            db_flex.close()
    finally:
        db_stems.close()
    # Both shelves are already closed before delegating to the fallback.
    if not found:
        yield from stem_2_1.stemmer(query)
Exemple #3
0
 def test_stem1(self):
     # stemmer() output is compared as a sorted list, so the order in
     # which stems are produced does not matter.
     expected = ['мам']
     actual = sorted(stemmer('мам'))
     self.assertEqual(actual, sorted(expected))
Exemple #4
0
 def test_stem9(self):
     # Exactly one stem is expected; comparison is order-insensitive
     # because both sides are sorted first.
     expected = ['мыл']
     actual = sorted(stemmer('мыла'))
     self.assertEqual(actual, sorted(expected))
Exemple #5
0
 def test_stem0(self):
     # Two stems are expected; both sides are sorted so the order in
     # which stemmer() yields them is irrelevant.
     expected = ['лаял', 'ла']
     actual = sorted(stemmer('лаял'))
     self.assertEqual(actual, sorted(expected))
Exemple #6
0
 def test_stem7(self):
     # Two stems are expected: the full word and the word without its
     # last letter. Compared as sorted lists.
     expected = ['абвгдейку', 'абвгдейк']
     actual = sorted(stemmer('абвгдейку'))
     self.assertEqual(actual, sorted(expected))
Exemple #7
0
 def test_stem8(self):
     # Every prefix of the four-letter word is expected as a stem.
     # Compared as sorted lists, so yield order is ignored.
     expected = ['мала', 'м', 'ма', 'мал']
     actual = sorted(stemmer('мала'))
     self.assertEqual(actual, sorted(expected))
Exemple #8
0
 def test_stem6(self):
     # A one-letter word is expected to yield itself as the only stem.
     expected = ['а']
     actual = sorted(stemmer('а'))
     self.assertEqual(actual, sorted(expected))
Exemple #9
0
 def test_stem5(self):
     # Two stems are expected: the full word and its first letter.
     # Compared as sorted lists.
     expected = ['пам', 'п']
     actual = sorted(stemmer('пам'))
     self.assertEqual(actual, sorted(expected))
Exemple #10
0
 def test_stem4(self):
     # Two stems are expected: the full word and the word without its
     # last letter. Compared as sorted lists.
     expected = ['ого', 'ог']
     actual = sorted(stemmer('ого'))
     self.assertEqual(actual, sorted(expected))
Exemple #11
0
 def test_stem3(self):
     # Two stems are expected: the full word and its first letter.
     # Compared as sorted lists.
     expected = ['ба', 'б']
     actual = sorted(stemmer('ба'))
     self.assertEqual(actual, sorted(expected))
Exemple #12
0
 def test_stem2(self):
     # Two stems are expected: the full word and its first three
     # letters. Compared as sorted lists.
     expected = ['бабах', 'баб']
     actual = sorted(stemmer('бабах'))
     self.assertEqual(actual, sorted(expected))