def test_unit_terminology_instance(terminology_units, terminology0):
    """Check the terminology adapter returned for a unit.

    The test looks up a unit whose ``source_f`` equals the
    ``terminology_units`` fixture in the first store of ``terminology0``,
    wraps it with whatever ``terminology.get(unit.__class__)`` returns, and
    then verifies:

    * the adapter's type, context, stopwords, stemmer and text;
    * that ``split``, ``tokens`` and ``stems`` agree with the expressions
      spelled out below;
    * that ``stem_set``, ``stem_model`` and ``stem_m2m`` mirror
      ``unit.stems`` and its ``model`` / ``through`` attributes;
    * that ``stem()`` repopulates ``existing_stems`` after the stems are
      deleted, after the unit source is changed, and after it is restored.
    """
    units = terminology0.stores.first().units.filter(
        source_f=terminology_units)
    # Re-check the source in Python and keep the first exact match.
    unit = next(
        (candidate for candidate in units
         if candidate.source_f == terminology_units),
        None)
    # Fail here with a clear message rather than later on ``None.__class__``.
    assert unit is not None, "no unit matches the requested source"

    term = terminology.get(unit.__class__)(unit)
    assert isinstance(term, UnitTerminology)
    assert term.context == unit
    assert term.stopwords == stopwords.get().words
    assert term.stemmer == stemmer.get()
    assert term.text == unit.source_f

    # The backslash is doubled so the literal carries no invalid ``\w``
    # string escape; the pattern handed to ``re`` is unchanged.
    assert (
        term.split(term.text)
        == re.split(u"[^\\w'-]+", term.text))
    assert (
        term.tokens
        == [t.lower()
            for t in term.split(term.text)
            if (len(t) > 2
                and t.lower() not in term.stopwords)])
    assert term.stems == {term.stemmer(t) for t in term.tokens}
    assert term.stem_set == unit.stems
    assert term.stem_model == term.stem_set.model
    assert term.stem_m2m == term.stem_set.through

    # Start from a clean slate, then stem the current source.
    unit.stems.all().delete()
    assert term.existing_stems == set()
    term.stem()
    assert sorted(term.existing_stems) == sorted(term.stems)

    # Change the source: the repeated word must yield a single stem.
    old_source = unit.source_f
    old_stems = term.existing_stems
    unit.source_f = "hatstand hatstand umbrella"
    unit.save()
    term.stem()
    assert (
        sorted(term.existing_stems)
        == [u'hatstand', u'umbrella'])

    # Restore the source: the stems must return to their previous value.
    unit.source_f = old_source
    unit.save()
    term.stem()
    assert term.existing_stems == old_stems
def test_stemmer():
    """Check that ``stemmer.get()`` hands back the very object named ``stem``."""
    provided = stemmer.get()
    assert provided is stem
def stemmer(self):
    """Return the result of ``stemmer.get()``.

    ``self`` is not used by the body.
    """
    # NOTE(review): the ``self`` parameter suggests this belongs to a class
    # whose header is not visible here -- confirm where it lives.
    current = stemmer.get()
    return current