def setUp(self):
    """Create an index with a three-row, three-column 'address book' table.

    Every cell is registered with ``add_word`` and immediately followed by
    ``set_column_tail_and_EOB``; every completed row is closed with
    ``set_row_tail``.  The index is built once all rows have been added.
    """
    # Trailing numbers are the character lengths of the cells in each row.
    address_rows = (
        ("94101", "San Francisco", "415"),  # 5, 13, 3
        ("94607", "Oakland", "510"),        # 5, 7, 3
        ("94401", "San Mateo", "650"),      # 5, 9, 3
    )

    self.oktavia = Oktavia()
    self.table = self.oktavia.add_table('address book', ['zip', 'city', 'area code'])

    for row in address_rows:
        for cell in row:
            self.oktavia.add_word(cell)
            self.table.set_column_tail_and_EOB()
        self.table.set_row_tail()

    self.oktavia.build()
def test_load_dump_and_search_with_stemming(self):
    """Round-trip the index through dump/load and run a stemmed raw search.

    The fixture index is dumped, loaded into a fresh ``Oktavia`` that has an
    English stemmer attached, and a stemming raw search for ``baby`` must
    return exactly one result.
    """
    serialized = self.oktavia.dump()

    # The stemmer is attached to the new instance before the dump is loaded.
    restored = Oktavia()
    restored.set_stemmer(snowballstemmer.EnglishStemmer())
    restored.load(serialized)

    hits = restored.raw_search(u'baby', stemming=True)
    hit_count = len(hits)
    self.assertEqual(1, hit_count)
def setUp(self):
    """Create an index with a 'document' block metadata and three words.

    The second word, "mississippi", is added between ``start_block("river")``
    and ``end_block()``; the first and third words are added outside that
    pair.  The index is built at the end.
    """
    index = self.oktavia = Oktavia()
    block = self.block = index.add_block('document')

    index.add_word("abracadabra")

    # Only this word sits inside the "river" block.
    block.start_block("river")
    index.add_word("mississippi")
    block.end_block()

    index.add_word("abracadabra mississippi")
    index.build()
def setUp(self):
    """Create a stemming-enabled index with two named 'document' sections.

    An English stemmer is attached before any text is added.  Each text is
    added with ``stemming=True`` and then closed by ``set_tail`` with its
    section name.  The index is built at the end.
    """
    documents = (
        (u"stemming baby", u"doc1"),
        (u"stemmed babies", u"doc2"),
    )

    index = self.oktavia = Oktavia()
    index.set_stemmer(snowballstemmer.EnglishStemmer())
    section = self.section = index.add_section(u'document')

    for text, section_name in documents:
        index.add_word(text, stemming=True)
        section.set_tail(section_name)

    index.build()
def setUp(self):
    """Create an index whose three words are each followed by a split.

    A 'document' splitter is registered, then every word is added and
    immediately followed by ``split()``.  The index is built with the
    argument ``5``.
    """
    words = ("abracadabra", "mississippi", "abracadabra mississippi")

    index = self.oktavia = Oktavia()
    splitter = self.splitter = index.add_splitter('document')

    for word in words:
        index.add_word(word)
        splitter.split()

    # NOTE(review): 5 is presumably a cache-density setting -- confirm
    # against Oktavia.build.
    index.build(5)
def setUp(self):
    """Create an index with three named sections, each ending a block.

    For every (text, name) pair the text is added, the section is closed
    with ``set_tail(name)``, and ``add_end_of_block()`` is called on the
    index.  The index is built with the argument ``25``.
    """
    documents = (
        (u"abracadabra", u"doc1"),
        (u"mississippi", u"doc2"),
        (u"abracadabra2 mississippi2", u"doc3"),
    )

    index = self.oktavia = Oktavia()
    section = self.section = index.add_section(u'document')

    for text, section_name in documents:
        index.add_word(text)
        section.set_tail(section_name)
        index.add_end_of_block()

    # NOTE(review): 25 is presumably a cache-density setting -- confirm
    # against Oktavia.build.
    index.build(25)