예제 #1
0
    def setUp(self):
        """Create an index holding a three-row 'address book' table.

        Each cell is added as a word and immediately closed with
        ``set_column_tail_and_EOB()``; each row of three cells is closed
        with ``set_row_tail()``.  The index is built once all rows are in.
        """
        self.oktavia = Oktavia()
        self.table = self.oktavia.add_table('address book',
                                            ['zip', 'city', 'area code'])

        # (zip, city, area code); trailing comments give the cell lengths.
        address_rows = (
            ("94101", "San Francisco", "415"),  # 5, 13, 3
            ("94607", "Oakland", "510"),  # 5, 7, 3
            ("94401", "San Mateo", "650"),  # 5, 9, 3
        )
        for address_row in address_rows:
            for cell in address_row:
                self.oktavia.add_word(cell)
                self.table.set_column_tail_and_EOB()
            self.table.set_row_tail()

        self.oktavia.build()
예제 #2
0
 def test_load_dump_and_search_with_stemming(self):
     # Round-trip check: dump the fixture index, load the dump into a
     # fresh Oktavia that has an English stemmer set, then run a stemmed
     # raw search for u'baby' and expect exactly one result.
     serialized = self.oktavia.dump()

     restored = Oktavia()
     english_stemmer = snowballstemmer.EnglishStemmer()
     restored.set_stemmer(english_stemmer)
     restored.load(serialized)

     self.assertEqual(
         1, len(restored.raw_search(u'baby', stemming=True)))
예제 #3
0
 def setUp(self):
     """Create an index with a 'document' block metadata object.

     Three words are added; only "mississippi" is added between
     ``start_block("river")`` and ``end_block()``.  The index is built
     at the end with no arguments.
     """
     index = Oktavia()
     self.oktavia = index
     region = index.add_block('document')
     self.block = region

     index.add_word("abracadabra")

     # Only this word is added while the "river" block is open.
     region.start_block("river")
     index.add_word("mississippi")
     region.end_block()

     index.add_word("abracadabra mississippi")
     index.build()
예제 #4
0
 def setUp(self):
     """Create an index with an English stemmer and two tailed sections.

     Each text is added with ``stemming=True`` and then closed by
     ``set_tail()`` on the u'document' section with its document name.
     """
     self.oktavia = Oktavia()
     self.oktavia.set_stemmer(snowballstemmer.EnglishStemmer())
     self.section = self.oktavia.add_section(u'document')

     # (text to index, name passed to set_tail after the text)
     documents = (
         (u"stemming baby", u"doc1"),
         (u"stemmed babies", u"doc2"),
     )
     for text, doc_name in documents:
         self.oktavia.add_word(text, stemming=True)
         self.section.set_tail(doc_name)

     self.oktavia.build()
예제 #5
0
 def setUp(self):
     """Create an index whose 'document' splitter is split after each word.

     NOTE(review): the meaning of the ``5`` passed to ``build()`` is not
     visible here -- confirm against ``Oktavia.build``.
     """
     self.oktavia = Oktavia()
     self.splitter = self.oktavia.add_splitter('document')

     words = ("abracadabra", "mississippi", "abracadabra mississippi")
     for word in words:
         self.oktavia.add_word(word)
         self.splitter.split()

     self.oktavia.build(5)
예제 #6
0
 def setUp(self):
     """Create an index with three tailed sections, each ending a block.

     For every entry the text is added, the u'document' section is closed
     with ``set_tail()``, and ``add_end_of_block()`` is called on the index.

     NOTE(review): the meaning of the ``25`` passed to ``build()`` is not
     visible here -- confirm against ``Oktavia.build``.
     """
     self.oktavia = Oktavia()
     self.section = self.oktavia.add_section(u'document')

     # (text to index, name passed to set_tail after the text)
     entries = (
         (u"abracadabra", u"doc1"),
         (u"mississippi", u"doc2"),
         (u"abracadabra2 mississippi2", u"doc3"),
     )
     for text, doc_name in entries:
         self.oktavia.add_word(text)
         self.section.set_tail(doc_name)
         self.oktavia.add_end_of_block()

     self.oktavia.build(25)