Example #1
0
    def setUp(self):
        """Build a three-row 'address book' table index for the tests.

        Each cell is added as a word and closed with
        set_column_tail_and_EOB(); each row is closed with set_row_tail().
        """
        rows = (
            ("94101", "San Francisco", "415"),  # cell lengths: 5, 13, 3
            ("94607", "Oakland", "510"),  # cell lengths: 5, 7, 3
            ("94401", "San Mateo", "650"),  # cell lengths: 5, 9, 3
        )

        self.oktavia = Oktavia()
        self.table = self.oktavia.add_table('address book',
                                            ['zip', 'city', 'area code'])

        for row in rows:
            for cell in row:
                self.oktavia.add_word(cell)
                self.table.set_column_tail_and_EOB()
            self.table.set_row_tail()

        self.oktavia.build()
Example #2
0
 def setUp(self):
     """Index three add_word() calls; only the middle one is inside a block."""
     self.oktavia = index = Oktavia()
     self.block = blocks = index.add_block('document')

     index.add_word("abracadabra")  # added before any block is opened

     blocks.start_block("river")
     index.add_word("mississippi")  # the only text inside the "river" block
     blocks.end_block()

     index.add_word("abracadabra mississippi")  # added after the block closes
     index.build()
Example #3
0
 def setUp(self):
     """Build an index with an English stemmer and two sections."""
     self.oktavia = Oktavia()
     self.oktavia.set_stemmer(snowballstemmer.EnglishStemmer())
     self.section = self.oktavia.add_section(u'document')

     # (text, section name) pairs; every text is added with stemming=True.
     documents = (
         (u"stemming baby", u"doc1"),
         (u"stemmed babies", u"doc2"),
     )
     for text, name in documents:
         self.oktavia.add_word(text, stemming=True)
         self.section.set_tail(name)

     self.oktavia.build()
Example #4
0
 def setUp(self):
     """Index three texts, calling split() after each, then build(5)."""
     self.oktavia = Oktavia()
     self.splitter = self.oktavia.add_splitter('document')

     for text in ("abracadabra", "mississippi", "abracadabra mississippi"):
         self.oktavia.add_word(text)
         self.splitter.split()

     self.oktavia.build(5)
Example #5
0
 def setUp(self):
     """Index three texts, each closed as a named section, then build(25)."""
     self.oktavia = Oktavia()
     self.section = self.oktavia.add_section(u'document')

     # (text, section name) pairs, in insertion order.
     documents = (
         (u"abracadabra", u"doc1"),
         (u"mississippi", u"doc2"),
         (u"abracadabra2 mississippi2", u"doc3"),
     )
     for text, name in documents:
         self.oktavia.add_word(text)
         self.section.set_tail(name)
         self.oktavia.add_end_of_block()

     self.oktavia.build(25)
Example #6
0
    def setUp(self):
        """Create the 'address book' table fixture: three rows of three cells."""
        self.oktavia = Oktavia()
        columns = ['zip', 'city', 'area code']
        self.table = self.oktavia.add_table('address book', columns)

        records = [
            ["94101", "San Francisco", "415"],  # cell lengths: 5, 13, 3
            ["94607", "Oakland", "510"],  # cell lengths: 5, 7, 3
            ["94401", "San Mateo", "650"],  # cell lengths: 5, 9, 3
        ]
        for record in records:
            for value in record:
                # Every cell is one word followed by a column terminator.
                self.oktavia.add_word(value)
                self.table.set_column_tail_and_EOB()
            self.table.set_row_tail()

        self.oktavia.build()
Example #7
0
 def test_load_dump_and_search_with_stemming(self):
     # Load a dump of the fixture index into a fresh Oktavia that has its
     # own English stemmer, then expect exactly one hit for a stemmed search.
     snapshot = self.oktavia.dump()

     restored = Oktavia()
     restored.set_stemmer(snowballstemmer.EnglishStemmer())
     restored.load(snapshot)

     hits = restored.raw_search(u'baby', stemming=True)
     self.assertEqual(1, len(hits))
Example #8
0
 def setUp(self):
     """Build an index where only "mississippi" is added inside a block."""
     engine = Oktavia()
     self.oktavia = engine
     self.block = engine.add_block('document')

     # Text before, inside and after the single block named "river".
     engine.add_word("abracadabra")
     self.block.start_block("river")
     engine.add_word("mississippi")
     self.block.end_block()
     engine.add_word("abracadabra mississippi")

     engine.build()
Example #9
0
 def setUp(self):
     """Create a splitter fixture: three texts, each followed by split()."""
     self.oktavia = Oktavia()
     self.splitter = self.oktavia.add_splitter('document')

     texts = ["abracadabra", "mississippi", "abracadabra mississippi"]
     for text in texts:
         self.oktavia.add_word(text)
         self.splitter.split()

     self.oktavia.build(5)
Example #10
0
 def setUp(self):
     """Create a section fixture holding "doc1", "doc2" and "doc3"."""
     self.oktavia = Oktavia()
     self.section = self.oktavia.add_section(u'document')

     texts = (u"abracadabra", u"mississippi", u"abracadabra2 mississippi2")
     names = (u"doc1", u"doc2", u"doc3")
     for text, name in zip(texts, names):
         # One word, then the section tail, then an end-of-block mark.
         self.oktavia.add_word(text)
         self.section.set_tail(name)
         self.oktavia.add_end_of_block()

     self.oktavia.build(25)
Example #11
0
class SectionTest(unittest.TestCase):
    """Exercise the 'document' section metadata of an Oktavia index.

    Every check runs against the freshly built index and again after the
    index has been round-tripped through dump()/load().

    Boundary checks use assertRaises instead of try / self.fail() / bare
    except: a bare except also swallows the AssertionError raised by
    fail(), so such a test can never fail.  The concrete exception type is
    not visible from this file, so any Exception is accepted.
    """

    # (position, expected section index) pairs for get_section_index().
    _INDEX_CASES = ((0, 0), (10, 0), (12, 1), (22, 1), (24, 2), (48, 2))
    # Text and name of each section, in insertion order.
    _CONTENTS = (u"abracadabra", u"mississippi", u"abracadabra2 mississippi2")
    _NAMES = (u"doc1", u"doc2", u"doc3")

    def setUp(self):
        """Index three texts, closing each one as a named section."""
        self.oktavia = Oktavia()
        self.section = self.oktavia.add_section(u'document')
        for text, name in zip(self._CONTENTS, self._NAMES):
            self.oktavia.add_word(text)
            self.section.set_tail(name)
            self.oktavia.add_end_of_block()
        self.oktavia.build(25)

    def _reload(self):
        """Round-trip the index through dump()/load(); re-fetch the section."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.section = self.oktavia.get_section('document')

    def _check_size(self):
        """Assert that the section metadata holds three sections."""
        self.assertEqual(3, self.section.size())

    def _check_section_index(self):
        """Assert the section index reported for each sampled position."""
        for position, expected in self._INDEX_CASES:
            self.assertEqual(expected,
                             self.section.get_section_index(position))

    def _check_section_index_boundary(self):
        """Assert that positions -1 and 49 are rejected."""
        with self.assertRaises(Exception):
            self.section.get_section_index(-1)
        with self.assertRaises(Exception):
            self.section.get_section_index(49)

    def _check_content(self):
        """Assert the text returned for each section index."""
        for index, expected in enumerate(self._CONTENTS):
            self.assertEqual(expected, self.section.get_content(index))

    def _check_content_boundary(self):
        """Assert that section indexes 3 and -1 are rejected by get_content."""
        with self.assertRaises(Exception):
            self.section.get_content(3)
        with self.assertRaises(Exception):
            self.section.get_content(-1)

    def _check_name(self):
        """Assert the name returned for each section index."""
        for index, expected in enumerate(self._NAMES):
            self.assertEqual(expected, self.section.get_name(index))

    def _check_name_boundary(self):
        """Assert that section indexes 3 and -1 are rejected by get_name."""
        with self.assertRaises(Exception):
            self.section.get_name(3)
        with self.assertRaises(Exception):
            self.section.get_name(-1)

    def test_doc_sizes(self):
        self._check_size()

    def test_get_section_index(self):
        self._check_section_index()

    def test_get_section_index_boundary(self):
        self._check_section_index_boundary()

    def test_get_section_content(self):
        self._check_content()

    def test_get_section_content_boundary(self):
        self._check_content_boundary()

    def test_get_section_name(self):
        self._check_name()

    def test_get_section_name_boundary(self):
        self._check_name_boundary()

    def test_load_dump_and_doc_sizes(self):
        self._reload()
        self._check_size()

    def test_load_dump_and_get_section_index(self):
        self._reload()
        self._check_section_index()

    def test_load_dump_and_get_section_index_boundary(self):
        self._reload()
        self._check_section_index_boundary()

    def test_load_dump_and_get_section_content(self):
        self._reload()
        self._check_content()

    def test_load_dump_and_get_section_content_boundary(self):
        self._reload()
        self._check_content_boundary()

    def test_load_dump_and_get_section_name(self):
        self._reload()
        self._check_name()

    def test_load_dump_and_get_section_name_boundary(self):
        self._reload()
        self._check_name_boundary()
Example #12
0
class SectionTest(unittest.TestCase):
    """Exercise the 'document' section metadata of an Oktavia index.

    Every check runs against the freshly built index and again after the
    index has been round-tripped through dump()/load().

    Boundary checks use assertRaises instead of try / self.fail() / bare
    except: a bare except also swallows the AssertionError raised by
    fail(), so such a test can never fail.  The concrete exception type is
    not visible from this file, so any Exception is accepted.
    """

    # (position, expected section index) pairs for get_section_index().
    _INDEX_CASES = ((0, 0), (10, 0), (12, 1), (22, 1), (24, 2), (48, 2))
    # Text and name of each section, in insertion order.
    _CONTENTS = (u"abracadabra", u"mississippi", u"abracadabra2 mississippi2")
    _NAMES = (u"doc1", u"doc2", u"doc3")

    def setUp(self):
        """Index three texts, closing each one as a named section."""
        self.oktavia = Oktavia()
        self.section = self.oktavia.add_section(u'document')
        for text, name in zip(self._CONTENTS, self._NAMES):
            self.oktavia.add_word(text)
            self.section.set_tail(name)
            self.oktavia.add_end_of_block()
        self.oktavia.build(25)

    def _reload(self):
        """Round-trip the index through dump()/load(); re-fetch the section."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.section = self.oktavia.get_section('document')

    def _check_size(self):
        """Assert that the section metadata holds three sections."""
        self.assertEqual(3, self.section.size())

    def _check_section_index(self):
        """Assert the section index reported for each sampled position."""
        for position, expected in self._INDEX_CASES:
            self.assertEqual(expected,
                             self.section.get_section_index(position))

    def _check_section_index_boundary(self):
        """Assert that positions -1 and 49 are rejected."""
        with self.assertRaises(Exception):
            self.section.get_section_index(-1)
        with self.assertRaises(Exception):
            self.section.get_section_index(49)

    def _check_content(self):
        """Assert the text returned for each section index."""
        for index, expected in enumerate(self._CONTENTS):
            self.assertEqual(expected, self.section.get_content(index))

    def _check_content_boundary(self):
        """Assert that section indexes 3 and -1 are rejected by get_content."""
        with self.assertRaises(Exception):
            self.section.get_content(3)
        with self.assertRaises(Exception):
            self.section.get_content(-1)

    def _check_name(self):
        """Assert the name returned for each section index."""
        for index, expected in enumerate(self._NAMES):
            self.assertEqual(expected, self.section.get_name(index))

    def _check_name_boundary(self):
        """Assert that section indexes 3 and -1 are rejected by get_name."""
        with self.assertRaises(Exception):
            self.section.get_name(3)
        with self.assertRaises(Exception):
            self.section.get_name(-1)

    def test_doc_sizes(self):
        self._check_size()

    def test_get_section_index(self):
        self._check_section_index()

    def test_get_section_index_boundary(self):
        self._check_section_index_boundary()

    def test_get_section_content(self):
        self._check_content()

    def test_get_section_content_boundary(self):
        self._check_content_boundary()

    def test_get_section_name(self):
        self._check_name()

    def test_get_section_name_boundary(self):
        self._check_name_boundary()

    def test_load_dump_and_doc_sizes(self):
        self._reload()
        self._check_size()

    def test_load_dump_and_get_section_index(self):
        self._reload()
        self._check_section_index()

    def test_load_dump_and_get_section_index_boundary(self):
        self._reload()
        self._check_section_index_boundary()

    def test_load_dump_and_get_section_content(self):
        self._reload()
        self._check_content()

    def test_load_dump_and_get_section_content_boundary(self):
        self._reload()
        self._check_content_boundary()

    def test_load_dump_and_get_section_name(self):
        self._reload()
        self._check_name()

    def test_load_dump_and_get_section_name_boundary(self):
        self._reload()
        self._check_name_boundary()
Example #13
0
class SplitterTest(unittest.TestCase):
    """Exercise the 'document' splitter metadata of an Oktavia index.

    Every check runs against the freshly built index and again after the
    index has been round-tripped through dump()/load().

    Boundary checks use assertRaises instead of try / self.fail() / bare
    except: a bare except also swallows the AssertionError raised by
    fail(), so such a test can never fail.  The concrete exception type is
    not visible from this file, so any Exception is accepted.
    """

    # Texts added to the index; split() is called after each one.
    _TEXTS = ("abracadabra", "mississippi", "abracadabra mississippi")
    # (position, expected splitter index) pairs for get_index().
    _INDEX_CASES = ((0, 0), (10, 0), (11, 1), (21, 1), (22, 2), (44, 2))

    def setUp(self):
        """Index the three texts, splitting after each one."""
        self.oktavia = Oktavia()
        self.splitter = self.oktavia.add_splitter('document')
        for text in self._TEXTS:
            self.oktavia.add_word(text)
            self.splitter.split()
        self.oktavia.build(5)

    def _reload(self):
        """Round-trip the index through dump()/load(); re-fetch the splitter."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.splitter = self.oktavia.get_splitter('document')

    def _check_count(self):
        """Assert that the splitter recorded three pieces."""
        self.assertEqual(3, self.splitter.size())

    def _check_index(self):
        """Assert the splitter index reported for each sampled position."""
        for position, expected in self._INDEX_CASES:
            self.assertEqual(expected, self.splitter.get_index(position))

    def _check_index_boundary(self):
        """Assert that positions -1 and 45 are rejected."""
        with self.assertRaises(Exception):
            self.splitter.get_index(-1)
        with self.assertRaises(Exception):
            self.splitter.get_index(45)

    def _check_content(self):
        """Assert the text returned for each piece, last piece first."""
        for index in (2, 1, 0):
            self.assertEqual(self._TEXTS[index],
                             self.splitter.get_content(index))

    def _check_content_boundary(self):
        """Assert that piece indexes 3 and -1 are rejected."""
        with self.assertRaises(Exception):
            self.splitter.get_content(3)
        with self.assertRaises(Exception):
            self.splitter.get_content(-1)

    def test_count(self):
        self._check_count()

    def test_get_splitter_index(self):
        self._check_index()

    def test_get_splitter_index_boundary(self):
        self._check_index_boundary()

    def test_get_splitter_content(self):
        self._check_content()

    def test_get_splitter_content_boundary(self):
        self._check_content_boundary()

    def test_load_dump_and_count(self):
        self._reload()
        self._check_count()

    def test_load_dump_and_get_splitter_index(self):
        self._reload()
        self._check_index()

    def test_load_dump_and_get_splitter_index_boundary(self):
        self._reload()
        self._check_index_boundary()

    def test_load_dump_and_get_splitter_content(self):
        self._reload()
        self._check_content()

    def test_load_dump_and_get_splitter_content_boundary(self):
        self._reload()
        self._check_content_boundary()
Example #14
0
class TableTest(unittest.TestCase):
    """Exercise the 'address book' table metadata of an Oktavia index.

    Every check runs against the freshly built index and again after the
    index has been round-tripped through dump()/load().

    Boundary checks use assertRaises instead of try / self.fail() / bare
    except: a bare except also swallows the AssertionError raised by
    fail(), so such a test can never fail.  The concrete exception type is
    not visible from this file, so any Exception is accepted.
    """

    _COLUMNS = ['zip', 'city', 'area code']
    _ROWS = (
        ("94101", "San Francisco", "415"),  # cell lengths: 5, 13, 3
        ("94607", "Oakland", "510"),  # cell lengths: 5, 7, 3
        ("94401", "San Mateo", "650"),  # cell lengths: 5, 9, 3
    )
    # (position, expected get_cell()[0], expected get_cell()[1]) triples;
    # judging by the fixture layout, [0] is the row and [1] the column.
    _CELL_CASES = (
        (0, 0, 0),
        (22, 0, 2),
        (24, 1, 0),
        (40, 1, 2),
        (42, 2, 0),
        (60, 2, 2),
    )

    def setUp(self):
        """Index three rows of three cells into the 'address book' table."""
        self.oktavia = Oktavia()
        self.table = self.oktavia.add_table('address book',
                                            list(self._COLUMNS))
        for row in self._ROWS:
            for cell in row:
                self.oktavia.add_word(cell)
                self.table.set_column_tail_and_EOB()
            self.table.set_row_tail()
        self.oktavia.build()

    def _reload(self):
        """Round-trip the index through dump()/load(); re-fetch the table."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.table = self.oktavia.get_table('address book')

    def _check_row_size(self):
        """Assert that the table holds three rows."""
        self.assertEqual(3, self.table.row_size())

    def _check_column_size(self):
        """Assert that the table holds three columns."""
        self.assertEqual(3, self.table.column_size())

    def _check_get_cell(self):
        """Assert both components of get_cell() for each sampled position."""
        for position, first, second in self._CELL_CASES:
            cell = self.table.get_cell(position)
            self.assertEqual(first, cell[0])
            self.assertEqual(second, cell[1])

    def _check_get_cell_boundary(self):
        """Assert that positions -1 and 62 are rejected."""
        with self.assertRaises(Exception):
            self.table.get_cell(-1)
        with self.assertRaises(Exception):
            self.table.get_cell(62)

    def _check_row_content(self):
        """Assert the column values returned for the first row."""
        row = self.table.get_row_content(0)
        self.assertEqual('94101', row['zip'])
        self.assertEqual('San Francisco', row['city'])
        self.assertEqual('415', row['area code'])

    def _check_content_boundary(self):
        """Assert that get_content() rejects indexes 3 and -1.

        The attribute lookup stays inside the ``with`` block so that any
        exception it raises is accepted too, as the original bare except
        did.  NOTE(review): the other tests use get_row_content(); confirm
        that get_content() is the intended method here.
        """
        with self.assertRaises(Exception):
            self.table.get_content(3)
        with self.assertRaises(Exception):
            self.table.get_content(-1)

    def test_row_sizes(self):
        self._check_row_size()

    def test_column_sizes(self):
        self._check_column_size()

    def test_get_cell(self):
        self._check_get_cell()

    def test_get_table_index_boundary(self):
        self._check_get_cell_boundary()

    def test_get_table_content(self):
        self._check_row_content()

    def test_get_table_content_boundary(self):
        self._check_content_boundary()

    def test_load_dump_and_row_sizes(self):
        self._reload()
        self._check_row_size()

    def test_load_dump_and_column_sizes(self):
        self._reload()
        self._check_column_size()

    def test_load_dump_and_get_cell(self):
        self._reload()
        self._check_get_cell()

    def test_load_dump_and_get_table_index_boundary(self):
        self._reload()
        self._check_get_cell_boundary()

    def test_load_dump_and_get_table_content(self):
        self._reload()
        self._check_row_content()

    def test_load_dump_and_get_table_content_boundary(self):
        self._reload()
        self._check_content_boundary()
Example #15
0
class BlockTest(unittest.TestCase):
    """Exercise the 'document' block metadata of an Oktavia index.

    Every check runs against the freshly built index and again after the
    index has been round-tripped through dump()/load().

    Boundary checks use assertRaises instead of try / self.fail() / bare
    except: a bare except also swallows the AssertionError raised by
    fail(), so such a test can never fail.  The concrete exception type is
    not visible from this file, so any Exception is accepted.
    """

    # Positions expected to be outside / inside the single "river" block.
    _OUTSIDE = (0, 10, 22, 44)
    _INSIDE = (11, 21)

    def setUp(self):
        """Index three texts; only "mississippi" is added inside a block."""
        self.oktavia = Oktavia()
        self.block = self.oktavia.add_block('document')
        self.oktavia.add_word("abracadabra")
        self.block.start_block("river")
        self.oktavia.add_word("mississippi")
        self.block.end_block()
        self.oktavia.add_word("abracadabra mississippi")
        self.oktavia.build()

    def _reload(self):
        """Round-trip the index through dump()/load(); re-fetch the block."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.block = self.oktavia.get_block('document')

    def _check_size(self):
        """Assert that exactly one block was recorded."""
        self.assertEqual(1, self.block.size())

    def _check_in_block(self):
        """Assert in_block() for positions around the block's edges."""
        for position in self._OUTSIDE:
            self.assertFalse(self.block.in_block(position))
        for position in self._INSIDE:
            self.assertTrue(self.block.in_block(position))

    def _check_in_block_boundary(self):
        """Assert that positions -1 and 45 are rejected by in_block()."""
        with self.assertRaises(Exception):
            self.block.in_block(-1)
        with self.assertRaises(Exception):
            self.block.in_block(45)

    def _check_content(self):
        """Assert the block text found at position 11."""
        self.assertEqual("mississippi", self.block.get_block_content(11))

    def _check_content_boundary(self):
        """Assert that positions 45 and -1 are rejected by get_block_content()."""
        with self.assertRaises(Exception):
            self.block.get_block_content(45)
        with self.assertRaises(Exception):
            self.block.get_block_content(-1)

    def _check_name(self):
        """Assert the block name found at position 11."""
        self.assertEqual("river", self.block.get_block_name(11))

    def _check_name_boundary(self):
        """Assert that positions 45 and -1 are rejected by get_block_name()."""
        with self.assertRaises(Exception):
            self.block.get_block_name(45)
        with self.assertRaises(Exception):
            self.block.get_block_name(-1)

    def test_doc_sizes(self):
        self._check_size()

    def test_in_block(self):
        self._check_in_block()

    def test_in_block_boundary(self):
        self._check_in_block_boundary()

    def test_get_block_content(self):
        self._check_content()

    def test_get_block_content_boundary(self):
        self._check_content_boundary()

    def test_get_block_name(self):
        self._check_name()

    def test_get_block_name_boundary(self):
        self._check_name_boundary()

    def test_dump_load_and_doc_sizes(self):
        self._reload()
        self._check_size()

    def test_load_dump_and_in_block(self):
        self._reload()
        self._check_in_block()

    def test_load_dump_and_in_block_boundary(self):
        self._reload()
        self._check_in_block_boundary()

    def test_load_dump_and_get_block_content(self):
        self._reload()
        self._check_content()

    def test_load_dump_and_get_block_content_boundary(self):
        self._reload()
        self._check_content_boundary()

    def test_load_dump_and_get_block_name(self):
        self._reload()
        self._check_name()

    def test_load_dump_and_get_block_name_boundary(self):
        self._reload()
        self._check_name_boundary()
Example #16
0
class SplitterTest(unittest.TestCase):
    """Exercise the 'document' splitter metadata of an Oktavia index.

    Every check runs against the freshly built index and again after the
    index has been round-tripped through dump()/load().

    Boundary checks use assertRaises instead of try / self.fail() / bare
    except: a bare except also swallows the AssertionError raised by
    fail(), so such a test can never fail.  The concrete exception type is
    not visible from this file, so any Exception is accepted.
    """

    # Texts added to the index; split() is called after each one.
    _TEXTS = ("abracadabra", "mississippi", "abracadabra mississippi")
    # (position, expected splitter index) pairs for get_index().
    _INDEX_CASES = ((0, 0), (10, 0), (11, 1), (21, 1), (22, 2), (44, 2))

    def setUp(self):
        """Index the three texts, splitting after each one."""
        self.oktavia = Oktavia()
        self.splitter = self.oktavia.add_splitter('document')
        for text in self._TEXTS:
            self.oktavia.add_word(text)
            self.splitter.split()
        self.oktavia.build(5)

    def _reload(self):
        """Round-trip the index through dump()/load(); re-fetch the splitter."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.splitter = self.oktavia.get_splitter('document')

    def _check_count(self):
        """Assert that the splitter recorded three pieces."""
        self.assertEqual(3, self.splitter.size())

    def _check_index(self):
        """Assert the splitter index reported for each sampled position."""
        for position, expected in self._INDEX_CASES:
            self.assertEqual(expected, self.splitter.get_index(position))

    def _check_index_boundary(self):
        """Assert that positions -1 and 45 are rejected."""
        with self.assertRaises(Exception):
            self.splitter.get_index(-1)
        with self.assertRaises(Exception):
            self.splitter.get_index(45)

    def _check_content(self):
        """Assert the text returned for each piece, last piece first."""
        for index in (2, 1, 0):
            self.assertEqual(self._TEXTS[index],
                             self.splitter.get_content(index))

    def _check_content_boundary(self):
        """Assert that piece indexes 3 and -1 are rejected."""
        with self.assertRaises(Exception):
            self.splitter.get_content(3)
        with self.assertRaises(Exception):
            self.splitter.get_content(-1)

    def test_count(self):
        self._check_count()

    def test_get_splitter_index(self):
        self._check_index()

    def test_get_splitter_index_boundary(self):
        self._check_index_boundary()

    def test_get_splitter_content(self):
        self._check_content()

    def test_get_splitter_content_boundary(self):
        self._check_content_boundary()

    def test_load_dump_and_count(self):
        self._reload()
        self._check_count()

    def test_load_dump_and_get_splitter_index(self):
        self._reload()
        self._check_index()

    def test_load_dump_and_get_splitter_index_boundary(self):
        self._reload()
        self._check_index_boundary()

    def test_load_dump_and_get_splitter_content(self):
        self._reload()
        self._check_content()

    def test_load_dump_and_get_splitter_content_boundary(self):
        self._reload()
        self._check_content_boundary()
Example #17
0
class StemmingTest(unittest.TestCase):
    """Search tests for an index built with the English Snowball stemmer."""

    def setUp(self):
        """Index two phrases with stemming, closing a section after each."""
        self.oktavia = Oktavia()
        self.oktavia.set_stemmer(snowballstemmer.EnglishStemmer())
        self.section = self.oktavia.add_section(u'document')
        documents = (
            (u"stemming baby", u"doc1"),
            (u"stemmed babies", u"doc2"),
        )
        for phrase, section_name in documents:
            self.oktavia.add_word(phrase, stemming=True)
            self.section.set_tail(section_name)
        self.oktavia.build()

    def _restored_index(self):
        """Return a fresh Oktavia, with the same stemmer, loaded from a dump."""
        snapshot = self.oktavia.dump()
        restored = Oktavia()
        restored.set_stemmer(snowballstemmer.EnglishStemmer())
        restored.load(snapshot)
        return restored

    def test_search_without_stemming(self):
        """raw_search for 'baby' with stemming off yields one result."""
        hits = self.oktavia.raw_search(u'baby', stemming=False)
        self.assertEqual(1, len(hits))

    def test_search_with_stemming(self):
        """raw_search for 'baby' with stemming on yields one result."""
        hits = self.oktavia.raw_search(u'baby', stemming=True)
        self.assertEqual(1, len(hits))

    def test_load_dump_and_search_without_stemming(self):
        """Same as the unstemmed search, but on an index restored from a dump."""
        hits = self._restored_index().raw_search(u'baby', stemming=False)
        self.assertEqual(1, len(hits))

    def test_load_dump_and_search_with_stemming(self):
        """Same as the stemmed search, but on an index restored from a dump."""
        hits = self._restored_index().raw_search(u'baby', stemming=True)
        self.assertEqual(1, len(hits))
Example #18
0
class BlockTest(unittest.TestCase):
    """Tests for block metadata, directly and after a dump/load round trip.

    The fixture indexes three words and wraps only the second one
    ("mississippi") in a block named "river".
    """

    # (position passed to in_block, whether the test expects it inside a block)
    _IN_BLOCK_EXPECTATIONS = (
        (0, False),
        (10, False),
        (11, True),
        (21, True),
        (22, False),
        (44, False),
    )

    def setUp(self):
        """Build the index and keep the 'document' block metadata at hand."""
        self.oktavia = Oktavia()
        self.block = self.oktavia.add_block('document')
        self.oktavia.add_word("abracadabra")
        self.block.start_block("river")
        self.oktavia.add_word("mississippi")
        self.block.end_block()
        self.oktavia.add_word("abracadabra mississippi")
        self.oktavia.build()

    # -- helpers -----------------------------------------------------------

    def _reload(self):
        """Dump the index, load the dump back and re-fetch the block metadata."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.block = self.oktavia.get_block('document')

    def _expect_error(self, call, message):
        """Fail with *message* unless invoking *call* raises an exception.

        *call* is a zero-argument callable so that attribute lookup happens
        inside the guarded region as well.
        """
        try:
            call()
        except Exception:
            # The exception type the library raises is not visible from this
            # test, so any Exception counts as a rejection.
            # TODO: narrow once the concrete type is confirmed.
            return
        # Deliberately outside the try body: self.fail() raises
        # AssertionError, which an enclosing except clause would swallow and
        # make the test pass unconditionally.
        self.fail(message)

    def _check_in_block(self):
        """Assert in_block() for every position in _IN_BLOCK_EXPECTATIONS."""
        for position, inside in self._IN_BLOCK_EXPECTATIONS:
            check = self.assertTrue if inside else self.assertFalse
            check(self.block.in_block(position))

    def _check_in_block_boundary(self):
        """Assert in_block() raises for positions -1 and 45."""
        self._expect_error(lambda: self.block.in_block(-1),
                           "fm.in_block() 1")
        self._expect_error(lambda: self.block.in_block(45),
                           "fm.in_block() 2")

    def _check_get_block_content_boundary(self):
        """Assert get_block_content() raises for positions 45 and -1."""
        for position in (45, -1):
            self._expect_error(
                lambda: self.block.get_block_content(position),
                "fm.getContent()")

    def _check_get_block_name_boundary(self):
        """Assert get_block_name() raises for positions 45 and -1."""
        for position in (45, -1):
            self._expect_error(
                lambda: self.block.get_block_name(position),
                "fm.getName()")

    # -- tests on the freshly built index ----------------------------------

    def test_doc_sizes(self):
        """size() reports the single block added in setUp."""
        self.assertEqual(1, self.block.size())

    def test_in_block(self):
        """in_block() is true only for positions 11 and 21 of those probed."""
        self._check_in_block()

    def test_in_block_boundary(self):
        """in_block() rejects positions -1 and 45."""
        self._check_in_block_boundary()

    def test_get_block_content(self):
        """get_block_content(11) returns the text wrapped by the block."""
        self.assertEqual("mississippi", self.block.get_block_content(11))

    def test_get_block_content_boundary(self):
        """get_block_content() rejects positions 45 and -1."""
        self._check_get_block_content_boundary()

    def test_get_block_name(self):
        """get_block_name(11) returns the name given to start_block()."""
        self.assertEqual("river", self.block.get_block_name(11))

    def test_get_block_name_boundary(self):
        """get_block_name() rejects positions 45 and -1."""
        self._check_get_block_name_boundary()

    # -- the same checks after a dump/load round trip ----------------------

    def test_dump_load_and_doc_sizes(self):
        """size() is unchanged after dump/load."""
        self._reload()
        self.assertEqual(1, self.block.size())

    def test_load_dump_and_in_block(self):
        """in_block() answers are unchanged after dump/load."""
        self._reload()
        self._check_in_block()

    def test_load_dump_and_in_block_boundary(self):
        """in_block() still rejects -1 and 45 after dump/load."""
        self._reload()
        self._check_in_block_boundary()

    def test_load_dump_and_get_block_content(self):
        """get_block_content(11) is unchanged after dump/load."""
        self._reload()
        self.assertEqual("mississippi", self.block.get_block_content(11))

    def test_load_dump_and_get_block_content_boundary(self):
        """get_block_content() still rejects 45 and -1 after dump/load."""
        self._reload()
        self._check_get_block_content_boundary()

    def test_load_dump_and_get_block_name(self):
        """get_block_name(11) is unchanged after dump/load."""
        self._reload()
        self.assertEqual("river", self.block.get_block_name(11))

    def test_load_dump_and_get_block_name_boundary(self):
        """get_block_name() still rejects 45 and -1 after dump/load."""
        self._reload()
        self._check_get_block_name_boundary()
Example #19
0
class TableTest(unittest.TestCase):
    """Tests for table metadata, directly and after a dump/load round trip.

    The fixture is a three-row 'address book' table with the columns
    'zip', 'city' and 'area code'.
    """

    # Cell values in insertion order, one tuple per table row.
    _ROWS = (
        ("94101", "San Francisco", "415"),  # 5, 13 and 3 characters
        ("94607", "Oakland", "510"),        # 5, 7 and 3 characters
        ("94401", "San Mateo", "650"),      # 5, 9 and 3 characters
    )

    # (position, expected get_cell(position)[0], expected get_cell(position)[1])
    # Judging by the fixture, items 0 and 1 look like the row and column
    # index of the cell containing the position -- confirm against get_cell.
    _CELL_EXPECTATIONS = (
        (0, 0, 0),
        (22, 0, 2),
        (24, 1, 0),
        (40, 1, 2),
        (42, 2, 0),
        (60, 2, 2),
    )

    def setUp(self):
        """Build the index, closing every cell and every row of _ROWS."""
        self.oktavia = Oktavia()
        self.table = self.oktavia.add_table(
            'address book', ['zip', 'city', 'area code'])

        for row in self._ROWS:
            for cell in row:
                self.oktavia.add_word(cell)
                self.table.set_column_tail_and_EOB()
            self.table.set_row_tail()

        self.oktavia.build()

    # -- helpers -----------------------------------------------------------

    def _reload(self):
        """Dump the index, load the dump back and re-fetch the table metadata."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.table = self.oktavia.get_table('address book')

    def _expect_error(self, call, message):
        """Fail with *message* unless invoking *call* raises an exception.

        *call* is a zero-argument callable so that attribute lookup happens
        inside the guarded region as well.
        """
        try:
            call()
        except Exception:
            # The exception type the library raises is not visible from this
            # test, so any Exception counts as a rejection.
            # TODO: narrow once the concrete type is confirmed.
            return
        # Deliberately outside the try body: self.fail() raises
        # AssertionError, which an enclosing except clause would swallow and
        # make the test pass unconditionally.
        self.fail(message)

    def _check_get_cell(self):
        """Assert items 0 and 1 of get_cell() for every probed position."""
        for position, expected_first, expected_second in \
                self._CELL_EXPECTATIONS:
            cell = self.table.get_cell(position)
            self.assertEqual(expected_first, cell[0])
            self.assertEqual(expected_second, cell[1])

    def _check_get_cell_boundary(self):
        """Assert get_cell() raises for positions -1 and 62."""
        for position in (-1, 62):
            self._expect_error(lambda: self.table.get_cell(position),
                               "fm.gettableIndex()")

    def _check_first_row_content(self):
        """Assert get_row_content(0) maps column names to the first row."""
        row = self.table.get_row_content(0)
        self.assertEqual('94101', row['zip'])
        self.assertEqual('San Francisco', row['city'])
        self.assertEqual('415', row['area code'])

    def _check_get_content_boundary(self):
        """Assert get_content() raises for the arguments 3 and -1."""
        for index in (3, -1):
            self._expect_error(lambda: self.table.get_content(index),
                               "fm.get_content()")

    # -- tests on the freshly built index ----------------------------------

    def test_row_sizes(self):
        """row_size() reports the three rows added in setUp."""
        self.assertEqual(3, self.table.row_size())

    def test_column_sizes(self):
        """column_size() reports the three declared columns."""
        self.assertEqual(3, self.table.column_size())

    def test_get_cell(self):
        """get_cell() returns the expected pair for each probed position."""
        self._check_get_cell()

    def test_get_table_index_boundary(self):
        """get_cell() rejects positions -1 and 62."""
        self._check_get_cell_boundary()

    def test_get_table_content(self):
        """get_row_content(0) returns the first row keyed by column name."""
        self._check_first_row_content()

    def test_get_table_content_boundary(self):
        """get_content() rejects the arguments 3 and -1."""
        self._check_get_content_boundary()

    # -- the same checks after a dump/load round trip ----------------------

    def test_load_dump_and_row_sizes(self):
        """row_size() is unchanged after dump/load."""
        self._reload()
        self.assertEqual(3, self.table.row_size())

    def test_load_dump_and_column_sizes(self):
        """column_size() is unchanged after dump/load."""
        self._reload()
        self.assertEqual(3, self.table.column_size())

    def test_load_dump_and_get_cell(self):
        """get_cell() answers are unchanged after dump/load."""
        self._reload()
        self._check_get_cell()

    def test_load_dump_and_get_table_index_boundary(self):
        """get_cell() still rejects -1 and 62 after dump/load."""
        self._reload()
        self._check_get_cell_boundary()

    def test_load_dump_and_get_table_content(self):
        """get_row_content(0) is unchanged after dump/load."""
        self._reload()
        self._check_first_row_content()

    def test_load_dump_and_get_table_content_boundary(self):
        """get_content() still rejects 3 and -1 after dump/load."""
        self._reload()
        self._check_get_content_boundary()