class StemmingTest(unittest.TestCase):
    """Raw-search checks on an index whose words were added with stemming.

    Each search is exercised twice: against the index built in ``setUp`` and
    against a second ``Oktavia`` instance restored from that index's dump.
    """

    def setUp(self):
        # Two documents in a single 'document' section, both added with
        # stemming=True and an English Snowball stemmer installed.
        self.oktavia = Oktavia()
        self.oktavia.set_stemmer(snowballstemmer.EnglishStemmer())
        self.section = self.oktavia.add_section(u'document')
        documents = (
            (u"stemming baby", u"doc1"),
            (u"stemmed babies", u"doc2"),
        )
        for text, tail_name in documents:
            self.oktavia.add_word(text, stemming=True)
            self.section.set_tail(tail_name)
        self.oktavia.build()

    def _restored_index(self):
        """Return a new index (with stemmer) loaded from a dump of self.oktavia."""
        dumped = self.oktavia.dump()
        restored = Oktavia()
        restored.set_stemmer(snowballstemmer.EnglishStemmer())
        restored.load(dumped)
        return restored

    def _assert_single_hit(self, index, stemming):
        """Assert that raw-searching 'baby' on *index* yields exactly one result."""
        hits = index.raw_search(u'baby', stemming=stemming)
        self.assertEqual(1, len(hits))

    def test_search_without_stemming(self):
        self._assert_single_hit(self.oktavia, False)

    def test_search_with_stemming(self):
        self._assert_single_hit(self.oktavia, True)

    def test_load_dump_and_search_without_stemming(self):
        self._assert_single_hit(self._restored_index(), False)

    def test_load_dump_and_search_with_stemming(self):
        self._assert_single_hit(self._restored_index(), True)
class TableTest(unittest.TestCase):
    """Tests for a three-row, three-column 'address book' table.

    Every check is run against the freshly built index and again after the
    index has been round-tripped through ``dump()`` / ``load()``.
    """

    # Cell values added in setUp, one tuple per table row.
    _ROWS = (
        ("94101", "San Francisco", "415"),  # lengths 5, 13, 3
        ("94607", "Oakland", "510"),        # lengths 5, 7, 3
        ("94401", "San Mateo", "650"),      # lengths 5, 9, 3
    )

    # (position, expected get_cell(position)[0], expected get_cell(position)[1]).
    # Going by the fixture data, [0] is the row and [1] is the column.
    _EXPECTED_CELLS = (
        (0, 0, 0),
        (22, 0, 2),
        (24, 1, 0),
        (40, 1, 2),
        (42, 2, 0),
        (60, 2, 2),
    )

    def setUp(self):
        self.oktavia = Oktavia()
        self.table = self.oktavia.add_table('address book', ['zip', 'city', 'area code'])
        for row in self._ROWS:
            for value in row:
                self.oktavia.add_word(value)
                self.table.set_column_tail_and_EOB()
            self.table.set_row_tail()
        self.oktavia.build()

    # -- helpers ---------------------------------------------------------

    def _reload_table(self):
        """Round-trip the index through dump()/load() and re-fetch the table."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.table = self.oktavia.get_table('address book')

    def _assert_rejected(self, message, call):
        """Fail with *message* unless the zero-argument *call* raises.

        self.fail() is invoked outside the try block on purpose: it raises an
        AssertionError, and calling it inside a try guarded by a catch-all
        handler would swallow the failure and make the test always pass.
        """
        try:
            call()
        except Exception:
            return
        self.fail(message)

    def _check_cells(self):
        for position, row, column in self._EXPECTED_CELLS:
            cell = self.table.get_cell(position)
            self.assertEqual(row, cell[0])
            self.assertEqual(column, cell[1])

    def _check_cell_bounds(self):
        # -1 and 62 are the first positions outside the range this test
        # expects get_cell() to accept.
        self._assert_rejected("fm.gettableIndex()", lambda: self.table.get_cell(-1))
        self._assert_rejected("fm.gettableIndex()", lambda: self.table.get_cell(62))

    def _check_first_row_content(self):
        row = self.table.get_row_content(0)
        self.assertEqual('94101', row['zip'])
        self.assertEqual('San Francisco', row['city'])
        self.assertEqual('415', row['area code'])

    def _check_content_bounds(self):
        # NOTE(review): the content tests use get_row_content(); confirm that
        # get_content() is the intended method for the boundary check.
        self._assert_rejected("fm.get_content()", lambda: self.table.get_content(3))
        self._assert_rejected("fm.get_content()", lambda: self.table.get_content(-1))

    # -- freshly built index ---------------------------------------------

    def test_row_sizes(self):
        self.assertEqual(3, self.table.row_size())

    def test_column_sizes(self):
        self.assertEqual(3, self.table.column_size())

    def test_get_cell(self):
        self._check_cells()

    def test_get_table_index_boundary(self):
        self._check_cell_bounds()

    def test_get_table_content(self):
        self._check_first_row_content()

    def test_get_table_content_boundary(self):
        self._check_content_bounds()

    # -- after dump()/load() ---------------------------------------------

    def test_load_dump_and_row_sizes(self):
        self._reload_table()
        self.assertEqual(3, self.table.row_size())

    def test_load_dump_and_column_sizes(self):
        self._reload_table()
        self.assertEqual(3, self.table.column_size())

    def test_load_dump_and_get_cell(self):
        self._reload_table()
        self._check_cells()

    def test_load_dump_and_get_table_index_boundary(self):
        self._reload_table()
        self._check_cell_bounds()

    def test_load_dump_and_get_table_content(self):
        self._reload_table()
        self._check_first_row_content()

    def test_load_dump_and_get_table_content_boundary(self):
        self._reload_table()
        self._check_content_bounds()
class SectionTest(unittest.TestCase):
    """Tests for a 'document' section holding three named documents.

    Every check is run against the freshly built index and again after the
    index has been round-tripped through ``dump()`` / ``load()``.
    """

    # (text, section name) pairs added in setUp, in order.
    _DOCUMENTS = (
        (u"abracadabra", u"doc1"),
        (u"mississippi", u"doc2"),
        (u"abracadabra2 mississippi2", u"doc3"),
    )

    # (position, expected get_section_index(position)).
    _EXPECTED_SECTION_INDEXES = (
        (0, 0),
        (10, 0),
        (12, 1),
        (22, 1),
        (24, 2),
        (48, 2),
    )

    def setUp(self):
        self.oktavia = Oktavia()
        self.section = self.oktavia.add_section(u'document')
        for text, name in self._DOCUMENTS:
            self.oktavia.add_word(text)
            self.section.set_tail(name)
            self.oktavia.add_end_of_block()
        self.oktavia.build(25)

    # -- helpers ---------------------------------------------------------

    def _reload_section(self):
        """Round-trip the index through dump()/load() and re-fetch the section."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.section = self.oktavia.get_section('document')

    def _assert_rejected(self, message, call):
        """Fail with *message* unless the zero-argument *call* raises.

        self.fail() is invoked outside the try block on purpose: it raises an
        AssertionError, and calling it inside a try guarded by a catch-all
        handler would swallow the failure and make the test always pass.
        """
        try:
            call()
        except Exception:
            return
        self.fail(message)

    def _check_section_indexes(self):
        for position, expected in self._EXPECTED_SECTION_INDEXES:
            self.assertEqual(expected, self.section.get_section_index(position))

    def _check_contents(self):
        for index, (text, _name) in enumerate(self._DOCUMENTS):
            self.assertEqual(text, self.section.get_content(index))

    def _check_content_bounds(self):
        self._assert_rejected("fm.get_content()", lambda: self.section.get_content(3))
        self._assert_rejected("fm.get_content()", lambda: self.section.get_content(-1))

    def _check_names(self):
        for index, (_text, name) in enumerate(self._DOCUMENTS):
            self.assertEqual(name, self.section.get_name(index))

    def _check_name_bounds(self):
        self._assert_rejected("fm.get_name()", lambda: self.section.get_name(3))
        self._assert_rejected("fm.get_name()", lambda: self.section.get_name(-1))

    # -- freshly built index ---------------------------------------------

    def test_doc_sizes(self):
        self.assertEqual(3, self.section.size())

    def test_get_section_index(self):
        self._check_section_indexes()

    def test_get_section_index_boundary(self):
        self._assert_rejected("fm.get_section_index(): -1",
                              lambda: self.section.get_section_index(-1))
        self._assert_rejected("fm.get_section_index(): 49",
                              lambda: self.section.get_section_index(49))

    def test_get_section_content(self):
        self._check_contents()

    def test_get_section_content_boundary(self):
        self._check_content_bounds()

    def test_get_section_name(self):
        self._check_names()

    def test_get_section_name_boundary(self):
        self._check_name_bounds()

    # -- after dump()/load() ---------------------------------------------

    def test_load_dump_and_doc_sizes(self):
        self._reload_section()
        self.assertEqual(3, self.section.size())

    def test_load_dump_and_get_section_index(self):
        self._reload_section()
        self._check_section_indexes()

    def test_load_dump_and_get_section_index_boundary(self):
        self._reload_section()
        self._assert_rejected("fm.get_section_index()",
                              lambda: self.section.get_section_index(-1))
        self._assert_rejected("fm.get_section_index()",
                              lambda: self.section.get_section_index(49))

    def test_load_dump_and_get_section_content(self):
        self._reload_section()
        self._check_contents()

    def test_load_dump_and_get_section_content_boundary(self):
        self._reload_section()
        self._check_content_bounds()

    def test_load_dump_and_get_section_name(self):
        self._reload_section()
        self._check_names()

    def test_load_dump_and_get_section_name_boundary(self):
        self._reload_section()
        self._check_name_bounds()
class SplitterTest(unittest.TestCase):
    """Tests for a 'document' splitter dividing the text into three parts.

    Every check is run against the freshly built index and again after the
    index has been round-tripped through ``dump()`` / ``load()``.
    """

    # Texts added in setUp; split() is called after each one.
    _PARTS = (
        "abracadabra",
        "mississippi",
        "abracadabra mississippi",
    )

    # (position, expected get_index(position)).
    _EXPECTED_INDEXES = (
        (0, 0),
        (10, 0),
        (11, 1),
        (21, 1),
        (22, 2),
        (44, 2),
    )

    def setUp(self):
        self.oktavia = Oktavia()
        self.splitter = self.oktavia.add_splitter('document')
        for text in self._PARTS:
            self.oktavia.add_word(text)
            self.splitter.split()
        self.oktavia.build(5)

    # -- helpers ---------------------------------------------------------

    def _reload_splitter(self):
        """Round-trip the index through dump()/load() and re-fetch the splitter."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.splitter = self.oktavia.get_splitter('document')

    def _assert_rejected(self, message, call):
        """Fail with *message* unless the zero-argument *call* raises.

        self.fail() is invoked outside the try block on purpose: it raises an
        AssertionError, and calling it inside a try guarded by a catch-all
        handler would swallow the failure and make the test always pass.
        """
        try:
            call()
        except Exception:
            return
        self.fail(message)

    def _check_indexes(self):
        for position, expected in self._EXPECTED_INDEXES:
            self.assertEqual(expected, self.splitter.get_index(position))

    def _check_index_bounds(self):
        self._assert_rejected("fm.get_index()", lambda: self.splitter.get_index(-1))
        self._assert_rejected("fm.get_index()", lambda: self.splitter.get_index(45))

    def _check_contents(self):
        # Checked from the last part to the first.
        for index in (2, 1, 0):
            self.assertEqual(self._PARTS[index], self.splitter.get_content(index))

    def _check_content_bounds(self):
        self._assert_rejected("fm.get_content()", lambda: self.splitter.get_content(3))
        self._assert_rejected("fm.get_content()", lambda: self.splitter.get_content(-1))

    # -- freshly built index ---------------------------------------------

    def test_count(self):
        self.assertEqual(3, self.splitter.size())

    def test_get_splitter_index(self):
        self._check_indexes()

    def test_get_splitter_index_boundary(self):
        self._check_index_bounds()

    def test_get_splitter_content(self):
        self._check_contents()

    def test_get_splitter_content_boundary(self):
        self._check_content_bounds()

    # -- after dump()/load() ---------------------------------------------

    def test_load_dump_and_count(self):
        self._reload_splitter()
        self.assertEqual(3, self.splitter.size())

    def test_load_dump_and_get_splitter_index(self):
        self._reload_splitter()
        self._check_indexes()

    def test_load_dump_and_get_splitter_index_boundary(self):
        self._reload_splitter()
        self._check_index_bounds()

    def test_load_dump_and_get_splitter_content(self):
        self._reload_splitter()
        self._check_contents()

    def test_load_dump_and_get_splitter_content_boundary(self):
        self._reload_splitter()
        self._check_content_bounds()
class BlockTest(unittest.TestCase):
    """Tests for a 'document' block metadata holding one block named 'river'.

    Every check is run against the freshly built index and again after the
    index has been round-tripped through ``dump()`` / ``load()``.
    """

    # (position, whether in_block(position) is expected to be true).
    _EXPECTED_IN_BLOCK = (
        (0, False),
        (10, False),
        (11, True),
        (21, True),
        (22, False),
        (44, False),
    )

    def setUp(self):
        self.oktavia = Oktavia()
        self.block = self.oktavia.add_block('document')
        self.oktavia.add_word("abracadabra")
        # Only "mississippi" is added between start_block() and end_block().
        self.block.start_block("river")
        self.oktavia.add_word("mississippi")
        self.block.end_block()
        self.oktavia.add_word("abracadabra mississippi")
        self.oktavia.build()

    # -- helpers ---------------------------------------------------------

    def _reload_block(self):
        """Round-trip the index through dump()/load() and re-fetch the block."""
        dump = self.oktavia.dump()
        self.oktavia.load(dump)
        self.block = self.oktavia.get_block('document')

    def _assert_rejected(self, message, call):
        """Fail with *message* unless the zero-argument *call* raises.

        self.fail() is invoked outside the try block on purpose: it raises an
        AssertionError, and calling it inside a try guarded by a catch-all
        handler would swallow the failure and make the test always pass.
        """
        try:
            call()
        except Exception:
            return
        self.fail(message)

    def _check_in_block(self):
        for position, inside in self._EXPECTED_IN_BLOCK:
            if inside:
                self.assertTrue(self.block.in_block(position))
            else:
                self.assertFalse(self.block.in_block(position))

    def _check_in_block_bounds(self):
        self._assert_rejected("fm.in_block() 1", lambda: self.block.in_block(-1))
        self._assert_rejected("fm.in_block() 2", lambda: self.block.in_block(45))

    def _check_content_bounds(self):
        self._assert_rejected("fm.getContent()", lambda: self.block.get_block_content(45))
        self._assert_rejected("fm.getContent()", lambda: self.block.get_block_content(-1))

    def _check_name_bounds(self):
        self._assert_rejected("fm.getName()", lambda: self.block.get_block_name(45))
        self._assert_rejected("fm.getName()", lambda: self.block.get_block_name(-1))

    # -- freshly built index ---------------------------------------------

    def test_doc_sizes(self):
        self.assertEqual(1, self.block.size())

    def test_in_block(self):
        self._check_in_block()

    def test_in_block_boundary(self):
        self._check_in_block_bounds()

    def test_get_block_content(self):
        self.assertEqual("mississippi", self.block.get_block_content(11))

    def test_get_block_content_boundary(self):
        self._check_content_bounds()

    def test_get_block_name(self):
        self.assertEqual("river", self.block.get_block_name(11))

    def test_get_block_name_boundary(self):
        self._check_name_bounds()

    # -- after dump()/load() ---------------------------------------------

    def test_dump_load_and_doc_sizes(self):
        self._reload_block()
        self.assertEqual(1, self.block.size())

    def test_load_dump_and_in_block(self):
        self._reload_block()
        self._check_in_block()

    def test_load_dump_and_in_block_boundary(self):
        self._reload_block()
        self._check_in_block_bounds()

    def test_load_dump_and_get_block_content(self):
        self._reload_block()
        self.assertEqual("mississippi", self.block.get_block_content(11))

    def test_load_dump_and_get_block_content_boundary(self):
        self._reload_block()
        self._check_content_bounds()

    def test_load_dump_and_get_block_name(self):
        self._reload_block()
        self.assertEqual("river", self.block.get_block_name(11))

    def test_load_dump_and_get_block_name_boundary(self):
        self._reload_block()
        self._check_name_bounds()