class BookParserTest(unittest.TestCase):
    """Unit tests for the word dictionary that BookParser builds in memory."""

    def setUp(self):
        # unittest calls setUp before each test, so every test gets a new parser.
        self.book_parser = BookParser()

    def test_load_word_data(self):
        """Check that load_word_data files a single context entry under its word."""
        expected_dict = {
            'hello': [
                {
                    'word': 'hello',
                    'sentence': 'he said hello',
                    'book': 'Treasure Island',
                    'chapter': '10 - Ahoy',
                    'filename': 'file1.html',
                },
            ],
        }

        self.book_parser.load_word_data(
            'hello', 'file1.html', 'he said hello', 'Treasure Island',
            '10 - Ahoy')

        # assertEqual rather than assertItemsEqual: iterating a dict yields
        # only its keys, so assertItemsEqual would pass as long as 'hello' was
        # present, no matter what context entries were stored under it.
        self.assertEqual(expected_dict, self.book_parser.get_full_dict())
def create_database(db_path='test.db'):
    """
    Uses BookParser to process the html book files into a data structure in
    memory then iterates through and stores it in a sqlite file.

    One row is written per word into the ``words`` table: the word itself
    (primary key) and its context list serialized as JSON. Rows for words that
    already exist are replaced.

    Args:
        db_path: Path of the sqlite file to create or update. Defaults to
            'test.db', the location this function has always written to.

    Raises:
        sqlite3.Error: If the database cannot be opened or written. The
            connection is closed before the error propagates.
    """
    create_table = """ CREATE TABLE IF NOT EXISTS words (word TEXT PRIMARY KEY, serialized_context_list TEXT) """
    insert_row = """ INSERT OR REPLACE INTO words VALUES (?, ?) """

    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        cur.execute(create_table)

        book_parser = BookParser()
        book_parser.initiate()

        # items() instead of iteritems() so the loop runs on Python 2 and 3.
        for word, context_list in book_parser.get_full_dict().items():
            # Only byte strings need decoding. Calling decode() on text that
            # is already unicode fails for non-ASCII words on Python 2 and
            # for every word on Python 3.
            if isinstance(word, bytes):
                word = word.decode('utf-8')
            cur.execute(insert_row, (word, json.dumps(context_list)))

        conn.commit()
    finally:
        # Release the file handle even when parsing or an insert fails;
        # anything not yet committed is discarded by sqlite.
        conn.close()