def generateDataBase(self):
    """
    Build a word database from the text files listed in self._file_list.

    Input:
        Nothing
    Returns:
        WordDataBase holding an empty placeholder Record (added first)
        followed by every distinct word parsed from the files
    """
    from database import WordDataBase
    from wordparser import WordParser
    from record import Record

    # Gather the distinct words found across all of the files.
    unique_words = set()
    for path in self._file_list:
        text = self._readFileContents(path)
        parsed_words = WordParser(text).wordsSet()
        unique_words = unique_words.union(parsed_words)

    database = WordDataBase()
    database.addWord(Record(""))  # Index 0 is not used.
    for entry in unique_words:
        database.addWord(entry)
    return database
def setUp(self):
    """Create the two Trainer fixtures: one over empty databases, one populated."""
    from trainer import Trainer
    from database import TrainingDataBase, WordDataBase, WordRecord

    # Trainer backed by an empty word database and an empty training database.
    self.tr_empty = Trainer(WordDataBase(), TrainingDataBase())

    # Word database holding "aaa", "bbb", "ccc", added in that order.
    word_db = WordDataBase()
    for text in ("aaa", "bbb", "ccc"):
        word_db.addWord(WordRecord(text))

    # Each training entry is a pair of WordRecord lists; a fresh WordRecord
    # is created for every occurrence rather than sharing instances.
    training_db = TrainingDataBase()
    entries = (
        (("aaa", "bbb", "ccc"), ("ccc", "bbb")),
        (("aaa", "ccc"), ("ccc", "ccc")),
    )
    for first_texts, second_texts in entries:
        training_db.add(
            [WordRecord(text) for text in first_texts],
            [WordRecord(text) for text in second_texts],
        )

    self.tr_notempty = Trainer(word_db, training_db)
class WordDataBaseTest(unittest.TestCase):
    """Unit tests for WordDataBase word-to-id and id-to-word lookups."""

    def setUp(self):
        """Create a database with "ccc", "bbb", "aaa" added in that order.

        The tests below expect these words to receive ids 0, 1 and 2
        respectively.
        """
        from database import WordDataBase, WordRecord

        self.db = WordDataBase()
        self.db.addWord(WordRecord("ccc"))
        self.db.addWord(WordRecord("bbb"))
        self.db.addWord(WordRecord("aaa"))

    def test_wordId_invalidword(self):
        """wordId raises DataBaseException for None and for an unknown word."""
        from database import DataBaseException, WordRecord

        with self.assertRaises(DataBaseException):
            self.db.wordId(None)
        with self.assertRaises(DataBaseException):
            self.db.wordId(WordRecord("ddd"))

    def test_wordId_validword(self):
        """wordId returns the id matching each word's insertion position."""
        from database import WordRecord

        self.assertEqual(self.db.wordId(WordRecord("ccc")), 0)
        self.assertEqual(self.db.wordId(WordRecord("bbb")), 1)
        self.assertEqual(self.db.wordId(WordRecord("aaa")), 2)

    def test_addWord_invalidword(self):
        """addWord raises DataBaseException for None and for a duplicate word."""
        from database import DataBaseException, WordRecord

        with self.assertRaises(DataBaseException):
            self.db.addWord(None)
        with self.assertRaises(DataBaseException):
            # "ccc" was already added in setUp.
            self.db.addWord(WordRecord("ccc"))

    def test_idWord_invalidinput(self):
        """idWord raises DataBaseException for None and out-of-range ids."""
        from database import DataBaseException

        with self.assertRaises(DataBaseException):
            self.db.idWord(None)
        with self.assertRaises(DataBaseException):
            # Only ids 0..2 exist after setUp.
            self.db.idWord(3)
        with self.assertRaises(DataBaseException):
            self.db.idWord(-5)

    def test_multipleWordId(self):
        """multipleWordId returns ids in the same order as the given words."""
        from database import WordRecord

        ids = self.db.multipleWordId(
            [WordRecord("aaa"), WordRecord("bbb"), WordRecord("ccc")]
        )
        self.assertEqual(ids[0], 2)
        self.assertEqual(ids[1], 1)
        self.assertEqual(ids[2], 0)

    def test_multipleWordId_exception(self):
        """multipleWordId raises DataBaseException when the words are unknown."""
        from database import DataBaseException, WordRecord

        with self.assertRaises(DataBaseException):
            self.db.multipleWordId(
                [WordRecord("kkk"), WordRecord("hhh"), WordRecord("www")]
            )

    def test_multipleIdWord(self):
        """multipleIdWord returns words in the order of the given ids, repeats included."""
        from database import WordRecord

        words = self.db.multipleIdWord([2, 1, 0, 0])
        self.assertEqual(words[0], WordRecord("aaa"))
        self.assertEqual(words[1], WordRecord("bbb"))
        self.assertEqual(words[2], WordRecord("ccc"))
        self.assertEqual(words[3], WordRecord("ccc"))

    def test_multipleIdWord_exception(self):
        """multipleIdWord raises DataBaseException when the ids do not exist."""
        from database import DataBaseException

        with self.assertRaises(DataBaseException):
            self.db.multipleIdWord([5, 5, 5])