def test_init_no_id(self):
    """Check a Corpus constructed from words only: id is None, words and document match."""
    expected_words = ["keiffster", "ABCDEF123", "PASSWORD123"]

    # Hand the constructor its own copy so the comparison list stays independent.
    corpus = Corpus(words=list(expected_words))

    self.assertIsNotNone(corpus)
    self.assertIsNone(corpus.id)
    self.assertEqual(expected_words, corpus.words)

    expected_document = {'words': list(expected_words)}
    self.assertEqual(expected_document, corpus.to_document())
def test_from_document_no_id(self):
    """Check that from_document on a document lacking '_id' gives id None and the same words."""
    expected_words = ['keiffster', 'ABCDEF123', 'PASSWORD123']
    document = {'words': list(expected_words)}

    corpus1 = Corpus.from_document(document)

    self.assertIsNotNone(corpus1)
    self.assertIsNone(corpus1.id)
    self.assertEqual(expected_words, corpus1.words)
def upload_from_file(self, filename, format=Store.TEXT_FORMAT, commit=True, verbose=False):
    """Read a spelling corpus text file and store all of its words as one Corpus document.

    Every whitespace-delimited token in ``filename`` is collected into a single
    ``Corpus`` which is handed to ``self.add_document``. Any exception raised
    along the way is logged and swallowed, so this method never raises.

    Args:
        filename: Path of the text file to read.
        format: Accepted for interface compatibility; not used by this method.
        commit: When True, ``self.commit()`` is called after the document is added.
        verbose: When True, the collected words are printed.

    Returns:
        A ``(count, count)`` tuple with the number of words stored, or
        ``(0, 0)`` when the document was rejected or an error occurred.
    """
    YLogger.debug(self, "Uploading spelling corpus file [%s] to Mongo", filename)

    count = 0
    try:
        corpus_words = []
        with open(filename, "r") as text_file:
            for line in text_file:
                # split() with no separator strips the trailing newline and
                # never yields empty strings for repeated spaces or blank
                # lines, unlike split(' ').
                corpus_words.extend(line.split())

        corpus = Corpus(words=corpus_words)

        # Identity check: only an explicit False counts as a rejected document,
        # so an add_document that returns None is still treated as success.
        if self.add_document(corpus) is not False:
            if verbose is True:
                print(corpus_words)

            if commit is True:
                self.commit()

            # Set last so that any failure above reports zero words loaded.
            count = len(corpus_words)

    except Exception as excep:
        YLogger.exception(self, "Failed to load spelling corpus from [%s]", excep, filename)

    # All words live in one document, so "loaded" and "succeeded" are the same number.
    return count, count
def test_from_document(self):
    """Check from_document both without and with an '_id' entry in the document."""
    expected_words = ['keiffster', 'ABCDEF123', 'PASSWORD123']

    # Case 1: no '_id' key, so the resulting id is expected to be None.
    without_id = {'words': list(expected_words)}
    corpus1 = Corpus.from_document(without_id)
    self.assertIsNotNone(corpus1)
    self.assertIsNone(corpus1.id)
    self.assertEqual(expected_words, corpus1.words)

    # Case 2: '_id' present, expected to surface as corpus.id.
    with_id = {'_id': '666', 'words': list(expected_words)}
    corpus2 = Corpus.from_document(with_id)
    self.assertIsNotNone(corpus2)
    self.assertEqual("666", corpus2.id)
    self.assertEqual(expected_words, corpus2.words)
def _read_corpus_from_file(self, filename, verbose):
    """Read a corpus text file and store its words as a single Corpus document.

    Every whitespace-delimited token in ``filename`` is collected into one
    ``Corpus`` which is passed to ``self.add_document``.

    Args:
        filename: Path of the text file to read.
        verbose: Accepted for interface compatibility; not used by this method.

    Returns:
        A ``(count, count)`` tuple with the number of words stored, or
        ``(0, 0)`` when ``self.add_document`` returns False.
    """
    corpus_words = []
    with open(filename, "r") as text_file:
        for line in text_file:
            # split() with no separator strips the trailing newline and never
            # yields empty strings for repeated spaces or blank lines, unlike
            # split(' ').
            corpus_words.extend(line.split())

    corpus = Corpus(words=corpus_words)
    if self.add_document(corpus) is False:
        return 0, 0

    # All words live in one document, so "loaded" and "succeeded" are the same number.
    count = len(corpus_words)
    return count, count
def test_repr(self):
    """Check the string form of a Corpus built from a document without an '_id'."""
    corpus1 = Corpus.from_document(
        {'words': ['keiffster', 'ABCDEF123', 'PASSWORD123']})

    # assertEqual, not the deprecated assertEquals alias, which was removed
    # from unittest.TestCase in Python 3.12.
    self.assertEqual("<Corpus(id='n/a')>", str(corpus1))
def test_from_document_no_words(self):
    """Check that from_document on a document with only '_id' gives an empty word list."""
    document = {'_id': '666'}

    corpus2 = Corpus.from_document(document)

    self.assertIsNotNone(corpus2)
    self.assertEqual("666", corpus2.id)
    self.assertEqual([], corpus2.words)