def test_creation_from_file(self):
    """Check that a Corpus loaded from a file equals one built in memory."""
    # Serialise the fixture: one document per line, words separated by spaces.
    document_path = os.path.join(self.tempdir.name, "file")
    with open(document_path, "w") as handle:
        handle.writelines(
            " ".join(words) + "\n" for words in self.list_of_documents
        )

    loaded_corpus = Corpus.from_document_file(document_path)
    expected_corpus = Corpus.from_iterable_of_word_lists(self.list_of_documents)
    self.assertEqual(expected_corpus, loaded_corpus, "Loaded corpus is not correct.")
def setUp(self):
    """Prepare a three-document word-list fixture and its Corpus before every test."""
    first_document = ["the", "quick", "brown", "fox"]
    second_document = ["jumped", "over"]
    third_document = ["the", "lazy", "lazy", "dog"]

    self.list_of_documents = [first_document, second_document, third_document]
    self.corpus = Corpus.from_iterable_of_word_lists(self.list_of_documents)
def setUp(self):
    """Prepare a briefly trained model and a temporary directory for each test."""
    sample_corpus = Corpus.from_document_file("tests/data/sample_data")

    # Keep a local alias so the configuration steps read as one sequence.
    model = self.model = GibbsSamplingDMM(sample_corpus, number_of_topics=20)
    model.randomly_initialise_topic_assignment(seed=1)
    model.inference(5)

    self.tempdir = tempfile.TemporaryDirectory()
def setUp(self):
    """Train a model on the sample data and generate synthetic documents from it."""
    sample_corpus = Corpus.from_document_file("tests/data/sample_data")
    self.model = GibbsSamplingDMM(sample_corpus, number_of_topics=20)
    self.model.randomly_initialise_topic_assignment(seed=1)
    self.model.inference(100)

    # The generator returns a pair: the documents and the topics chosen for them.
    synthetic_output = self.model.generate_synthetic_documents(10, seed=5)
    self.generated_documents, self.chosen_topics = synthetic_output
def test_speed_of_inference(self):
    """Test that the inference is fast enough.

    Runs a fixed number of inference iterations on the sample corpus and
    checks that the average time per iteration lies within ``delta`` seconds
    of the expected value.
    """
    corpus = Corpus.from_document_file("tests/data/sample_data")
    model = GibbsSamplingDMM(corpus, number_of_topics=20)
    model.randomly_initialise_topic_assignment(seed=1)

    number_of_iterations = 200
    expected_seconds_per_iteration = 0.02
    delta = 0.05

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump when the system clock
    # is adjusted and would make the measurement unreliable.
    start = time.perf_counter()
    model.inference(number_of_iterations)
    elapsed = time.perf_counter() - start
    average_seconds_per_iteration = elapsed / number_of_iterations

    if average_seconds_per_iteration < expected_seconds_per_iteration - delta:
        # Reports an unexpected speed-up with a dedicated message.
        # NOTE(review): with the current constants the lower bound is negative
        # (0.02 - 0.05), so this branch cannot trigger; confirm the intended
        # expected/delta values.
        self.fail(
            "Code was faster: actually took {:.5f} per iteration.".format(
                average_seconds_per_iteration))
    else:
        # Use the same tolerance as the lower-bound check above instead of a
        # duplicated literal, so the two cannot drift apart.
        self.assertAlmostEqual(average_seconds_per_iteration,
                               expected_seconds_per_iteration,
                               delta=delta)
def test_bad_equality(self):
    """Check that comparing a Corpus with an integer raises a TypeError."""
    word_list_corpus = Corpus.from_iterable_of_word_lists(self.list_of_documents)
    failure_message = "Comparison to incorrect type should raise an error."

    with self.assertRaises(TypeError, msg=failure_message):
        # bool() forces evaluation of whatever the comparison returns.
        bool(word_list_corpus == 5)
def setUp(self):
    """Create a model over the sample data with seeded topic assignments."""
    sample_corpus = Corpus.from_document_file("tests/data/sample_data")
    model = GibbsSamplingDMM(sample_corpus, number_of_topics=20)
    self.model = model
    model.randomly_initialise_topic_assignment(seed=1)