def test_context_manager(self):
    """Run a workflow inside a ``with`` block, then re-open and verify it.

    The first ``TopikProject`` context reads the test data, tokenizes it,
    vectorizes it with bag-of-words and runs a 2-topic LDA model.  A second
    context opens the project again by name only and checks the corpus,
    the tokenized documents, the vectorized corpus and the model matrices.

    Files matching ``context_output*`` are removed before the run and again
    afterwards.  The trailing cleanup lives in a ``finally`` clause so that a
    failing assertion does not leave output files behind.
    """

    def _remove_output_files():
        # Delete every file whose name starts with the project name.
        for filename in glob.glob("context_output*"):
            os.remove(filename)

    _remove_output_files()
    try:
        with TopikProject("context_output", self.output_type,
                          self.output_args) as project:
            project.read_input(source=test_data_path,
                               content_field='abstract')
            project.tokenize()
            project.vectorize(method='bag_of_words')
            project.run_model(model_name='lda', ntopics=2)

        # above runs through a whole workflow (minus plotting.)  At end, it
        # closes file.
        # load output here.
        with TopikProject("context_output") as project:
            nt.assert_equal(
                len(list(project.get_filtered_corpus_iterator())), 100)
            nt.assert_true(sample_tokenized_doc in list(
                iter(project.selected_tokenized_corpus)))
            nt.assert_equal(
                project.selected_vectorized_corpus.global_term_count, 2434)
            # All documents processed
            nt.assert_equal(len(project.selected_vectorized_corpus), 100)

            # Each entry of both model matrices must sum to (almost) 1.
            modeled = project.selected_modeled_corpus
            for doc in modeled.doc_topic_matrix.values():
                nt.assert_almost_equal(sum(doc), 1)
            for topic in modeled.topic_term_matrix.values():
                nt.assert_almost_equal(sum(topic), 1)
    finally:
        _remove_output_files()
class TestElasticSearchOutput(unittest.TestCase, ProjectTest):
    """Run the tests inherited from ``ProjectTest`` with an Elasticsearch output.

    Uses ``localhost`` as the Elasticsearch source.  When loading the test
    data raises ``elasticsearch.ConnectionError`` the test is skipped rather
    than reported as an error.
    """

    # Index name handed to the output backend in setUp and deleted in tearDown.
    INDEX = "test_index"

    def setUp(self):
        """Create the Elasticsearch-backed project and load the test data.

        Raises:
            unittest.SkipTest: if ``read_input`` raises
                ``elasticsearch.ConnectionError``.
        """
        self.output_type = "ElasticSearchOutput"
        self.output_args = {
            'source': 'localhost',
            'index': TestElasticSearchOutput.INDEX,
            'content_field': "abstract",
        }
        self.project = TopikProject("test_project",
                                    output_type=self.output_type,
                                    output_args=self.output_args)
        try:
            self.project.read_input(test_data_path,
                                    content_field="abstract",
                                    synchronous_wait=30)
        except elasticsearch.ConnectionError:
            # An unreachable server is an environment problem, not a defect
            # in the code under test, so skip instead of erroring.
            self.skipTest(
                "Skipping Elasticsearch test - elasticsearch not running")

    def tearDown(self):
        """Delete the test index and, if present, its year-alias index."""
        instance = elasticsearch.Elasticsearch("localhost")
        instance.indices.delete(TestElasticSearchOutput.INDEX)
        alias_index = "{}_year_alias_date".format(
            TestElasticSearchOutput.INDEX)
        if instance.indices.exists(alias_index):
            instance.indices.delete(alias_index)
        # NOTE(review): presumably gives Elasticsearch time to settle before
        # the next test recreates the index -- confirm.
        time.sleep(1)
def setUp(self):
    """Build an ``InMemoryOutput`` project and load the test data into it."""
    self.output_type, self.output_args = "InMemoryOutput", {}
    project_kwargs = dict(output_type=self.output_type,
                          output_args=self.output_args)
    self.project = TopikProject("test_project", **project_kwargs)
    self.project.read_input(test_data_path, content_field="abstract")
class TestInMemoryOutput(unittest.TestCase, ProjectTest):
    """Run the tests inherited from ``ProjectTest`` with an in-memory output."""

    def setUp(self):
        """Create the in-memory project and read the test data."""
        backend = "InMemoryOutput"
        backend_args = {}
        self.output_type = backend
        self.output_args = backend_args
        self.project = project = TopikProject(
            "test_project",
            output_type=backend,
            output_args=backend_args,
        )
        project.read_input(test_data_path, content_field="abstract")
def setUp(self):
    """Create an ``ElasticSearchOutput`` project and load the test data.

    The output is configured with source ``localhost`` and the index name
    taken from ``TestElasticSearchOutput.INDEX``.
    """
    backend = "ElasticSearchOutput"
    backend_args = dict(
        source='localhost',
        index=TestElasticSearchOutput.INDEX,
        content_field="abstract",
    )
    self.output_type = backend
    self.output_args = backend_args
    self.project = TopikProject("test_project",
                                output_type=backend,
                                output_args=backend_args)
    self.project.read_input(test_data_path,
                            synchronous_wait=30,
                            content_field="abstract")
def setUp(self):
    """Create an ``ElasticSearchOutput`` project and load the test data.

    Raises:
        SkipTest: if reading the input raises ``ConnectionError``.
    """
    self.output_type = "ElasticSearchOutput"
    self.output_args = dict(
        source='localhost',
        index=TestElasticSearchOutput.INDEX,
        content_field="abstract",
    )
    self.project = project = TopikProject(
        "test_project",
        output_type=self.output_type,
        output_args=self.output_args,
    )
    read_options = {"content_field": "abstract", "synchronous_wait": 30}
    try:
        project.read_input(test_data_path, **read_options)
    except ConnectionError:
        # Turn the connection failure into a skip rather than an error.
        raise SkipTest(
            "Skipping Elasticsearch test - elasticsearch not running")
def setUp(self):
    """Prepare an Elasticsearch-backed ``TopikProject`` loaded with test data."""
    self.output_type = "ElasticSearchOutput"

    # Connection and index settings passed through to the output backend.
    es_settings = {}
    es_settings['source'] = 'localhost'
    es_settings['index'] = TestElasticSearchOutput.INDEX
    es_settings['content_field'] = "abstract"
    self.output_args = es_settings

    project = TopikProject(
        "test_project",
        output_type=self.output_type,
        output_args=self.output_args,
    )
    self.project = project
    project.read_input(
        test_data_path,
        content_field="abstract",
        synchronous_wait=30,
    )
class TestElasticSearchOutput(unittest.TestCase, ProjectTest):
    """Run the tests inherited from ``ProjectTest`` with an Elasticsearch output.

    The Elasticsearch source is ``localhost``.  If loading the test data
    raises ``elasticsearch.ConnectionError`` the test is skipped instead of
    being reported as an error.
    """

    # Index name passed to the output backend in setUp; removed in tearDown.
    INDEX = "test_index"

    def setUp(self):
        """Create the Elasticsearch-backed project and load the test data.

        Raises:
            unittest.SkipTest: if ``read_input`` raises
                ``elasticsearch.ConnectionError``.
        """
        self.output_type = "ElasticSearchOutput"
        self.output_args = {'source': 'localhost',
                            'index': TestElasticSearchOutput.INDEX,
                            'content_field': "abstract"}
        self.project = TopikProject("test_project",
                                    output_type=self.output_type,
                                    output_args=self.output_args)
        try:
            self.project.read_input(test_data_path,
                                    content_field="abstract",
                                    synchronous_wait=30)
        except elasticsearch.ConnectionError:
            # A server that cannot be reached says nothing about the code
            # under test, so report a skip rather than an error.
            self.skipTest(
                "Skipping Elasticsearch test - elasticsearch not running")

    def tearDown(self):
        """Delete the test index and, if it exists, its year-alias index."""
        instance = elasticsearch.Elasticsearch("localhost")
        instance.indices.delete(TestElasticSearchOutput.INDEX)
        alias_index = "{}_year_alias_date".format(
            TestElasticSearchOutput.INDEX)
        if instance.indices.exists(alias_index):
            instance.indices.delete(alias_index)
        # NOTE(review): presumably a grace period so the deletion is visible
        # before the next test runs -- confirm.
        time.sleep(1)