Exemple #1
0
    def test_context_manager(self):
        """Run a workflow inside ``with TopikProject(...)``, then reload and check it.

        The first ``with`` block reads the input, tokenizes, vectorizes with
        ``bag_of_words`` and runs an ``lda`` model with two topics under the
        project name "context_output".  The second ``with`` block reopens the
        project by name only and asserts on what it exposes.

        Files matching ``context_output*`` are removed before the test starts
        and again when it ends, including when a step or assertion fails.
        """
        def remove_outputs():
            # Same glob the original used for both the pre- and post-cleanup.
            for filename in glob.glob("context_output*"):
                os.remove(filename)

        remove_outputs()
        try:
            with TopikProject("context_output", self.output_type,
                              self.output_args) as project:
                project.read_input(source=test_data_path,
                                   content_field='abstract')
                project.tokenize()
                project.vectorize(method='bag_of_words')
                project.run_model(model_name='lda', ntopics=2)

            # above runs through a whole workflow (minus plotting.)  At end, it
            # closes file.
            # load output here.
            with TopikProject("context_output") as project:
                nt.assert_equal(
                    len(list(project.get_filtered_corpus_iterator())), 100)
                nt.assert_true(sample_tokenized_doc in list(
                    iter(project.selected_tokenized_corpus)))
                nt.assert_equal(
                    project.selected_vectorized_corpus.global_term_count, 2434)
                nt.assert_equal(len(project.selected_vectorized_corpus),
                                100)  # All documents processed
                # Every row of both matrices is asserted to sum to 1.
                doc_topic = project.selected_modeled_corpus.doc_topic_matrix
                for doc in doc_topic.values():
                    nt.assert_almost_equal(sum(doc), 1)
                topic_term = project.selected_modeled_corpus.topic_term_matrix
                for topic in topic_term.values():
                    nt.assert_almost_equal(sum(topic), 1)
        finally:
            # Clean up even on failure so a broken run leaves no output files
            # behind for other tests or later runs.
            remove_outputs()
Exemple #2
0
class TestElasticSearchOutput(unittest.TestCase, ProjectTest):
    """Test case combining ``unittest.TestCase`` with the ``ProjectTest`` base.

    ``setUp`` builds a ``TopikProject`` with the "ElasticSearchOutput" output
    type pointed at ``localhost`` and reads the test data into it;
    ``tearDown`` deletes the indices named after ``INDEX``.
    """

    # Name of the Elasticsearch index used by every test in this class.
    INDEX = "test_index"

    def setUp(self):
        """Create the project and load the test data from ``test_data_path``."""
        self.output_type = "ElasticSearchOutput"
        self.output_args = {
            'source': 'localhost',
            'index': TestElasticSearchOutput.INDEX,
            'content_field': "abstract"
        }
        self.project = TopikProject("test_project",
                                    output_type=self.output_type,
                                    output_args=self.output_args)
        self.project.read_input(test_data_path,
                                content_field="abstract",
                                synchronous_wait=30)

    def tearDown(self):
        """Delete the test index and its ``_year_alias_date`` index if present."""
        instance = elasticsearch.Elasticsearch("localhost")
        alias_index = "{}_year_alias_date".format(
            TestElasticSearchOutput.INDEX)
        # Guard both deletions the same way: an unconditional delete of a
        # missing index raises, which would abort the rest of the cleanup.
        for index_name in (TestElasticSearchOutput.INDEX, alias_index):
            if instance.indices.exists(index_name):
                instance.indices.delete(index_name)
        # NOTE(review): presumably gives Elasticsearch a moment to settle
        # before the next test's setUp - confirm.
        time.sleep(1)
Exemple #3
0
 def setUp(self):
     """Build a project with the "InMemoryOutput" type and read the test data."""
     self.output_type, self.output_args = "InMemoryOutput", {}
     # Bind the project on the instance first, then load through a local alias.
     self.project = project = TopikProject(
         "test_project",
         output_type=self.output_type,
         output_args=self.output_args,
     )
     project.read_input(test_data_path, content_field="abstract")
Exemple #4
0
class TestInMemoryOutput(unittest.TestCase, ProjectTest):
    """Test case combining ``unittest.TestCase`` with the ``ProjectTest`` base.

    Each test starts from a ``TopikProject`` created with the
    "InMemoryOutput" output type and no extra output arguments.
    """

    def setUp(self):
        """Create the project and load the test data from ``test_data_path``."""
        self.output_type = "InMemoryOutput"
        self.output_args = {}
        project_options = {
            "output_type": self.output_type,
            "output_args": self.output_args,
        }
        self.project = TopikProject("test_project", **project_options)
        self.project.read_input(test_data_path, content_field="abstract")
Exemple #5
0
 def setUp(self):
     """Build a project with the "ElasticSearchOutput" type and read the test data.

     The output arguments name ``localhost`` as the source and use
     ``TestElasticSearchOutput.INDEX`` as the index.
     """
     self.output_type = "ElasticSearchOutput"
     self.output_args = dict(
         source='localhost',
         index=TestElasticSearchOutput.INDEX,
         content_field="abstract",
     )
     project_options = {
         "output_type": self.output_type,
         "output_args": self.output_args,
     }
     self.project = TopikProject("test_project", **project_options)
     self.project.read_input(
         test_data_path, content_field="abstract", synchronous_wait=30)
Exemple #6
0
 def setUp(self):
     """Build an "ElasticSearchOutput" project, skipping when the read fails.

     Raises:
         SkipTest: if reading the test data raises ``ConnectionError``.
     """
     self.output_type = "ElasticSearchOutput"
     self.output_args = dict(
         source='localhost',
         index=TestElasticSearchOutput.INDEX,
         content_field="abstract",
     )
     self.project = project = TopikProject(
         "test_project",
         output_type=self.output_type,
         output_args=self.output_args,
     )
     read_options = {"content_field": "abstract", "synchronous_wait": 30}
     try:
         project.read_input(test_data_path, **read_options)
     except ConnectionError:
         # Turn the connection failure into a skipped test, not an error.
         raise SkipTest(
             "Skipping Elasticsearch test - elasticsearch not running")
Exemple #7
0
 def setUp(self):
     """Build a project with the "ElasticSearchOutput" type and read the test data."""
     self.output_type = "ElasticSearchOutput"
     # Assemble the output arguments in a local before publishing them on self.
     es_settings = {
         'source': 'localhost',
         'index': TestElasticSearchOutput.INDEX,
         'content_field': "abstract",
     }
     self.output_args = es_settings
     self.project = TopikProject(
         "test_project",
         output_type=self.output_type,
         output_args=self.output_args,
     )
     self.project.read_input(
         test_data_path,
         content_field="abstract",
         synchronous_wait=30,
     )
Exemple #8
0
class TestElasticSearchOutput(unittest.TestCase, ProjectTest):
    """Test case combining ``unittest.TestCase`` with the ``ProjectTest`` base.

    ``setUp`` builds a ``TopikProject`` with the "ElasticSearchOutput" output
    type pointed at ``localhost`` and reads the test data into it;
    ``tearDown`` deletes the indices named after ``INDEX``.
    """

    # Name of the Elasticsearch index used by every test in this class.
    INDEX = "test_index"

    def setUp(self):
        """Create the project and load the test data from ``test_data_path``."""
        self.output_type = "ElasticSearchOutput"
        self.output_args = {'source': 'localhost',
                            'index': TestElasticSearchOutput.INDEX,
                            'content_field': "abstract"}
        self.project = TopikProject("test_project", output_type=self.output_type,
                                    output_args=self.output_args)
        self.project.read_input(test_data_path, content_field="abstract",
                                synchronous_wait=30)

    def tearDown(self):
        """Delete the test index and its ``_year_alias_date`` index if present."""
        instance = elasticsearch.Elasticsearch("localhost")
        index_names = (
            TestElasticSearchOutput.INDEX,
            "{}_year_alias_date".format(TestElasticSearchOutput.INDEX),
        )
        # Check existence before each delete so a missing index cannot raise
        # and cut the remaining cleanup short.
        for index_name in index_names:
            if instance.indices.exists(index_name):
                instance.indices.delete(index_name)
        # NOTE(review): presumably lets Elasticsearch settle before the next
        # test's setUp - confirm.
        time.sleep(1)