def setUp(self):
    """Load both description CSVs and prepare the token-list fixtures.

    Sets ``file_name`` / ``file_name_1000`` (file stems), the two
    DataFrames read from ``docs/<stem>.csv``, a fresh
    ``DocsPreprocessor`` and six hand-written token fixtures.
    """
    folder = "docs/"
    extension = ".csv"
    self.file_name = "description"
    self.file_name_1000 = "description_1000"

    full_path = "".join((folder, self.file_name, extension))
    sample_path = "".join((folder, self.file_name_1000, extension))
    self.description = pd.read_csv(full_path)
    self.description_1000 = pd.read_csv(sample_path)

    self.dp = DocsPreprocessor()

    # Flat token list (the only fixture that is not nested).
    self.test_array1 = [
        'a', 'easy', 'way', 'to', 'use', 'android', 'sharepreference',
    ]

    # One token list wrapped in an outer list.
    self.test_array2 = [
        [
            'A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker',
            'Registry', 'API',
        ],
    ]

    # Same tokens as test_array2 with a trailing digit token '2'.
    self.test_array3 = [
        [
            'A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker',
            'Registry', 'API', '2',
        ],
    ]

    # Same tokens as test_array2 with a trailing number word 'two'.
    self.test_array4 = [
        [
            'A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker',
            'Registry', 'API', 'two',
        ],
    ]

    # Mixed-case tokens; note 'Webpack,' keeps its trailing comma.
    self.test_array5 = [
        [
            'a', 'boilerplate', 'for', 'a', 'Koa', 'Redux', 'React',
            'application', 'with', 'Webpack,', 'Mocha', 'and', 'SASS',
        ],
    ]

    # Plain English sentence tokens, including the plural 'corpora'.
    self.test_array6 = [
        [
            'Many', 'corpora', 'have', 'better', 'values', 'than',
            'you', 'think',
        ],
    ]
class TestNMF(unittest.TestCase):
    """Test the NMF class."""

    def setUp(self):
        """Read the description CSVs and build an NMF from description_1000.

        ``description_1000.csv`` is run through ``DocsPreprocessor.process``
        and the result is handed to ``NMF``.
        """
        # Not read by any test in this class.
        self.description_csv = pd.read_csv("docs/description.csv")
        self.description_1000_csv = pd.read_csv("docs/description_1000.csv")
        self.dp = DocsPreprocessor()
        self.description_1000 = self.dp.process(self.description_1000_csv)
        self.nmf = NMF(self.description_1000)

    def test_type(self):
        """``NMF.docs`` must be a list."""
        # assertIsInstance gives a clearer failure message than comparing
        # type() objects and also accepts list subclasses.
        self.assertIsInstance(self.nmf.docs, list)

    # The four tests below were previously disabled by wrapping them in bare
    # string literals.  Marking them as skipped keeps them syntax-checked and
    # visible in test reports; remove the decorator to re-enable one.

    @unittest.skip("disabled in the original suite")
    def test_vectorize(self):
        """Vectorizing the fixture should give a 1000 x 2381 matrix."""
        vect, terms = self.nmf.vectorize()
        self.assertEqual(len(terms), 2381)
        self.assertEqual((vect.shape[0], vect.shape[1]), (1000, 2381))

    @unittest.skip("disabled in the original suite")
    def test_create_model(self):
        """Smoke test: ``create_model(10)`` runs without raising."""
        self.nmf.create_model(10)

    @unittest.skip("disabled in the original suite")
    def test_run_topic_models(self):
        """Smoke test: ``run_topic_models(10, 30, 10)`` runs without raising."""
        self.nmf.run_topic_models(10, 30, 10)

    @unittest.skip("disabled in the original suite")
    def test_create_word_embedding_model(self):
        """Smoke test: ``create_word_embedding_model()`` runs without raising."""
        self.nmf.create_word_embedding_model()

    def test_process_models(self):
        """Smoke test: ``process_models(10, 30, 10, 20)`` runs without raising."""
        self.nmf.process_models(10, 30, 10, 20)
class TestBTM(unittest.TestCase):
    """Test the BTM class."""

    def setUp(self):
        """Read the description CSVs and build a BTM from description_1000.

        ``description_1000.csv`` is passed through
        ``DocsPreprocessor.process`` before being given to ``BTM``.
        """
        full_frame = pd.read_csv("docs/description.csv")
        sample_frame = pd.read_csv("docs/description_1000.csv")
        preprocessor = DocsPreprocessor()
        processed = preprocessor.process(sample_frame)
        model = BTM(processed)

        self.description_csv = full_frame
        self.description_1000_csv = sample_frame
        self.dp = preprocessor
        self.description_1000 = processed
        self.btm = model

    def test_btm(self):
        """Smoke test: ``compute_values(2, 10, 2)`` runs without raising."""
        self.btm.compute_values(2, 10, 2)
class TestLDA(unittest.TestCase):
    """Test the LDA class."""

    def setUp(self):
        """Read the description CSVs and build an LDA from description_1000.

        ``description_1000.csv`` is passed through
        ``DocsPreprocessor.process`` before being given to ``LDA``.
        """
        full_frame = pd.read_csv("docs/description.csv")
        sample_frame = pd.read_csv("docs/description_1000.csv")
        preprocessor = DocsPreprocessor()
        processed = preprocessor.process(sample_frame)
        model = LDA(processed)

        self.description_csv = full_frame
        self.description_1000_csv = sample_frame
        self.dp = preprocessor
        self.description_1000 = processed
        self.lda = model

    def test_1(self):
        """Smoke test for ``compute_coherence_values(5, 20, 5)``.

        No assertions are made on the values; the three-way unpacking only
        fails if the call does not return exactly three items.
        """
        outcome = self.lda.compute_coherence_values(5, 20, 5)
        k_values, coherence_values, topic_list = outcome
def setUp(self):
    """Read the description CSVs and build an NMF from description_1000.

    ``description_1000.csv`` is passed through ``DocsPreprocessor.process``
    and the processed result is given to ``NMF``.
    """
    full_frame = pd.read_csv("docs/description.csv")
    sample_frame = pd.read_csv("docs/description_1000.csv")
    preprocessor = DocsPreprocessor()
    processed = preprocessor.process(sample_frame)
    model = NMF(processed)

    self.description_csv = full_frame
    self.description_1000_csv = sample_frame
    self.dp = preprocessor
    self.description_1000 = processed
    self.nmf = model
class TestDocsPreprocessor(unittest.TestCase):
    """Test the DocsPreprocessor class."""

    def setUp(self):
        """Load both description CSVs and prepare the token-list fixtures.

        The ``test_array*`` fixtures are not referenced by the tests
        currently defined in this class.
        """
        folder = "docs/"
        extension = ".csv"

        def load(stem):
            # Every data file lives at docs/<stem>.csv.
            return pd.read_csv(folder + stem + extension)

        self.file_name = "description"
        self.file_name_1000 = "description_1000"
        self.description = load(self.file_name)
        self.description_1000 = load(self.file_name_1000)
        self.dp = DocsPreprocessor()

        # Flat token list (the only fixture that is not nested).
        self.test_array1 = [
            'a', 'easy', 'way', 'to', 'use', 'android', 'sharepreference',
        ]

        # One token list wrapped in an outer list.
        self.test_array2 = [[
            'A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker',
            'Registry', 'API',
        ]]

        # test_array2's tokens followed by the digit token '2'.
        self.test_array3 = [[
            'A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker',
            'Registry', 'API', '2',
        ]]

        # test_array2's tokens followed by the number word 'two'.
        self.test_array4 = [[
            'A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker',
            'Registry', 'API', 'two',
        ]]

        # Mixed-case tokens; 'Webpack,' keeps its trailing comma.
        self.test_array5 = [[
            'a', 'boilerplate', 'for', 'a', 'Koa', 'Redux', 'React',
            'application', 'with', 'Webpack,', 'Mocha', 'and', 'SASS',
        ]]

        # Plain English sentence tokens, including the plural 'corpora'.
        self.test_array6 = [[
            'Many', 'corpora', 'have', 'better', 'values', 'than',
            'you', 'think',
        ]]

    def test_process_1000(self):
        """Use this test for outputting clean data (description_1000).

        Makes no assertions; it only runs ``process`` on the 1000 frame.
        """
        # NOTE(review): the second argument is the file stem; presumably it
        # names the output -- confirm against DocsPreprocessor.process.
        self.dp.process(self.description_1000, self.file_name_1000)

    def test_process_full(self):
        """Use this test for outputting clean data (full description file).

        Makes no assertions; it only runs ``process`` on the full frame.
        """
        self.dp.process(self.description, self.file_name)