def setUp(self):
        """
        Read both description CSVs, create a DocsPreprocessor and prepare
        the token-list fixtures used by the tests.
        """
        data_dir, suffix = "docs/", ".csv"
        self.file_name, self.file_name_1000 = "description", "description_1000"

        # Each path is <data_dir><name><suffix>; description.csv is read
        # before description_1000.csv.
        self.description = pd.read_csv(
            "".join((data_dir, self.file_name, suffix)))
        self.description_1000 = pd.read_csv(
            "".join((data_dir, self.file_name_1000, suffix)))
        self.dp = DocsPreprocessor()

        # Flat list of tokens (the only fixture without an outer list).
        self.test_array1 = [
            'a', 'easy', 'way', 'to', 'use', 'android', 'sharepreference',
        ]

        # test_array2..4 start with the same ten tokens; each fixture gets
        # its own list object wrapped in an outer list.
        registry_tokens = [
            'A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker',
            'Registry', 'API',
        ]
        self.test_array2 = [list(registry_tokens)]       # prefix only
        self.test_array3 = [registry_tokens + ['2']]     # trailing digit
        self.test_array4 = [registry_tokens + ['two']]   # trailing word

        # One document mixing case, with a token that keeps a trailing
        # comma ('Webpack,').
        boilerplate_tokens = [
            'a', 'boilerplate', 'for', 'a', 'Koa', 'Redux', 'React',
            'application', 'with', 'Webpack,', 'Mocha', 'and', 'SASS',
        ]
        self.test_array5 = [boilerplate_tokens]

        # One document of plain English words.
        corpora_tokens = [
            'Many', 'corpora', 'have', 'better', 'values', 'than',
            'you', 'think',
        ]
        self.test_array6 = [corpora_tokens]
class TestNMF(unittest.TestCase):
    """
    Test the NMF class.
    """
    def setUp(self):
        self.description_csv = pd.read_csv("docs/description.csv")
        self.description_1000_csv = pd.read_csv("docs/description_1000.csv")
        self.dp = DocsPreprocessor()
        self.description_1000 = self.dp.process(self.description_1000_csv)
        self.nmf = NMF(self.description_1000)

    def test_type(self):
        self.assertEqual(type(self.nmf.docs), list)

    """ def test_vectorize(self):
        vect, terms = self.nmf.vectorize()
        self.assertTrue(len(terms) == 2381)
        self.assertEqual((vect.shape[0], vect.shape[1]), (1000, 2381)) """
    """ def test_create_model(self):
        self.nmf.create_model(10) """
    """ def test_run_topic_models(self):
        self.nmf.run_topic_models(10, 30, 10) """
    """ def test_create_word_embedding_model(self):
        w_model = self.nmf.create_word_embedding_model() """

    def test_process_models(self):
        self.nmf.process_models(10, 30, 10, 20)
class TestBTM(unittest.TestCase):
    """
    Test the BTM class.
    """
    def setUp(self):
        self.description_csv = pd.read_csv("docs/description.csv")
        self.description_1000_csv = pd.read_csv("docs/description_1000.csv")
        self.dp = DocsPreprocessor()
        self.description_1000 = self.dp.process(self.description_1000_csv)
        self.btm = BTM(self.description_1000)

    def test_btm(self):
        self.btm.compute_values(2, 10, 2)
# Example no. 4
# 0
class TestLDA(unittest.TestCase):
    """
    Test the LDA class.
    """
    def setUp(self):
        self.description_csv = pd.read_csv("docs/description.csv")
        self.description_1000_csv = pd.read_csv("docs/description_1000.csv")
        self.dp = DocsPreprocessor()
        self.description_1000 = self.dp.process(self.description_1000_csv)
        self.lda = LDA(self.description_1000)

    def test_1(self):
        k_values, coherence_values, topic_list = self.lda.compute_coherence_values(
            5, 20, 5)
 def setUp(self):
     """
     Read both description CSVs, preprocess description_1000 and build
     the NMF model used by the tests.
     """
     frame_full = pd.read_csv("docs/description.csv")
     frame_1000 = pd.read_csv("docs/description_1000.csv")
     preprocessor = DocsPreprocessor()
     processed_1000 = preprocessor.process(frame_1000)

     # Publish everything on the test case under the fixture names.
     self.description_csv = frame_full
     self.description_1000_csv = frame_1000
     self.dp = preprocessor
     self.description_1000 = processed_1000
     self.nmf = NMF(processed_1000)
class TestDocsPreprocessor(unittest.TestCase): 
    """
    Test the DocsPreprocessor class.
    """
    
    def setUp(self):
        folder = "docs/"
        extension = ".csv"
        self.file_name = "description"
        self.file_name_1000 = "description_1000"
        self.description = pd.read_csv(folder + self.file_name + extension)
        self.description_1000 = pd.read_csv(folder + self.file_name_1000 + extension)
        self.dp = DocsPreprocessor()

        # self.test_array1
        # ['a', 'easy', 'way', 'to', 'use', 'android', 'sharepreference']
        self.test_array1 = [
            'a', 'easy', 'way', 'to', 'use', 'android', 'sharepreference']
        
        # self.test_array2
        # [['A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker', 
        #    'Registry', 'API']]
        self.test_array2 = [[
            'A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker', 
            'Registry', 'API']]
        
        # self.test_array3
        # [['A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker', 
        #    'Registry', 'API', '2']]
        self.test_array3 = [[
            'A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker', 
            'Registry', 'API', '2']]
        
        # self.test_array4
        # [['A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker', 
        #    'Registry', 'API', 'two']]
        self.test_array4 = [[
            'A', 'Go', 'API', 'client', 'for', 'the', 'v2', 'Docker', 
            'Registry', 'API', 'two']]
        
        # self.test_array5
        # [['a', 'boilerplate', 'for', 'a', 'Koa', 'Redux', 'React', 
        # 'application', 'with', 'Webpack,', 'Mocha', 'and', 'SASS']]
        self.test_array5 = [[
            'a', 'boilerplate', 'for', 'a', 'Koa', 'Redux', 'React', 
            'application', 'with', 'Webpack,', 'Mocha', 'and', 'SASS'
        ]]

        # self.test_array6
        # 'Many', 'corpora', 'have', 'better', 'values', 'than', 
        # 'you', 'think'
        self.test_array6 = [[
            'Many', 'corpora', 'have', 'better', 'values', 'than', 
            'you', 'think'
        ]]
        
    # Use this test for outputting clean data
    def test_process_1000(self):
        result = self.dp.process(self.description_1000, self.file_name_1000)

    # Use this test for outputting clean data
    def test_process_full(self):
        result = self.dp.process(self.description, self.file_name)