Example #1
    def test_process(self):
        # the data below contains 6 groups of 5 points each
        sample_mat = [\
                          [0.2,0.3],[1.0,0.28],[1.98,0.7],\
                          [0.1,1.11],[1.0,1.12],\
                          [5.94,0.4],[6.73,0.38],[7.42,0.97],\
                          [6.74,1.23],[5.91,1.20],\
                          [2.0,4.8],[2.74,4.78],[3.6,5.1],\
                          [3.1,5.3],[1.95,5.8],\
                          [8.94,5.2],[9.6,5.12],[10.31,5.29],\
                          [8.73,6.0],[9.54,5.99],\
                          [5.17,9.1],[5.64,8.97],[6.56,9.39],\
                          [4.99,9.82],[5.5,9.74],\
                          [11.8,1.8],[12.04,1.74],[12.9,2.0],\
                          [11.74,2.4],[12.11,2.32]
                      ]

        # label the cluster of each data point
        dbscan = DBSCAN(sample_mat, eps=1.0, min_pts=2, dist_func="euclidean")
        label_data = dbscan.cluster()  # dbscan.fit() is equivalent

        for idx, row in enumerate(sample_mat):
            self.tlog(str(row) + " -> " + str(label_data[idx]))
        self.tlog("labels are " + str(list(set(label_data))))

        # cluster prediction on unseen points
        r1 = autotest.eval_predict_one(dbscan, [11.70, 3.0], \
                            dbscan.predict([11.74, 2.4]), self.logging)
        r2 = autotest.eval_predict_one(dbscan, [8.40, 5.8], \
                            dbscan.predict([8.73, 6.0]), self.logging)
        r3 = autotest.eval_predict_one(dbscan, [0.7, 0.1], \
                            dbscan.predict([1.08, 0.7]), self.logging)

        assert (r1 and r2 and r3)
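
The eps and min_pts arguments above determine which samples count as core points. Below is a minimal sketch of that rule in plain Python (not this project's DBSCAN class; whether a point counts itself toward min_pts is an assumption here):

    from math import dist  # Euclidean distance, Python 3.8+

    def region_query(data, idx, eps):
        """Indices of all points within eps of data[idx] (the point itself included)."""
        return [j for j, p in enumerate(data) if dist(data[idx], p) <= eps]

    def is_core_point(data, idx, eps, min_pts):
        """Core point: its eps-neighbourhood holds at least min_pts samples."""
        return len(region_query(data, idx, eps)) >= min_pts

    points = [[0.2, 0.3], [1.0, 0.28], [5.94, 0.4]]
    print(is_core_point(points, 0, eps=1.0, min_pts=2))  # True: [1.0, 0.28] lies within 1.0
    print(is_core_point(points, 2, eps=1.0, min_pts=2))  # False: no other point within 1.0

Clusters then grow by chaining core points whose eps-neighbourhoods overlap; predict presumably assigns a new point to the cluster of the nearest point found during clustering.
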
Example #2
    def test_process(self):
        # the data below contains 6 groups of 5 points each
        sample_mat = [\
                          [0.2,0.3],[1.0,0.28],[1.98,0.7],\
                          [0.1,1.11],[1.0,1.12],\
                          [5.94,0.4],[6.73,0.38],[7.42,0.97],\
                          [6.74,1.23],[5.91,1.20],\
                          [2.0,4.8],[2.74,4.78],[3.6,5.1],\
                          [3.1,5.3],[1.95,5.8],\
                          [8.94,5.2],[9.6,5.12],[10.31,5.29],\
                          [8.73,6.0],[9.54,5.99],\
                          [5.17,9.1],[5.64,8.97],[6.56,9.39],\
                          [4.99,9.82],[5.5,9.74],\
                          [11.8,1.8],[12.04,1.74],[12.9,2.0],\
                          [11.74,2.4],[12.11,2.32]
                      ]

        # label the cluster of each data point
        dbscan = DBSCAN(sample_mat, eps=1.0, min_pts=2, dist_func="euclidean")
        label_data = dbscan.cluster()  # dbscan.fit() is equivalent

        for idx, row in enumerate(sample_mat):
            self.tlog(str(row) + " -> " + str(label_data[idx]))
        self.tlog("labels are " + str(list(set(label_data))))

        # cluster prediction on unseen points
        r1 = autotest.eval_predict_one(dbscan, [11.70, 3.0], \
                            dbscan.predict([11.74, 2.4]), self.logging)
        r2 = autotest.eval_predict_one(dbscan, [8.40, 5.8], \
                            dbscan.predict([8.73, 6.0]), self.logging)
        r3 = autotest.eval_predict_one(dbscan, [0.7, 0.1], \
                            dbscan.predict([1.08, 0.7]), self.logging)

        assert (r1 and r2 and r3)
Example #3
    def test_process(self):

        train_mat = [\
                     # Character sequences with the spaces removed

                     ['<s>','I','a','m','a','b','o','y'],\
                     ['<s>','Y','o','u','a','r','e','a','g','i','r','l'],\
                     ['<s>','I','a','m','a','g','o','o','d','b','o','y'],\
                     ['<s>','Y','o','u','a','r','e','a','g','o','o','d','g','i','r','l'],\
                     ]

        train_label = [\
                     # Label sequences: 1 == a space follows this character,
                     # 0 == no space follows

                     [0,1,0,1,1,0,0,1],\
                     [0,0,0,1,0,0,1,1,0,0,0,1],\
                     [0,1,0,1,1,0,0,0,1,0,0,1],\
                     [0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,1],\
                    ]

        nlp_common = nlp()
        voca = nlp_common.extract_vocabulary(train_mat)
        train_wordseq_mat = []
        for wordseq in train_mat:
            wordseq_mat = nlp_common.set_of_wordseq2matrix(voca, wordseq)
            train_wordseq_mat.append(wordseq_mat)

        crf = CRF(train_wordseq_mat,
                  train_label,
                  hidden_state_labeled=True,
                  hidden_state=2)
        crf.fit(toler=0.001, epoch=30)

        ti1 = nlp_common.set_of_wordseq2matrix(
            voca, ['<s>', 'I', 'a', 'm', 'g', 'o', 'o', 'd'])
        r1 = autotest.eval_predict_one(crf, ti1, [0, 1, 0, 1, 0, 0, 0, 1],
                                       self.logging)

        ti2 = nlp_common.set_of_wordseq2matrix(
            voca, ['<s>', 'Y', 'o', 'u', 'a', 'r', 'e', 'a', 'b', 'o', 'y'])
        r2 = autotest.eval_predict_one(crf, ti2,
                                       [0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1],
                                       self.logging)

        ti3 = nlp_common.set_of_wordseq2matrix(
            voca, ['<s>', 'Y', 'o', 'u', 'a', 'r', 'e', 'g', 'i', 'r', 'l'])
        r3 = autotest.eval_predict_one(crf, ti3,
                                       [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1],
                                       self.logging)

        ti4 = nlp_common.set_of_wordseq2matrix(
            voca, ['<s>', 'I', 'a', 'm', 'g', 'i', 'r', 'l'])
        r4 = autotest.eval_predict_one(crf, ti4, [0, 1, 0, 1, 0, 0, 0, 1],
                                       self.logging)
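
set_of_wordseq2matrix is presumably a one-hot encoder over the character vocabulary extracted above. A minimal sketch under that assumption (the helper names here are illustrative, not the project's nlp API):

    def extract_vocabulary(sequences):
        """Sorted list of the unique symbols seen in the training sequences."""
        return sorted({sym for seq in sequences for sym in seq})

    def wordseq_to_matrix(voca, wordseq):
        """One row per symbol; each row is a one-hot vector over the vocabulary."""
        return [[1 if sym == v else 0 for v in voca] for sym in wordseq]

    voca = extract_vocabulary([['<s>', 'I', 'a', 'm', 'a', 'b', 'o', 'y']])
    print(voca)                                   # ['<s>', 'I', 'a', 'b', 'm', 'o', 'y']
    print(wordseq_to_matrix(voca, ['<s>', 'I']))  # [[1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0]]

With hidden_state_labeled=True, the 0/1 space tags presumably serve directly as the two hidden states whose transitions and emissions the CRF fits.
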
Example #4
    def test_process(self):
        sample_docs = [\
                "hello this is virus mail",\
                "hi this is from friend",\
                "how about buy this virus",\
                "facebook friend contact to you",\
                "I love you baby virus",\
                "what a nice day how about you"\
            ]

        docs_label = ['spam','real','spam','real','spam','real']

        nlp_eng = nlp("eng")

        # extract vocabulary from docs
        voca = nlp_eng.extract_vocabulary(sample_docs)
        self.tlog(voca)
        assert len(voca) == 12

        # convert each doc to a bag-of-words vector using the vocabulary
        docs_vector = []
        for doc in sample_docs:
            docs_vector.append(nlp_eng.bag_of_word2vector(voca, doc))
        self.tlog(docs_vector)

        # training NaiveBayes
        nbayes = NaiveBayes(docs_vector, docs_label)
        nbayes.fit()

        # test case 1
        tc1 = "this is virus mail"
        tc1_vec = nlp_eng.bag_of_word2vector(voca, tc1)

        self.tlog(tc1)
        self.tlog(tc1_vec)

        r1 = autotest.eval_predict_one(nbayes, tc1_vec, 'spam', self.logging)
        assert r1 == True

        # test case 2
        tc2 = "I love you love"
        tc2_vec = nlp_eng.bag_of_word2vector(voca, tc2)

        self.tlog(tc2)
        self.tlog(tc2_vec)

        r2 = autotest.eval_predict_one(nbayes, tc2_vec, 'spam', self.logging)
        assert r2 == True
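
bag_of_word2vector presumably projects a document onto the extracted vocabulary. A minimal sketch of that idea (word counts here; the project may use binary presence instead, and the helper name is illustrative):

    def bag_of_words_vector(voca, doc):
        """Count how often each vocabulary word occurs in the document."""
        tokens = doc.lower().split()
        return [tokens.count(word) for word in voca]

    voca = ['hello', 'this', 'is', 'virus', 'mail', 'friend']
    print(bag_of_words_vector(voca, "this is virus mail"))  # [0, 1, 1, 1, 1, 0]

NaiveBayes then scores each label by combining the label prior with per-word likelihoods estimated from these vectors, and predicts the label with the larger posterior.
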
Example #5
    def test_process(self):

        train_mat = [\
                     [0.12, 0.25],\
                     [3.24, 4.33],\
                     [0.14, 0.45],\
                     [7.30, 4.23],\
                     ]
        train_label = [[0, 1], [1, 0], [0, 1], [1, 0]]  # one-hot labels: 2 output bits

        fnn = FNN(train_mat, train_label, [3])
        fnn.fit(lr=0.01, epoch=2000, err_th=0.001, batch_size=4)

        r1 = autotest.eval_predict_one(fnn, [0.10, 0.33], [0, 1], self.logging, one_hot=True)
        r2 = autotest.eval_predict_one(fnn, [4.40, 4.37], [1, 0], self.logging, one_hot=True)
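
FNN(train_mat, train_label, [3]) describes a network with one hidden layer of 3 units and a 2-unit output for the one-hot labels. A minimal sketch of the forward pass such a network computes (sigmoid activations assumed, biases omitted, and the weights below are arbitrary placeholders rather than anything fit() would learn):

    import math

    def sigmoid(x):
        return 1.0 / (1.0 + math.exp(-x))

    def forward(x, w_hidden, w_out):
        """Hidden layer (3 units) then output layer (2 units), sigmoid at each stage."""
        h = [sigmoid(sum(wi * xi for wi, xi in zip(w, x))) for w in w_hidden]
        return [sigmoid(sum(wi * hi for wi, hi in zip(w, h))) for w in w_out]

    w_hidden = [[0.5, -0.2], [0.1, 0.4], [-0.3, 0.8]]   # 3 hidden units x 2 inputs
    w_out = [[0.7, -0.5, 0.2], [-0.6, 0.9, 0.1]]        # 2 outputs x 3 hidden units
    scores = forward([0.10, 0.33], w_hidden, w_out)
    print(scores)  # with one_hot=True, the argmax of these scores is compared to the label

fit() presumably adjusts w_hidden and w_out by backpropagation until the error drops below err_th or epoch iterations pass.
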
Example #6
    def test_process(self):
        sample_docs = [
            "hello this is virus mail",
            "hi this is from friend",
            "how about buy this virus",
            "facebook friend contact to you",
            "I love you baby virus",
            "what a nice day how about you",
        ]

        docs_label = ["spam", "real", "spam", "real", "spam", "real"]

        nlp_eng = nlp("eng")

        # extract vocabulary from docs
        voca = nlp_eng.extract_vocabulary(sample_docs)
        self.tlog(voca)
        assert len(voca) == 12

        # convert each doc to a bag-of-words vector using the vocabulary
        docs_vector = []
        for doc in sample_docs:
            docs_vector.append(nlp_eng.bag_of_word2vector(voca, doc))
        self.tlog(docs_vector)

        # training NaiveBayes
        nbayes = NaiveBayes(docs_vector, docs_label)
        nbayes.fit()

        # test case 1
        tc1 = "this is virus mail"
        tc1_vec = nlp_eng.bag_of_word2vector(voca, tc1)

        self.tlog(tc1)
        self.tlog(tc1_vec)

        r1 = autotest.eval_predict_one(nbayes, tc1_vec, "spam", self.logging)
        assert r1 == True

        # test case 2
        tc2 = "I love you love"
        tc2_vec = nlp_eng.bag_of_word2vector(voca, tc2)

        self.tlog(tc2)
        self.tlog(tc2_vec)

        r2 = autotest.eval_predict_one(nbayes, tc2_vec, "spam", self.logging)
        assert r2 == True
Example #7
    def test_process(self):

        train_mat = [\
                     [0.12, 0.25],\
                     [3.24, 4.33],\
                     [0.14, 0.45],\
                     [7.30, 4.23],\
                     ]
        train_label = [[0, 1], [1, 0], [0, 1], [1, 0]]  # one-hot labels: 2 output bits

        linear_reg = LinearRegression(train_mat, train_label)
        linear_reg.fit(lr=0.001, epoch=1000, batch_size=4)

        r1 = autotest.eval_predict_one(linear_reg, [0.10, 0.33], [0, 1], self.logging, one_hot=True)
        r2 = autotest.eval_predict_one(linear_reg, [4.40, 4.37], [1, 0], self.logging, one_hot=True)
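
fit(lr=0.001, epoch=1000, batch_size=4) presumably runs batch gradient descent on squared error. A minimal single-output sketch of one such update (only the first component of each one-hot label is used as the target here, and the function name is illustrative):

    def gd_step(X, y, w, b, lr):
        """One batch gradient-descent step for y ~ w.x + b under mean squared error."""
        n = len(X)
        preds = [sum(wi * xi for wi, xi in zip(w, x)) + b for x in X]
        errs = [p - t for p, t in zip(preds, y)]
        w = [wi - lr * (2.0 / n) * sum(e * x[i] for e, x in zip(errs, X)) for i, wi in enumerate(w)]
        b = b - lr * (2.0 / n) * sum(errs)
        return w, b

    X = [[0.12, 0.25], [3.24, 4.33], [0.14, 0.45], [7.30, 4.23]]
    y = [0.0, 1.0, 0.0, 1.0]          # first bit of each one-hot label as a real-valued target
    w, b = [0.0, 0.0], 0.0
    for _ in range(1000):
        w, b = gd_step(X, y, w, b, lr=0.001)
    print(w, b)                       # fitted line; larger inputs push the prediction toward 1
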
Example #8
    def test_process(self):
        train_mat = np.mat([\
                     [0.12, 0.25],\
                     [3.24, 4.33],\
                     [0.14, 0.45],\
                     [7.30, 4.23],\
                     ])
        train_label = np.mat([[-1.0], [1.0], [-1.0], [1.0]])  # binary labels: -1.0 / +1.0

        svm = SVM(train_mat, train_label)
        svm.fit(C=5.0, toler=0.001, epoch=50)

        r1 = autotest.eval_predict_one(svm, np.mat([0.10, 0.33]), -1.0, self.logging)
        r2 = autotest.eval_predict_one(svm, np.mat([4.40, 4.37]), 1.0, self.logging)

        assert r1
        assert r2
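
The -1.0 / +1.0 labels follow the usual SVM convention where the prediction is the sign of the decision function. A minimal linear-kernel sketch of that rule (w and b below are hand-picked to separate the two groups, not values this SVM actually learns):

    import numpy as np

    def svm_decision(w, b, x):
        """Linear SVM rule: sign(w.x + b), returned as -1.0 or +1.0."""
        return 1.0 if float(np.dot(w, x)) + b >= 0 else -1.0

    w = np.array([0.4, 0.3])   # hand-picked separating direction
    b = -1.5
    print(svm_decision(w, b, np.array([0.10, 0.33])))  # -1.0
    print(svm_decision(w, b, np.array([4.40, 4.37])))  # 1.0

fit(C=5.0, toler=0.001, epoch=50) presumably runs an SMO-style optimisation, with C bounding the multipliers and toler the convergence tolerance.
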
Example #9
    def test_process(self):

        train_mat = [\
                     [0.12, 0.25],\
                     [3.24, 4.33],\
                     [0.14, 0.45],\
                     [7.30, 4.23],\
                     ]
        train_label = [[0, 1], [1, 0], [0, 1], [1, 0]]  # one-hot labels: 2 output bits

        fnn = FNN(train_mat, train_label, [3])
        fnn.fit(lr=0.01, epoch=2000, err_th=0.001, batch_size=4)

        r1 = autotest.eval_predict_one(fnn, [0.10, 0.33], [0, 1],
                                       self.logging,
                                       one_hot=True)
        r2 = autotest.eval_predict_one(fnn, [4.40, 4.37], [1, 0],
                                       self.logging,
                                       one_hot=True)
Example #10
    def test_process(self):
        train_mat = [\
                     [0.12, 0.25],\
                     [3.24, 4.33],\
                     [0.14, 0.45],\
                     [7.30, 4.23],\
                     ]
        train_label = [[0, 1], [1, 0], [0, 1], [1, 0]]  # one-hot labels: 2 output bits

        svc = SVC(train_mat, train_label)
        svc.fit(C=5.0, toler=0.001, epoch=50)

        r1 = autotest.eval_predict_one(svc, [0.10, 0.33], [0., 1.],
                                       self.logging)
        r2 = autotest.eval_predict_one(svc, [4.40, 4.37], [1., 0.],
                                       self.logging)

        assert r1
        assert r2
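
The only difference from the SVM example is the label format: [1, 0] here plays the role of +1.0 and [0, 1] of -1.0, so SVC presumably just wraps the binary decision into a two-element vector. A sketch of that assumed mapping (not the project's code):

    def sign_to_one_hot(svm_sign):
        """Map a binary SVM output (-1.0 / +1.0) onto the two-element SVC label."""
        return [1.0, 0.0] if svm_sign > 0 else [0.0, 1.0]

    print(sign_to_one_hot(1.0))    # [1.0, 0.0]
    print(sign_to_one_hot(-1.0))   # [0.0, 1.0]
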
Example #11
    def test_process(self):

        train_mat = [\
                     [0.12, 0.25],\
                     [3.24, 4.33],\
                     [0.14, 0.45],\
                     [7.30, 4.23],\
                     ]
        train_label = [[0, 1], [1, 0], [0, 1], [1, 0]]  # one-hot labels: 2 output bits

        logistic_reg = LogisticRegression(train_mat, train_label)
        logistic_reg.fit(lr=0.001, epoch=2000, batch_size=4)

        r1 = autotest.eval_predict_one(logistic_reg, [0.10, 0.33], [0, 1],
                                       self.logging,
                                       one_hot=True)
        r2 = autotest.eval_predict_one(logistic_reg, [4.40, 4.37], [1, 0],
                                       self.logging,
                                       one_hot=True)
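
Compared to the linear-regression example, the logistic model squashes the linear score through a sigmoid so the output can be read as a class probability. A minimal sketch of one prediction (the weights are illustrative, not fitted values, and the mapping from probability to one-hot label is an assumption):

    import math

    def logistic_predict(w, b, x):
        """P(class [1, 0] | x) under a logistic model with weights w and bias b."""
        z = sum(wi * xi for wi, xi in zip(w, x)) + b
        return 1.0 / (1.0 + math.exp(-z))

    w, b = [0.9, 0.8], -3.0                       # illustrative parameters
    for x in ([0.10, 0.33], [4.40, 4.37]):
        p = logistic_predict(w, b, x)
        print(x, round(p, 3), [1, 0] if p >= 0.5 else [0, 1])
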
Example #12
    def test_process(self):
        tree = self.get_global_value("Stored_ID3_tree")
        tmp_store_name = "tmp/tree_aba_store_test.dat"

        self.tlog("store tree to " + tmp_store_name)
        fs.store_module(tree, tmp_store_name)
        mod = fs.restore_module(tmp_store_name)

        self.tlog("restored tree : " + str(mod.tree))
        mod_r1 = autotest.eval_predict_one(mod, ["cloudy", "cloudy", "sunny"], "sunny", self.logging)

        assert mod_r1 == True
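
fs.store_module / fs.restore_module presumably serialize the fitted model to disk and back. In plain Python, pickle gives the same round trip (a sketch of the assumed behaviour, not the project's fs module; note the restored object here is whatever was stored, rather than a wrapper with a .tree attribute):

    import pickle

    def store_module(model, path):
        """Serialize a fitted model object to disk."""
        with open(path, "wb") as f:
            pickle.dump(model, f)

    def restore_module(path):
        """Load a previously stored model object back into memory."""
        with open(path, "rb") as f:
            return pickle.load(f)

    store_module({"tree": {"attr_0": "..."}}, "tmp_tree_store_test.dat")  # throwaway dict stands in for the tree
    print(restore_module("tmp_tree_store_test.dat"))                      # the same dict, read back from disk
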
Example #13
    def test_process(self):
        # the data below contains 6 groups of 5 points each
        sample_mat = [\
                          [0.2,0.3],[1.0,0.28],[1.98,0.7],\
                          [0.1,1.11],[1.0,1.12],\
                          [5.94,0.4],[6.73,0.38],[7.42,0.97],\
                          [6.74,1.23],[5.91,1.20],\
                          [2.0,4.8],[2.74,4.78],[3.6,5.1],\
                          [3.1,5.3],[1.95,5.8],\
                          [8.94,5.2],[9.6,5.12],[10.31,5.29],\
                          [8.73,6.0],[9.54,5.99],\
                          [5.17,9.1],[5.64,8.97],[6.56,9.39],\
                          [4.99,9.82],[5.5,9.74],\
                          [11.8,1.8],[12.04,1.74],[12.9,2.0],\
                          [11.74,2.4],[12.11,2.32]
                      ]
            
        kmeans = Kmeans(sample_mat, dist_func='euclidean')

        # clustering with a fixed K
        cluster_point_fixed = kmeans.cluster(K=6, epoch=30)
        self.tlog("fixed point count : " + str(len(cluster_point_fixed)))
        self.tlog("cluster point : \n" + str(cluster_point_fixed))

        # automatically finding a good number of clusters (flexible K)
        cluster_point_flexible = kmeans.fit(max_K=7, random_try_count=10, epoch=30)
        self.tlog("flexible point count : " + str(len(cluster_point_flexible)))
        self.tlog("cluster point : \n" + str(cluster_point_flexible))

        # cluster prediction on unseen points
        r1 = autotest.eval_predict_one(kmeans, [11.70, 3.0], \
                                        kmeans.predict([11.74, 2.4]), self.logging)
        r2 = autotest.eval_predict_one(kmeans, [8.40, 5.8], \
                                        kmeans.predict([8.73, 6.0]), self.logging)
        r3 = autotest.eval_predict_one(kmeans, [0.7, 0.1], \
                                        kmeans.predict([1.08, 0.7]), self.logging)
                                        
        assert (r1 and r2 and r3)
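
kmeans.predict has to perform the basic k-means assignment step: return the cluster whose centroid is nearest to the query point. A minimal sketch of that step (the centroids below are the rough per-group means of sample_mat, not necessarily what this run converges to):

    from math import dist

    def nearest_centroid(centroids, point):
        """Index of the centroid closest to the point (Euclidean distance)."""
        return min(range(len(centroids)), key=lambda k: dist(centroids[k], point))

    centroids = [[0.86, 0.70], [6.55, 0.84], [2.68, 5.16],
                 [9.42, 5.52], [5.57, 9.40], [12.12, 2.05]]   # approximate group means
    print(nearest_centroid(centroids, [11.70, 3.0]))   # 5 -> same cluster as [11.74, 2.4]
    print(nearest_centroid(centroids, [0.7, 0.1]))     # 0 -> same cluster as [1.08, 0.7]

fit() then alternates this assignment step with recomputing each centroid as the mean of its assigned points; max_K and random_try_count presumably control the search over cluster counts and random restarts.
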
Example #14
    def test_process(self):

        sample_mat = [['sunny','cloudy','rain'], ['cloudy','sunny','rain'], ['cloudy','cloudy','rain'],
                      ['cloudy','cloudy','rain'], ['cloudy','sunny','sunny'], ['sunny','sunny','rain'],
                      ['sunny','sunny','sunny'], ['sunny','cloudy','sunny'], ['cloudy','cloudy','rain']]

        sample_label = ['rain', 'rain', 'rain',
                        'sunny', 'sunny', 'rain',
                        'sunny', 'sunny', 'sunny']

        tree = DecisionTreeID3(sample_mat, sample_label)
        tree_structure = tree.build()
        self.tlog("Tree structure : " + str(tree_structure))

        r1 = autotest.eval_predict_one(tree, ['cloudy','cloudy','rain'], 'sunny', self.logging)
        assert r1 == True
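
ID3 picks the attribute to split on by information gain, i.e. the drop in label entropy. A minimal sketch of that computation on the same data (plain Python, independent of the DecisionTreeID3 class):

    from collections import Counter
    from math import log2

    def entropy(labels):
        """Shannon entropy of a list of class labels."""
        n = len(labels)
        return -sum((c / n) * log2(c / n) for c in Counter(labels).values())

    def information_gain(rows, labels, col):
        """Entropy reduction obtained by splitting the rows on attribute `col`."""
        parts = {}
        for row, lab in zip(rows, labels):
            parts.setdefault(row[col], []).append(lab)
        n = len(labels)
        return entropy(labels) - sum(len(p) / n * entropy(p) for p in parts.values())

    rows = [['sunny','cloudy','rain'], ['cloudy','sunny','rain'], ['cloudy','cloudy','rain'],
            ['cloudy','cloudy','rain'], ['cloudy','sunny','sunny'], ['sunny','sunny','rain'],
            ['sunny','sunny','sunny'], ['sunny','cloudy','sunny'], ['cloudy','cloudy','rain']]
    labels = ['rain','rain','rain','sunny','sunny','rain','sunny','sunny','sunny']
    print([round(information_gain(rows, labels, c), 3) for c in range(3)])
    # the third attribute has the largest gain, so ID3 splits on it first

The tree then recurses on each branch until the labels are pure or the attributes run out.
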
Example #15
    def test_process(self):

        sample_mat = [
            ["sunny", "cloudy", "rain"],
            ["cloudy", "sunny", "rain"],
            ["cloudy", "cloudy", "rain"],
            ["cloudy", "cloudy", "rain"],
            ["cloudy", "sunny", "sunny"],
            ["sunny", "sunny", "rain"],
            ["sunny", "sunny", "sunny"],
            ["sunny", "cloudy", "sunny"],
            ["cloudy", "cloudy", "rain"],
        ]

        sample_label = ["rain", "rain", "rain", "sunny", "sunny", "rain", "sunny", "sunny", "sunny"]

        tree = DecisionTreeID3(sample_mat, sample_label)
        tree_structure = tree.build()
        self.tlog("Tree structure : " + str(tree_structure))

        r1 = autotest.eval_predict_one(tree, ["cloudy", "cloudy", "rain"], "sunny", self.logging)
        assert r1 == True

        self.set_global_value("Stored_ID3_tree", tree)