def test_process(self):
    """Cluster 30 two-dimensional points with DBSCAN and check predictions.

    Every point is logged together with the label DBSCAN assigned to it.
    Three unseen points are then evaluated through
    ``autotest.eval_predict_one``; the expected label of each one is the
    label the model predicts for a paired reference point.
    """
    # Six groups of five points each (one group per row).
    sample_mat = [
        [0.2, 0.3], [1.0, 0.28], [1.98, 0.7], [0.1, 1.11], [1.0, 1.12],
        [5.94, 0.4], [6.73, 0.38], [7.42, 0.97], [6.74, 1.23], [5.91, 1.20],
        [2.0, 4.8], [2.74, 4.78], [3.6, 5.1], [3.1, 5.3], [1.95, 5.8],
        [8.94, 5.2], [9.6, 5.12], [10.31, 5.29], [8.73, 6.0], [9.54, 5.99],
        [5.17, 9.1], [5.64, 8.97], [6.56, 9.39], [4.99, 9.82], [5.5, 9.74],
        [11.8, 1.8], [12.04, 1.74], [12.9, 2.0], [11.74, 2.4], [12.11, 2.32],
    ]

    # Label every sample (the original author notes dbscan.fit() is equivalent).
    dbscan = DBSCAN(sample_mat, eps=1.0, min_pts=2, dist_func="euclidean")
    label_data = dbscan.cluster()
    for position, point in enumerate(sample_mat):
        self.tlog(str(point) + " -> " + str(label_data[position]))
    distinct_labels = list(set(label_data))
    self.tlog("labels are " + str(distinct_labels))

    # (unseen point, reference point whose predicted label is expected)
    probe_pairs = [
        ([11.70, 3.0], [11.74, 2.4]),
        ([8.40, 5.8], [8.73, 6.0]),
        ([0.7, 0.1], [1.08, 0.7]),
    ]
    outcomes = []
    for unseen, reference in probe_pairs:
        expected_label = dbscan.predict(reference)
        outcomes.append(
            autotest.eval_predict_one(dbscan, unseen, expected_label, self.logging)
        )
    assert all(outcomes)
def test_process(self):
    """Run DBSCAN over six groups of five 2-D points and verify predictions.

    Logs the label assigned to each training point and the set of distinct
    labels, then asserts that three unseen points evaluate successfully
    against the label predicted for their paired reference point.
    """
    group_a = [[0.2, 0.3], [1.0, 0.28], [1.98, 0.7], [0.1, 1.11], [1.0, 1.12]]
    group_b = [[5.94, 0.4], [6.73, 0.38], [7.42, 0.97], [6.74, 1.23], [5.91, 1.20]]
    group_c = [[2.0, 4.8], [2.74, 4.78], [3.6, 5.1], [3.1, 5.3], [1.95, 5.8]]
    group_d = [[8.94, 5.2], [9.6, 5.12], [10.31, 5.29], [8.73, 6.0], [9.54, 5.99]]
    group_e = [[5.17, 9.1], [5.64, 8.97], [6.56, 9.39], [4.99, 9.82], [5.5, 9.74]]
    group_f = [[11.8, 1.8], [12.04, 1.74], [12.9, 2.0], [11.74, 2.4], [12.11, 2.32]]
    sample_mat = group_a + group_b + group_c + group_d + group_e + group_f

    # Assign a cluster label to every sample (dbscan.fit() is noted as equivalent).
    dbscan = DBSCAN(sample_mat, eps=1.0, min_pts=2, dist_func="euclidean")
    label_data = dbscan.cluster()

    for row_no, sample in enumerate(sample_mat):
        line = str(sample) + " -> " + str(label_data[row_no])
        self.tlog(line)
    self.tlog("labels are " + str(list(set(label_data))))

    def agrees_with(unseen, reference):
        # The expected label is whatever the model predicts for `reference`.
        return autotest.eval_predict_one(
            dbscan, unseen, dbscan.predict(reference), self.logging
        )

    # Clustering check with data that was not part of the training set.
    r1 = agrees_with([11.70, 3.0], [11.74, 2.4])
    r2 = agrees_with([8.40, 5.8], [8.73, 6.0])
    r3 = agrees_with([0.7, 0.1], [1.08, 0.7])
    assert r1 and r2 and r3
def test_process(self):
    """Fit a CRF on space-less character sequences and evaluate four inputs.

    Each training sequence starts with the '<s>' marker followed by the
    characters of a sentence with its spaces removed.  The label sequence
    marks with 1 every position that is followed by a space and with 0
    every other position.

    NOTE(review): the evaluation results are not asserted, so this test
    only fails if one of the calls raises -- confirm that is intentional.
    """

    def as_sequence(text):
        # '<s>' marker followed by one list element per character.
        return ["<s>"] + list(text)

    # Character sequences with the spaces removed.
    train_mat = [
        as_sequence("Iamaboy"),
        as_sequence("Youareagirl"),
        as_sequence("Iamagoodboy"),
        as_sequence("Youareagoodgirl"),
    ]
    # Labels aligned with train_mat: 1 == space, 0 == no-space.
    train_label = [
        [0, 1, 0, 1, 1, 0, 0, 1],
        [0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1],
        [0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1],
        [0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1],
    ]

    nlp_common = nlp()
    voca = nlp_common.extract_vocabulary(train_mat)
    train_wordseq_mat = [
        nlp_common.set_of_wordseq2matrix(voca, wordseq) for wordseq in train_mat
    ]

    crf = CRF(train_wordseq_mat, train_label,
              hidden_state_labeled=True, hidden_state=2)
    crf.fit(toler=0.001, epoch=30)

    # (unseen character sequence, expected space labels)
    cases = [
        (as_sequence("Iamgood"), [0, 1, 0, 1, 0, 0, 0, 1]),
        (as_sequence("Youareaboy"), [0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1]),
        (as_sequence("Youaregirl"), [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1]),
        (as_sequence("Iamgirl"), [0, 1, 0, 1, 0, 0, 0, 1]),
    ]
    for sequence, expected in cases:
        encoded = nlp_common.set_of_wordseq2matrix(voca, sequence)
        autotest.eval_predict_one(crf, encoded, expected, self.logging)
def test_process(self):
    """Train NaiveBayes on six labelled documents and check two predictions.

    A vocabulary is extracted from the sample documents (expected to hold
    12 entries), every document is converted to a bag-of-words vector and
    the classifier is fitted.  Two unseen sentences must then evaluate as
    'spam' according to ``autotest.eval_predict_one``.
    """
    sample_docs = [
        "hello this is virus mail",
        "hi this is from friend",
        "how about buy this virus",
        "facebook friend contact to you",
        "I love you baby virus",
        "what a nice day how about you",
    ]
    docs_label = ['spam', 'real', 'spam', 'real', 'spam', 'real']

    nlp_eng = nlp("eng")

    # Extract the vocabulary from the documents.
    voca = nlp_eng.extract_vocabulary(sample_docs)
    self.tlog(voca)
    assert len(voca) == 12

    # Convert every document to a bag-of-words vector over that vocabulary.
    docs_vector = [nlp_eng.bag_of_word2vector(voca, doc) for doc in sample_docs]
    self.tlog(docs_vector)

    # Train the classifier.
    nbayes = NaiveBayes(docs_vector, docs_label)
    nbayes.fit()

    # Test case 1
    tc1 = "this is virus mail"
    tc1_vec = nlp_eng.bag_of_word2vector(voca, tc1)
    self.tlog(tc1)
    self.tlog(tc1_vec)
    r1 = autotest.eval_predict_one(nbayes, tc1_vec, 'spam', self.logging)
    # Truthiness check instead of `== True` (PEP 8), matching the SVM/SVC tests.
    assert r1

    # Test case 2
    tc2 = "I love you love"
    tc2_vec = nlp_eng.bag_of_word2vector(voca, tc2)
    self.tlog(tc2)
    self.tlog(tc2_vec)
    r2 = autotest.eval_predict_one(nbayes, tc2_vec, 'spam', self.logging)
    assert r2
def test_process(self):
    """Fit an FNN on four 2-D samples and evaluate two unseen points.

    Labels are two-element one-hot vectors, and both evaluations are run
    with ``one_hot=True``.

    NOTE(review): the evaluation results are not asserted, so this test
    only fails if one of the calls raises -- confirm that is intentional.
    """
    # (sample, one-hot label) pairs kept side by side for readability.
    labelled_samples = [
        ([0.12, 0.25], [0, 1]),
        ([3.24, 4.33], [1, 0]),
        ([0.14, 0.45], [0, 1]),
        ([7.30, 4.23], [1, 0]),
    ]
    train_mat = [sample for sample, _ in labelled_samples]
    train_label = [label for _, label in labelled_samples]

    # The third argument is presumably the hidden-layer layout -- TODO confirm.
    fnn = FNN(train_mat, train_label, [3])
    fnn.fit(lr=0.01, epoch=2000, err_th=0.001, batch_size=4)

    for unseen, expected in (([0.10, 0.33], [0, 1]), ([4.40, 4.37], [1, 0])):
        autotest.eval_predict_one(fnn, unseen, expected, self.logging, one_hot=True)
def test_process(self):
    """Check that NaiveBayes classifies two unseen sentences as spam.

    Steps: extract a vocabulary from six sample documents (12 entries are
    expected), turn each document into a bag-of-words vector, fit the
    classifier on the labelled vectors, and evaluate two test sentences
    with ``autotest.eval_predict_one``.
    """
    sample_docs = [
        "hello this is virus mail",
        "hi this is from friend",
        "how about buy this virus",
        "facebook friend contact to you",
        "I love you baby virus",
        "what a nice day how about you",
    ]
    docs_label = ["spam", "real", "spam", "real", "spam", "real"]

    nlp_eng = nlp("eng")

    # Extract the vocabulary from the documents.
    voca = nlp_eng.extract_vocabulary(sample_docs)
    self.tlog(voca)
    assert len(voca) == 12

    # Convert the documents to bag-of-words vectors using the vocabulary.
    docs_vector = [nlp_eng.bag_of_word2vector(voca, doc) for doc in sample_docs]
    self.tlog(docs_vector)

    # Train NaiveBayes.
    nbayes = NaiveBayes(docs_vector, docs_label)
    nbayes.fit()

    # Test case 1
    tc1 = "this is virus mail"
    tc1_vec = nlp_eng.bag_of_word2vector(voca, tc1)
    self.tlog(tc1)
    self.tlog(tc1_vec)
    r1 = autotest.eval_predict_one(nbayes, tc1_vec, "spam", self.logging)
    # Plain truthiness rather than `== True` (PEP 8), as in the SVM/SVC tests.
    assert r1

    # Test case 2
    tc2 = "I love you love"
    tc2_vec = nlp_eng.bag_of_word2vector(voca, tc2)
    self.tlog(tc2)
    self.tlog(tc2_vec)
    r2 = autotest.eval_predict_one(nbayes, tc2_vec, "spam", self.logging)
    assert r2
def test_process(self):
    """Fit LinearRegression on four 2-D samples and evaluate two points.

    Labels are two-element one-hot vectors, and both evaluations are run
    with ``one_hot=True``.

    NOTE(review): the evaluation results are not asserted, so this test
    only fails if one of the calls raises -- confirm that is intentional.
    """
    train_mat = [
        [0.12, 0.25],
        [3.24, 4.33],
        [0.14, 0.45],
        [7.30, 4.23],
    ]
    train_label = [
        [0, 1],
        [1, 0],
        [0, 1],
        [1, 0],
    ]

    linear_reg = LinearRegression(train_mat, train_label)
    linear_reg.fit(lr=0.001, epoch=1000, batch_size=4)

    # (unseen point, expected one-hot label)
    checks = (
        ([0.10, 0.33], [0, 1]),
        ([4.40, 4.37], [1, 0]),
    )
    for unseen, expected in checks:
        autotest.eval_predict_one(
            linear_reg, unseen, expected, self.logging, one_hot=True
        )
def test_process(self):
    """Fit an SVM on four 2-D samples labelled -1.0 / 1.0 and check two points.

    Training data, labels and the evaluated points are passed as
    ``np.mat`` matrices; the expected outputs are the scalars -1.0 and 1.0.
    """
    samples = [
        [0.12, 0.25],
        [3.24, 4.33],
        [0.14, 0.45],
        [7.30, 4.23],
    ]
    train_mat = np.mat(samples)
    # One label row per sample.
    train_label = np.mat([[-1.0], [1.0], [-1.0], [1.0]])

    svm = SVM(train_mat, train_label)
    svm.fit(C=5.0, toler=0.001, epoch=50)

    negative_ok = autotest.eval_predict_one(
        svm, np.mat([0.10, 0.33]), -1.0, self.logging
    )
    positive_ok = autotest.eval_predict_one(
        svm, np.mat([4.40, 4.37]), 1.0, self.logging
    )
    assert negative_ok
    assert positive_ok
def test_process(self):
    """Train an FNN on four 2-D points and evaluate two unseen points.

    The training labels are two-element one-hot vectors and the
    evaluations are run with ``one_hot=True``.

    NOTE(review): the evaluation results are not asserted, so this test
    only fails if one of the calls raises -- confirm that is intentional.
    """
    train_mat = [
        [0.12, 0.25],
        [3.24, 4.33],
        [0.14, 0.45],
        [7.30, 4.23],
    ]
    first_class, second_class = [0, 1], [1, 0]
    # Fresh list objects per row, exactly as a literal would create them.
    train_label = [
        list(first_class),
        list(second_class),
        list(first_class),
        list(second_class),
    ]

    # The third argument is presumably the hidden-layer layout -- TODO confirm.
    fnn = FNN(train_mat, train_label, [3])
    fnn.fit(lr=0.01, epoch=2000, err_th=0.001, batch_size=4)

    autotest.eval_predict_one(
        fnn, [0.10, 0.33], [0, 1], self.logging, one_hot=True
    )
    autotest.eval_predict_one(
        fnn, [4.40, 4.37], [1, 0], self.logging, one_hot=True
    )
def test_process(self):
    """Fit an SVC on four 2-D samples with two-element labels and check it.

    Two unseen points are evaluated against the float vectors [0., 1.]
    and [1., 0.]; both evaluations must succeed.
    """
    # (sample, two-element label) pairs.
    labelled_samples = [
        ([0.12, 0.25], [0, 1]),
        ([3.24, 4.33], [1, 0]),
        ([0.14, 0.45], [0, 1]),
        ([7.30, 4.23], [1, 0]),
    ]
    train_mat = [sample for sample, _ in labelled_samples]
    train_label = [label for _, label in labelled_samples]

    svc = SVC(train_mat, train_label)
    svc.fit(C=5.0, toler=0.001, epoch=50)

    first_ok = autotest.eval_predict_one(svc, [0.10, 0.33], [0., 1.], self.logging)
    second_ok = autotest.eval_predict_one(svc, [4.40, 4.37], [1., 0.], self.logging)
    assert first_ok
    assert second_ok
def test_process(self):
    """Fit LogisticRegression on four 2-D samples and evaluate two points.

    Labels are two-element one-hot vectors, and both evaluations are run
    with ``one_hot=True``.

    NOTE(review): the evaluation results are not asserted, so this test
    only fails if one of the calls raises -- confirm that is intentional.
    """
    train_mat = [
        [0.12, 0.25],
        [3.24, 4.33],
        [0.14, 0.45],
        [7.30, 4.23],
    ]
    train_label = [
        [0, 1],
        [1, 0],
        [0, 1],
        [1, 0],
    ]

    logistic_reg = LogisticRegression(train_mat, train_label)
    logistic_reg.fit(lr=0.001, epoch=2000, batch_size=4)

    # (unseen point, expected one-hot label)
    for unseen, expected in (([0.10, 0.33], [0, 1]), ([4.40, 4.37], [1, 0])):
        autotest.eval_predict_one(
            logistic_reg, unseen, expected, self.logging, one_hot=True
        )
def test_process(self):
    """Store the shared ID3 tree to disk, restore it and check a prediction.

    The tree is read from the global value "Stored_ID3_tree", written to
    a temporary file with ``fs.store_module`` and read back with
    ``fs.restore_module``.  The restored module must still evaluate
    ["cloudy", "cloudy", "sunny"] as "sunny".
    """
    # NOTE(review): presumably another test stores this value first -- confirm
    # the execution order guarantees it is available here.
    tree = self.get_global_value("Stored_ID3_tree")

    tmp_store_name = "tmp/tree_aba_store_test.dat"
    self.tlog("store tree to " + tmp_store_name)
    fs.store_module(tree, tmp_store_name)

    mod = fs.restore_module(tmp_store_name)
    self.tlog("restored tree : " + str(mod.tree))

    mod_r1 = autotest.eval_predict_one(
        mod, ["cloudy", "cloudy", "sunny"], "sunny", self.logging
    )
    # Truthiness check instead of `== True` (PEP 8), matching the SVM/SVC tests.
    assert mod_r1
def test_process(self):
    """Cluster 30 two-dimensional points with Kmeans and check predictions.

    Clustering is run twice: once with a fixed K of 6 via ``cluster`` and
    once via ``fit`` with ``max_K=7``.  Afterwards three unseen points are
    evaluated against the label the model predicts for a paired reference
    point, and all three evaluations must succeed.
    """
    # The data below forms six groups of five points each (one group per row).
    sample_mat = [
        [0.2, 0.3], [1.0, 0.28], [1.98, 0.7], [0.1, 1.11], [1.0, 1.12],
        [5.94, 0.4], [6.73, 0.38], [7.42, 0.97], [6.74, 1.23], [5.91, 1.20],
        [2.0, 4.8], [2.74, 4.78], [3.6, 5.1], [3.1, 5.3], [1.95, 5.8],
        [8.94, 5.2], [9.6, 5.12], [10.31, 5.29], [8.73, 6.0], [9.54, 5.99],
        [5.17, 9.1], [5.64, 8.97], [6.56, 9.39], [4.99, 9.82], [5.5, 9.74],
        [11.8, 1.8], [12.04, 1.74], [12.9, 2.0], [11.74, 2.4], [12.11, 2.32],
    ]

    kmeans = Kmeans(sample_mat, dist_func='euclidean')

    # Find the clusters with a fixed K.
    cluster_point_fixed = kmeans.cluster(K=6, epoch=30)
    fixed_count = len(cluster_point_fixed)
    self.tlog("fixed point count : " + str(fixed_count))
    self.tlog("cluster point : \n" + str(cluster_point_fixed))

    # Let the model search for a good K itself (flexible K).
    cluster_point_flexible = kmeans.fit(max_K=7, random_try_count=10, epoch=30)
    flexible_count = len(cluster_point_flexible)
    self.tlog("flexible point count : " + str(flexible_count))
    self.tlog("cluster point : \n" + str(cluster_point_flexible))

    # (unseen point, reference point whose predicted label is expected)
    probe_pairs = (
        ([11.70, 3.0], [11.74, 2.4]),
        ([8.40, 5.8], [8.73, 6.0]),
        ([0.7, 0.1], [1.08, 0.7]),
    )
    outcomes = []
    for unseen, reference in probe_pairs:
        expected_label = kmeans.predict(reference)
        outcomes.append(
            autotest.eval_predict_one(kmeans, unseen, expected_label, self.logging)
        )
    assert all(outcomes)
def test_process(self):
    """Build an ID3 decision tree from nine samples and check one prediction.

    Each sample is a list of three categorical values and each label is
    either 'rain' or 'sunny'.  After building, the tree structure is
    logged and ['cloudy', 'cloudy', 'rain'] must evaluate as 'sunny'.
    """
    sample_mat = [
        ['sunny', 'cloudy', 'rain'],
        ['cloudy', 'sunny', 'rain'],
        ['cloudy', 'cloudy', 'rain'],
        ['cloudy', 'cloudy', 'rain'],
        ['cloudy', 'sunny', 'sunny'],
        ['sunny', 'sunny', 'rain'],
        ['sunny', 'sunny', 'sunny'],
        ['sunny', 'cloudy', 'sunny'],
        ['cloudy', 'cloudy', 'rain'],
    ]
    sample_label = [
        'rain', 'rain', 'rain',
        'sunny', 'sunny', 'rain',
        'sunny', 'sunny', 'sunny',
    ]

    tree = DecisionTreeID3(sample_mat, sample_label)
    tree_structure = tree.build()
    self.tlog("Tree structure : " + str(tree_structure))

    r1 = autotest.eval_predict_one(
        tree, ['cloudy', 'cloudy', 'rain'], 'sunny', self.logging
    )
    # Truthiness check instead of `== True` (PEP 8), matching the SVM/SVC tests.
    assert r1
def test_process(self):
    """Build an ID3 decision tree, check one prediction and share the tree.

    Each sample is a list of three categorical values and each label is
    either "rain" or "sunny".  After building, the tree structure is
    logged and ["cloudy", "cloudy", "rain"] must evaluate as "sunny".
    The tree is then stored under the global value "Stored_ID3_tree".
    """
    sample_mat = [
        ["sunny", "cloudy", "rain"],
        ["cloudy", "sunny", "rain"],
        ["cloudy", "cloudy", "rain"],
        ["cloudy", "cloudy", "rain"],
        ["cloudy", "sunny", "sunny"],
        ["sunny", "sunny", "rain"],
        ["sunny", "sunny", "sunny"],
        ["sunny", "cloudy", "sunny"],
        ["cloudy", "cloudy", "rain"],
    ]
    sample_label = [
        "rain", "rain", "rain",
        "sunny", "sunny", "rain",
        "sunny", "sunny", "sunny",
    ]

    tree = DecisionTreeID3(sample_mat, sample_label)
    tree_structure = tree.build()
    self.tlog("Tree structure : " + str(tree_structure))

    r1 = autotest.eval_predict_one(
        tree, ["cloudy", "cloudy", "rain"], "sunny", self.logging
    )
    # Truthiness check instead of `== True` (PEP 8), matching the SVM/SVC tests.
    assert r1

    # Only reached when the assertion passed: publish the verified tree.
    self.set_global_value("Stored_ID3_tree", tree)