def test_classify(self):
    """Check the class assigned to two fixed points by a 3-neighbour model.

    The model is trained on the shared ``xs``/``ys`` data; the first point
    must be classified as 1 and the second as 0.
    """
    model = kNN.train(xs, ys, 3)
    expected_by_point = (
        ([6, -173.143442352], 1),
        ([309, -271.005880394], 0),
    )
    for point, expected in expected_by_point:
        self.assertEqual(kNN.classify(model, point), expected)
def test_model_accuracy(self):
    """Re-classify the training samples with a model fitted on all of them.

    Every prediction must equal the recorded value in ``expected``, and
    exactly 15 of those predictions must agree with the label in ``ys``.
    """
    model = kNN.train(xs, ys, 3)
    expected = [1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
    hits = 0
    for idx, want in enumerate(expected):
        got = kNN.classify(model, xs[idx])
        self.assertEqual(got, want)
        if got == ys[idx]:
            hits += 1
    self.assertEqual(hits, 15)
def test_leave_one_out(self):
    """Leave-one-out check of the 3-neighbour classifier.

    For each index ``i`` a model is trained on ``xs``/``ys`` with sample
    ``i`` removed, and that held-out sample is then classified.  Every
    prediction must equal the recorded value in ``expected``, and exactly
    13 of them must agree with the label in ``ys``.
    """
    k = 3
    expected = [1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1]
    correct = 0
    for i, want in enumerate(expected):
        # A fresh model is fitted per sample, so no model is trained on
        # the full data set up front (it would never be used).
        model = kNN.train(xs[:i] + xs[i + 1:], ys[:i] + ys[i + 1:], k)
        got = kNN.classify(model, xs[i])
        self.assertEqual(got, want)
        if got == ys[i]:
            correct += 1
    self.assertEqual(correct, 13)
def test_leave_one_out(self):
    """Leave-one-out check of the 3-neighbour classifier.

    For each index ``i`` a model is trained on ``xs``/``ys`` with sample
    ``i`` removed, and that held-out sample is then classified.  Every
    prediction must equal the recorded value in ``expected``, and exactly
    13 of them must agree with the label in ``ys``.
    """
    k = 3
    expected = [1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1]
    correct = 0
    for i, want in enumerate(expected):
        # A fresh model is fitted per sample, so no model is trained on
        # the full data set up front (it would never be used).
        model = kNN.train(xs[:i] + xs[i + 1:], ys[:i] + ys[i + 1:], k)
        got = kNN.classify(model, xs[i])
        self.assertEqual(got, want)
        if got == ys[i]:
            correct += 1
    self.assertEqual(correct, 13)
def get(self):
    """Look up a stored model (``no='1'``) or score input with one (``no='2'``).

    Request arguments, all read with ``self.get_argument``:
        no: '1' selects the lookup, '2' selects scoring.  For any other
            value nothing is sent back.
        model_id: id of the record to look up (``no='1'``).
        model_type: '1' uses ``LogisticRegression``, '2' uses ``kNN``,
            '3' uses ``NaiveBayes``.  For the lookup, every value other
            than '1' reads from ``public.pymodel`` filtered by this type.
        model_name: module name under the ``data_mining`` package that
            supplies the model builder (scoring, types '2' and '3').
        model: expression evaluated into the sequence handed to the
            algorithm's ``calculate``/``classify`` (scoring).
        beta: expression evaluated into the coefficient list assigned to
            ``model.beta`` (scoring, type '1').

    The result dict is sent with ``self.response``.  For the lookup it
    holds ``struct`` (column-name string) and ``rows`` (fetched rows);
    for scoring it holds ``op`` (``calculate`` result) and ``rows``
    (``classify`` result), or stays empty for an unknown ``model_type``.

    Exceptions raised while evaluating ``model``/``beta`` (for example a
    ``SyntaxError`` when the argument is missing and therefore empty) or
    while importing the model module are not caught here.
    """
    no = self.get_argument('no', default='')
    model_id = self.get_argument('model_id', default='')
    model_type = self.get_argument('model_type', default='')
    package = self.get_argument('model_name', default='')
    cur = self.db.getCursor()
    rowdata = {}

    if no == '1':
        # Lookup.  Request values are handed to the driver as bound
        # parameters instead of being formatted into the SQL text, so a
        # crafted model_id/model_type cannot alter the statement.
        # NOTE(review): assumes the driver uses the '%s' placeholder
        # style (as PostgreSQL DB-API drivers do) - confirm.
        if model_type == '1':
            cur.execute(
                " select b.name,a.create_id,a.name,a.note,a.beta from public.logistis a "
                " left join public.account b on a.create_id = b.id "
                "where a.id=%s ",
                (model_id,))
            rows = cur.fetchall()
            print(rows)
            rowdata['struct'] = "id,create_id,name,note,beta "
        else:
            cur.execute(
                " select b.name,a.create_id,a.name,a.note,c.name,a.file_name from public.pymodel a "
                " left join public.account b on a.create_id = b.id "
                " left join public.model c on a.type = c.type "
                " where a.id=%s and a.type=%s ",
                (model_id, model_type))
            rows = cur.fetchall()
            rowdata['struct'] = "id,create_id,name,note,type,filename "
        rowdata['rows'] = rows
        self.response(rowdata)
    elif no == '2':
        # SECURITY: every eval() below runs a request argument as Python
        # code, which lets a client execute arbitrary code in this
        # process.  It is left in place because clients may rely on
        # sending expressions; if they only ever send literals, switch
        # to ast.literal_eval.
        if model_type == '1':
            beta = self.get_argument('beta', default='')
            model_data = self.get_argument('model', default='')
            print(model_data)
            a = list(eval(model_data))
            model = LogisticRegression.LogisticRegression()
            model.beta = list(eval(beta))
            rowdata = {
                'op': LogisticRegression.calculate(model, a),
                'rows': LogisticRegression.classify(model, a),
            }
        elif model_type in ('2', '3'):
            import importlib

            # SECURITY: the imported module is chosen by the request
            # (model_name); consider restricting it to known names.
            fitted = importlib.import_module('data_mining.' + package)
            if model_type == '2':
                algorithm = kNN
                model = fitted.model.knn(kNN.kNN())
            else:
                algorithm = NaiveBayes
                model = fitted.model.bayes(NaiveBayes.NaiveBayes())
            model_data = self.get_argument('model', default='')
            a = list(eval(model_data))
            rowdata = {
                'op': algorithm.calculate(model, a),
                'rows': algorithm.classify(model, a),
            }
        self.response(rowdata)
from Bio import kNN
from scipy import spatial
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Pull the tf-idf matrix out as a dense array; per the original note,
# element a[i][j] is the tf-idf weight of word j in text class i.
features = tfidf.toarray()
print(features.shape)
target = [label for (_, label) in documents]

# The first 1500 rows are used for training, the remainder for testing.
n_train = 1500
train_set1, test_set1 = features[:n_train, :], features[n_train:, :]
target_train, target_test = target[:n_train], target[n_train:]
n_test = len(target_test)

# Linear-kernel SVM.
svclf = SVC(kernel='linear', probability=True)
svclf.fit(train_set1, target_train)
pred_svc = svclf.predict(test_set1)
print('SVM=', sum(pred_svc == target_test) / n_test)

# Bio.kNN with cosine distance, 7 neighbours.
n_neighbors = 7
model = kNN.train(train_set1, target_train, n_neighbors)
cosine = spatial.distance.cosine
dd = [kNN.classify(model, sample, distance_fn=cosine) for sample in test_set1]
print('KNN_cos=', sum(np.array(dd) == np.array(target_test)) / n_test)

# scikit-learn kNN with its default metric; n_neighbors would default
# to 5, so 7 is passed to match the Bio.kNN run above.
knnclf = KNeighborsClassifier(n_neighbors=n_neighbors)
knnclf.fit(train_set1, target_train)
pred_knn = knnclf.predict(test_set1)
print('KNN_eu=', sum(pred_knn == target_test) / n_test)
def get(self):
    """Look up a stored model (``no='1'``) or score input with one (``no='2'``).

    Request arguments, all read with ``self.get_argument``:
        no: '1' selects the lookup, '2' selects scoring.  For any other
            value nothing is sent back.
        model_id: id of the record to look up (``no='1'``).
        model_type: '1' uses ``LogisticRegression``, '2' uses ``kNN``,
            '3' uses ``NaiveBayes``.  For the lookup, every value other
            than '1' reads from ``public.pymodel`` filtered by this type.
        model_name: module name under the ``data_mining`` package that
            supplies the model builder (scoring, types '2' and '3').
        model: expression evaluated into the sequence handed to the
            algorithm's ``calculate``/``classify`` (scoring).
        beta: expression evaluated into the coefficient list assigned to
            ``model.beta`` (scoring, type '1').

    The result dict is sent with ``self.response``.  For the lookup it
    holds ``struct`` (column-name string) and ``rows`` (fetched rows);
    for scoring it holds ``op`` (``calculate`` result) and ``rows``
    (``classify`` result), or stays empty for an unknown ``model_type``.

    Exceptions raised while evaluating ``model``/``beta`` (for example a
    ``SyntaxError`` when the argument is missing and therefore empty) or
    while importing the model module are not caught here.
    """
    no = self.get_argument('no', default='')
    model_id = self.get_argument('model_id', default='')
    model_type = self.get_argument('model_type', default='')
    package = self.get_argument('model_name', default='')
    cur = self.db.getCursor()
    rowdata = {}

    if no == '1':
        # Lookup.  Request values are handed to the driver as bound
        # parameters instead of being formatted into the SQL text, so a
        # crafted model_id/model_type cannot alter the statement.
        # NOTE(review): assumes the driver uses the '%s' placeholder
        # style (as PostgreSQL DB-API drivers do) - confirm.
        if model_type == '1':
            cur.execute(
                " select b.name,a.create_id,a.name,a.note,a.beta from public.logistis a "
                " left join public.account b on a.create_id = b.id "
                "where a.id=%s ",
                (model_id,))
            rows = cur.fetchall()
            print(rows)
            rowdata['struct'] = "id,create_id,name,note,beta "
        else:
            cur.execute(
                " select b.name,a.create_id,a.name,a.note,c.name,a.file_name from public.pymodel a "
                " left join public.account b on a.create_id = b.id "
                " left join public.model c on a.type = c.type "
                " where a.id=%s and a.type=%s ",
                (model_id, model_type))
            rows = cur.fetchall()
            rowdata['struct'] = "id,create_id,name,note,type,filename "
        rowdata['rows'] = rows
        self.response(rowdata)
    elif no == '2':
        # SECURITY: every eval() below runs a request argument as Python
        # code, which lets a client execute arbitrary code in this
        # process.  It is left in place because clients may rely on
        # sending expressions; if they only ever send literals, switch
        # to ast.literal_eval.
        if model_type == '1':
            beta = self.get_argument('beta', default='')
            model_data = self.get_argument('model', default='')
            print(model_data)
            a = list(eval(model_data))
            model = LogisticRegression.LogisticRegression()
            model.beta = list(eval(beta))
            rowdata = {
                'op': LogisticRegression.calculate(model, a),
                'rows': LogisticRegression.classify(model, a),
            }
        elif model_type in ('2', '3'):
            import importlib

            # SECURITY: the imported module is chosen by the request
            # (model_name); consider restricting it to known names.
            fitted = importlib.import_module('data_mining.' + package)
            if model_type == '2':
                algorithm = kNN
                model = fitted.model.knn(kNN.kNN())
            else:
                algorithm = NaiveBayes
                model = fitted.model.bayes(NaiveBayes.NaiveBayes())
            model_data = self.get_argument('model', default='')
            a = list(eval(model_data))
            rowdata = {
                'op': algorithm.calculate(model, a),
                'rows': algorithm.classify(model, a),
            }
        self.response(rowdata)