def test_grids_list_get(self):
    """The grids_list endpoint reports one entry per completed grid search."""
    iris = load_iris()
    client = DjangoClient()

    # No searches yet: the list is empty.
    response = client.get(reverse('grids_list'))
    self.assertEqual(200, response.status_code)
    self.assertEqual(0, len(response.data))

    # First search: a DecisionTree grid.
    first_search = ATGridSearchCV(tree.DecisionTreeClassifier(), {
        'criterion': ['gini', 'entropy'],
        'max_depth': range(1, 6),
        'max_features': ['auto', 'log2'],
    }, webserver_url=self.live_server_url)
    wait(first_search.fit(iris.data, iris.target))
    response = client.get(reverse('grids_list'))
    self.assertEqual(200, response.status_code)
    self.assertEqual(1, len(response.data))

    # Second search: an ExtraTree grid; the list should now have two entries.
    second_search = ATGridSearchCV(tree.ExtraTreeClassifier(), {
        'criterion': ['gini', 'entropy'],
        'max_depth': range(1, 6),
        'max_features': ['auto', 'log2'],
    }, webserver_url=self.live_server_url)
    wait(second_search.fit(iris.data, iris.target))
    response = client.get(reverse('grids_list'))
    self.assertEqual(200, response.status_code)
    self.assertEqual(2, len(response.data))
def _create_dataset():
    """Serialize the iris features and labels into two in-memory CSV files.

    Returns a (examples_file, labels_file) pair of BytesIO objects,
    both rewound to position 0 and carrying CSV-style ``.name`` attributes.
    """
    iris = load_iris()
    examples_file = BytesIO()
    examples_file.name = 'examples.csv'
    label_file = BytesIO()
    label_file.name = 'labels.csv'
    numpy.savetxt(examples_file, iris.data, delimiter=',')
    numpy.savetxt(label_file, iris.target, delimiter=',')
    # Rewind so callers can read the freshly written content.
    examples_file.seek(0)
    label_file.seek(0)
    return examples_file, label_file
def test_dataset_model_single_file(self):
    """DataSet stores uploads under datasets/<name>/ and round-trips the CSVs."""
    examples_file, label_file = _create_dataset()
    dataset, _ = DataSet.objects.get_or_create(
        name='TEST',
        examples=SimpleUploadedFile(examples_file.name, examples_file.read()),
        labels=SimpleUploadedFile(label_file.name, label_file.read()))

    # Files land under a per-dataset directory.
    self.assertEqual('datasets/TEST/examples.csv', dataset.examples.name)
    self.assertEqual('datasets/TEST/labels.csv', dataset.labels.name)

    # Reading the stored files back must reproduce the original arrays.
    restored_examples = numpy.genfromtxt(dataset.examples, delimiter=',')
    restored_labels = numpy.genfromtxt(dataset.labels, delimiter=',')
    iris = load_iris()
    self.assertTrue(numpy.array_equal(restored_examples, iris.data))
    self.assertTrue(numpy.array_equal(restored_labels, iris.target))
def test_iris_forest(self):
    """The 'Forest' iris experiment creates one RandomForest grid search."""
    call_command('iris_experiments', 'Forest', 'TEST',
                 url=self.live_server_url)
    forest_searches = GridSearch.objects.filter(
        classifier='RandomForestClassifier')
    # Queryset passed as msg so a failure shows what was actually created.
    self.assertEqual(1, forest_searches.count(), forest_searches)
    gs_forest = GridSearch.objects.get(classifier='RandomForestClassifier')
    # Expected grid size: 2 criteria * 5 depths * (n_features - 1) choices.
    expected = 2 * 5 * (len(load_iris().data[0]) - 1)
    self.assertAlmostEqual(gs_forest.results.count(), expected, delta=5)
def test_iris_tree(self):
    """The 'Tree' iris experiment creates one DecisionTree grid search."""
    call_command('iris_experiments', 'Tree', 'TEST',
                 url=self.live_server_url)
    tree_searches = GridSearch.objects.filter(
        classifier='DecisionTreeClassifier')
    # Queryset passed as msg so a failure shows what was actually created.
    self.assertEqual(1, tree_searches.count(), tree_searches)
    gs_tree = GridSearch.objects.get(classifier='DecisionTreeClassifier')
    # Expected grid size: 2 criteria * 5 depths * (n_features - 1) choices.
    expected = 2 * 5 * (len(load_iris().data[0]) - 1)
    self.assertAlmostEqual(
        gs_tree.results.all().count(), expected,
        msg=GridSearch.objects.filter(classifier='DecisionTreeClassifier'),
        delta=5)
def test_ATGridSearchCV_no_dataset(self):
    """Fitting without a persisted dataset still records ~all grid results."""
    iris = load_iris()
    search_space = {
        'criterion': ['gini', 'entropy'],
        'max_depth': range(1, 21),
        'max_features': ['auto', 'log2', 'sqrt', None],
    }
    # 2 criteria * 20 depths * 4 feature options.
    expected_results = 2 * 20 * 4
    search = ATGridSearchCV(tree.DecisionTreeClassifier(), search_space,
                            webserver_url=self.live_server_url)
    wait(search.fit(iris.data, iris.target))
    self.assertAlmostEqual(
        expected_results,
        GridSearch.objects.get(uuid=search._uuid).results.count(),
        delta=5)
def test_grid_detail(self):
    """grid_detail returns the search whose uuid appears in the URL."""
    iris = load_iris()
    search = ATGridSearchCV(tree.DecisionTreeClassifier(), {
        'criterion': ['gini', 'entropy'],
        'max_depth': range(1, 6),
        'max_features': ['auto', 'log2'],
    }, webserver_url=self.live_server_url)
    wait(search.fit(iris.data, iris.target))
    response = DjangoClient().get(
        reverse('grid_detail', kwargs={'uuid': search._uuid}))
    self.assertEqual(200, response.status_code)
    self.assertEqual(response.data['uuid'], str(search._uuid))
def test_dataset_post_dataset_length_mismatch(self):
    """POSTing examples and labels of different lengths yields HTTP 400."""
    examples_file = BytesIO()
    examples_file.name = 'examples.csv'
    label_file = BytesIO()
    label_file.name = 'labels.csv'
    # Deliberate mismatch: iris features vs breast-cancer labels.
    numpy.savetxt(examples_file, load_iris().data, delimiter=',')
    numpy.savetxt(label_file, load_breast_cancer().target, delimiter=',')
    examples_file.seek(0)
    label_file.seek(0)

    response = DjangoClient().post(reverse('datasets'), data={
        'dataset': 'TEST',
        'file[0]': examples_file,
        'file[1]': label_file,
    })
    self.assertEqual(400, response.status_code)
    self.assertEqual(b'"Examples and labels are not the same length"',
                     response.content)
def test_dask_cv_single(self):
    """Cross-validation submitted to a local dask cluster returns sane scores.

    First a single DecisionTree CV run, then the cluster is scaled up and
    five CV runs with different leaf sizes are fanned out and gathered.
    """
    test_cluster = LocalCluster(1)
    test_client = Client(test_cluster)
    iris = load_iris()
    reg = tree.DecisionTreeClassifier()
    cv_score = test_client.submit(cross_val_score, reg,
                                  iris.data, iris.target)
    self.assertGreater(cv_score.result().mean(), 0)

    test_cluster.scale_up(4)
    # BUG FIX: the key was the constant string 'reg_%i' (never interpolated),
    # so all five submissions collapsed into a single dict entry and only one
    # result was ever gathered.  Also start at 1: min_samples_leaf=0 is not
    # a valid value for DecisionTreeClassifier.
    _cv_results = {
        'reg_%i' % i: test_client.submit(
            cross_val_score,
            tree.DecisionTreeClassifier(min_samples_leaf=i),
            iris.data, iris.target)
        for i in range(1, 6)
    }
    cv_results = test_client.gather(list(_cv_results.values()))
    for cv_result in cv_results:
        self.assertGreaterEqual(cv_result.mean(), 0)
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
# FIX: sklearn.datasets.base is a private module that was removed from
# modern scikit-learn; the public loader lives in sklearn.datasets.
from sklearn.datasets import load_iris
import numpy as np

"""PCA usage example."""

# Load iris: x holds the sample features, y the class labels.
x, y = load_iris(return_X_y=True)
print("original_x size:", x.shape)
print("y的取值:", set(y), "y size: ", y.shape)

pca = PCA(n_components=2)  # reduce to 2 dimensions
reduced_x = pca.fit_transform(x)  # perform the projection
# print("reduced_x:\n", reduced_x)
print("reduced_x size: ", reduced_x.shape)

# Split the reduced points by class (transpose to column vectors).
red_x = reduced_x[np.where(y == 0), 0].T
red_y = reduced_x[np.where(y == 0), 1].T
blue_x = reduced_x[np.where(y == 1), 0].T
blue_y = reduced_x[np.where(y == 1), 1].T
green_x = reduced_x[np.where(y == 2), 0].T
green_y = reduced_x[np.where(y == 2), 1].T

# Scatter-plot the reduced data to inspect the class separation.
plt.scatter(red_x, red_y, c='r', marker="*")
plt.scatter(blue_x, blue_y, c='b', marker="D")
plt.scatter(green_x, green_y, c='g', marker=".")
plt.savefig("./pca_demo.png")
plt.show()
Test code for testing KNNClassifier implementation @author: rajajosh ''' from sklearn.datasets.base import load_iris from sklearn.cross_validation import train_test_split from sklearn.neighbors.classification import KNeighborsClassifier from sklearn.metrics.classification import accuracy_score from KNNClassifier import KNNClassifier import time from MyRandomClassifier import MyRandomClassifier iris = load_iris() x = iris.data y = iris.target x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.95) print(x_test,x_train,y_test,y_train) print("Using random classifier:") clsf = MyRandomClassifier() clsf.fit(x_train, y_train) predictions = clsf.predict(x_test) print(predictions,y_test) print(accuracy_score(y_test,predictions)) print("Using KNN classifier:")
# -*- coding: GBK -*- ''' Created on 2013Äê11ÔÂ5ÈÕ @author: asus ''' from sklearn.datasets.base import load_iris from matplotlib import pyplot as plt import numpy as np data = load_iris() print data['data'] print data['feature_names'] print data['target'] print data['target_names'] features = data['data'] feature_names = data['feature_names'] target = data['target'] for x in range(3): print x for t, marker, c in zip(xrange(3), ">ox", "rgb"): # We plot each class on its own to get different colored markers plt.subplot(2, 2, x + 1) color = c, marker plt.scatter(features[target == t, x], features[target == t, x + 1], c=c,
'''
Created on Oct 23, 2019

@author: havery
'''
# train_test_split is a utility for splitting data into two independent sets.
from sklearn.model_selection import train_test_split
# FIX: sklearn.datasets.base is a private module that was removed from
# modern scikit-learn; use the public import path.
from sklearn.datasets import load_iris

# stratify forces the train/test class distributions to match the full set.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, stratify=iris.target, random_state=42)

# Logistic-regression classifier.
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(solver='lbfgs', multi_class='ovr',
                         max_iter=5000, random_state=42)
# Fit the model on the training split.
clf.fit(X_train, y_train)
# score() reports the default accuracy metric on the held-out split.
accuracy = clf.score(X_test, y_test)
print('{}逻辑回归的精确度得分是:{}'.format(clf.__class__.__name__, accuracy))

# Random-forest classifier.
from sklearn.ensemble import RandomForestClassifier
clf2 = RandomForestClassifier(n_estimators=100, n_jobs=1, random_state=42)
clf2.fit(X_train, y_train)
accuracy2 = clf2.score(X_test, y_test)
print('{}随机森林的精确度得分是:{}'.format(clf2.__class__.__name__, accuracy2))

# Gradient-boosted decision-tree classifier (breast-cancer data).
from sklearn.datasets import load_breast_cancer
X_breast, y_breast = load_breast_cancer(return_X_y=True)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# FIX: sklearn.datasets.base is a private module (removed in modern
# scikit-learn); import the public loader from sklearn.datasets instead
# of the removed `from sklearn.datasets import base`.
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

# Build a DataFrame with the four feature columns plus a label column 'y'.
scikit_iris = load_iris()
iris = pd.DataFrame(
    data=np.c_[scikit_iris['data'], scikit_iris['target']],
    columns=np.append(scikit_iris.feature_names, ['y']))
iris.head(3)
def setup(self):
    """Load benchmark fixtures: multiclass (iris) and binary (breast cancer)."""
    self.Xmc, self.ymc = load_iris(return_X_y=True)
    self.Xb, self.yb = load_breast_cancer(return_X_y=True)
def read_iris():
    """Return the iris dataset as a DataFrame with a trailing 'target' column."""
    bunch = load_iris()
    combined = np.c_[bunch['data'], bunch['target']]
    column_names = bunch['feature_names'] + ['target']
    return pd.DataFrame(data=combined, columns=column_names)
''' from sklearn.datasets.base import load_iris from sklearn.model_selection._split import train_test_split import pandas as pd import numpy as np import matplotlib.pyplot as plt import mglearn #from IPython.display import display from pandas.plotting import scatter_matrix if __name__ == '__main__': pass # returns Bunch object iris_ds = load_iris() print("Keys: \n{}".format(iris_ds.keys())) print() print(iris_ds['DESCR'][:700] + "\n ...") print() print("Target names: {}".format(iris_ds['target_names'])) print("Feaure names: {}".format(iris_ds['feature_names'])) print() print("Type of data: {}".format(type(iris_ds['data']))) print("DS shape: {}".format(iris_ds['data'].shape)) print()
#!/usr/bin/env python
# encoding=gbk
import matplotlib.pyplot as plt
import sklearn.decomposition as dp
# FIX: sklearn.datasets.base is a private module removed from modern
# scikit-learn; import from the public sklearn.datasets package.
from sklearn.datasets import load_iris

# Load the data: x holds the feature matrix, y the class labels.
x, y = load_iris(return_X_y=True)
# Set up PCA with 2 principal components.
pca = dp.PCA(n_components=2)
# Project the original data into the reduced space.
reduced_x = pca.fit_transform(x)

red_x, red_y = [], []
blue_x, blue_y = [], []
green_x, green_y = [], []
# Bucket the reduced points into per-class coordinate lists.
for i in range(len(reduced_x)):
    if y[i] == 0:
        red_x.append(reduced_x[i][0])
        red_y.append(reduced_x[i][1])
    elif y[i] == 1:
        blue_x.append(reduced_x[i][0])
        blue_y.append(reduced_x[i][1])
    else:
        green_x.append(reduced_x[i][0])
        green_y.append(reduced_x[i][1])

plt.scatter(red_x, red_y, c='r', marker='x')
plt.scatter(blue_x, blue_y, c='b', marker='D')
plt.scatter(green_x, green_y, c='g', marker='.')
plt.show()
# -*- coding: GBK -*-
'''
Created on Nov 5, 2013 (date recovered from GBK mojibake)

@author: asus
'''
# FIX: sklearn.datasets.base is a private module removed from modern
# scikit-learn; import from the public sklearn.datasets package.
from sklearn.datasets import load_iris
from matplotlib import pyplot as plt
import numpy as np

# FIX: Python 2 `print` statements and `xrange` replaced with Python 3
# forms (also valid under Python 2 for these single-argument calls).
data = load_iris()
print(data['data'])
print(data['feature_names'])
print(data['target'])
print(data['target_names'])

features = data['data']
feature_names = data['feature_names']
target = data['target']

# One subplot per consecutive feature pair (x vs x+1); one scatter per class.
for x in range(3):
    print(x)
    for t, marker, c in zip(range(3), ">ox", "rgb"):
        # We plot each class on its own to get different colored markers
        plt.subplot(2, 2, x + 1)
        plt.scatter(features[target == t, x], features[target == t, x + 1],
                    c=c, marker=marker)