def test_grids_list_get(self):
     """The grids_list endpoint mirrors the number of fitted grid searches."""
     iris = load_iris()
     client = DjangoClient()

     # Nothing has been fitted yet -> the listing is empty.
     response = client.get(reverse('grids_list'))
     self.assertEqual(200, response.status_code)
     self.assertEqual(0, len(response.data))

     # Fit one grid search and expect exactly one listed entry.
     first_search = ATGridSearchCV(
         tree.DecisionTreeClassifier(),
         {'criterion': ['gini', 'entropy'],
          'max_depth': range(1, 6),
          'max_features': ['auto', 'log2']},
         webserver_url=self.live_server_url)
     wait(first_search.fit(iris.data, iris.target))
     response = client.get(reverse('grids_list'))
     self.assertEqual(200, response.status_code)
     self.assertEqual(1, len(response.data))

     # A second fitted search brings the listing to two entries.
     second_search = ATGridSearchCV(
         tree.ExtraTreeClassifier(),
         {'criterion': ['gini', 'entropy'],
          'max_depth': range(1, 6),
          'max_features': ['auto', 'log2']},
         webserver_url=self.live_server_url)
     wait(second_search.fit(iris.data, iris.target))
     response = client.get(reverse('grids_list'))
     self.assertEqual(200, response.status_code)
     self.assertEqual(2, len(response.data))
Beispiel #2
0
def _create_dataset():
    """Serialize the iris features and labels into two in-memory CSV files.

    Returns a (examples_file, labels_file) pair of BytesIO objects, each
    rewound to position 0 and carrying a ``.name`` so upload helpers can
    treat them like real files.
    """
    features_buffer = BytesIO()
    features_buffer.name = 'examples.csv'
    labels_buffer = BytesIO()
    labels_buffer.name = 'labels.csv'
    bunch = load_iris()
    numpy.savetxt(features_buffer, bunch.data, delimiter=',')
    numpy.savetxt(labels_buffer, bunch.target, delimiter=',')
    # Rewind so callers can read the CSV content from the start.
    features_buffer.seek(0)
    labels_buffer.seek(0)
    return features_buffer, labels_buffer
 def test_dataset_model_single_file(self):
     """A DataSet stores uploads under datasets/<name>/ and round-trips the data."""
     examples_file, label_file = _create_dataset()
     ds, _ = DataSet.objects.get_or_create(
         name='TEST',
         examples=SimpleUploadedFile(examples_file.name, examples_file.read()),
         labels=SimpleUploadedFile(label_file.name, label_file.read()))
     # Uploaded files land in a per-dataset directory named after the DataSet.
     self.assertEqual('datasets/TEST/examples.csv', ds.examples.name)
     self.assertEqual('datasets/TEST/labels.csv', ds.labels.name)
     # Reading the stored files back must reproduce the original iris arrays.
     expected = load_iris()
     loaded_train = numpy.genfromtxt(ds.examples, delimiter=',')
     loaded_labels = numpy.genfromtxt(ds.labels, delimiter=',')
     self.assertTrue(numpy.array_equal(loaded_train, expected.data))
     self.assertTrue(numpy.array_equal(loaded_labels, expected.target))
 def test_iris_forest(self):
     """The iris_experiments command creates one RandomForest grid search."""
     call_command('iris_experiments', 'Forest', 'TEST',
                  url=self.live_server_url)
     forest_searches = GridSearch.objects.filter(
         classifier='RandomForestClassifier')
     # Third argument doubles as the failure message (dumps the queryset).
     self.assertEqual(1, forest_searches.count(), forest_searches)
     gs_forest = GridSearch.objects.get(classifier='RandomForestClassifier')
     # Expected grid size: 2 criteria x 5 depths x (n_features - 1) settings.
     self.assertAlmostEqual(gs_forest.results.count(),
                            2 * 5 * (len(load_iris().data[0]) - 1),
                            delta=5)
 def test_iris_tree(self):
     """The iris_experiments command creates one DecisionTree grid search."""
     call_command('iris_experiments', 'Tree', 'TEST',
                  url=self.live_server_url)
     tree_searches = GridSearch.objects.filter(
         classifier='DecisionTreeClassifier')
     # Third argument doubles as the failure message (dumps the queryset).
     self.assertEqual(1, tree_searches.count(), tree_searches)
     gs_tree = GridSearch.objects.get(classifier='DecisionTreeClassifier')
     # Expected grid size: 2 criteria x 5 depths x (n_features - 1) settings.
     self.assertAlmostEqual(
         gs_tree.results.all().count(),
         2 * 5 * (len(load_iris().data[0]) - 1),
         msg=tree_searches,
         delta=5)
 def test_ATGridSearchCV_no_dataset(self):
     """Fitting without a persisted dataset still records ~every grid point."""
     iris = load_iris()
     param_grid = {
         'criterion': ['gini', 'entropy'],
         'max_depth': range(1, 21),
         'max_features': ['auto', 'log2', 'sqrt', None],
     }
     # 2 criteria * 20 depths * 4 feature settings = 160 candidates.
     expected_points = 2 * 20 * 4
     search = ATGridSearchCV(tree.DecisionTreeClassifier(), param_grid,
                             webserver_url=self.live_server_url)
     wait(search.fit(iris.data, iris.target))
     # Allow a small number of grid points to fail / be dropped.
     self.assertAlmostEqual(
         expected_points,
         GridSearch.objects.get(uuid=search._uuid).results.count(),
         delta=5)
 def test_grid_detail(self):
     """grid_detail returns the serialized grid search for its uuid."""
     iris = load_iris()
     client = DjangoClient()
     search = ATGridSearchCV(tree.DecisionTreeClassifier(),
                             {'criterion': ['gini', 'entropy'],
                              'max_depth': range(1, 6),
                              'max_features': ['auto', 'log2']},
                             webserver_url=self.live_server_url)
     wait(search.fit(iris.data, iris.target))
     detail_url = reverse('grid_detail', kwargs={'uuid': search._uuid})
     response = client.get(detail_url)
     self.assertEqual(200, response.status_code)
     # The serialized payload echoes back the search's uuid as a string.
     self.assertEqual(response.data['uuid'], str(search._uuid))
 def test_dataset_post_dataset_length_mismatch(self):
     """Posting example/label files of different lengths yields HTTP 400."""
     examples_file = BytesIO()
     examples_file.name = 'examples.csv'
     label_file = BytesIO()
     label_file.name = 'labels.csv'
     # iris has 150 rows, breast cancer 569 -> deliberately mismatched.
     numpy.savetxt(examples_file, load_iris().data, delimiter=',')
     numpy.savetxt(label_file, load_breast_cancer().target, delimiter=',')
     examples_file.seek(0)
     label_file.seek(0)
     response = DjangoClient().post(
         reverse('datasets'),
         data={'dataset': 'TEST',
               'file[0]': examples_file,
               'file[1]': label_file})
     self.assertEqual(400, response.status_code)
     self.assertEqual(b'"Examples and labels are not the same length"',
                      response.content)
 def test_dask_cv_single(self):
     """Cross-validation scores computed on a dask cluster are sane.

     Submits a single cross_val_score to a one-worker LocalCluster, then
     scales up and fans out five differently-regularized decision trees.
     """
     test_cluster = LocalCluster(1)
     test_client = Client(test_cluster)
     try:
         iris = load_iris()
         reg = tree.DecisionTreeClassifier()
         cv_score = test_client.submit(cross_val_score, reg, iris.data,
                                       iris.target)
         self.assertGreater(cv_score.result().mean(), 0)
         test_cluster.scale_up(4)
         # BUG FIX: the key was the literal string 'reg_%i' (never
         # interpolated), so every comprehension iteration overwrote the
         # previous future and only ONE of the five submissions was kept
         # and gathered.  Interpolate i into the key.
         # BUG FIX: range started at 0, but min_samples_leaf=0 is invalid
         # for sklearn trees; that bad future was only hidden by the key
         # collision above.  Use 1..5 so all five fits are valid.
         _cv_results = {
             'reg_%i' % i:
             test_client.submit(cross_val_score,
                                tree.DecisionTreeClassifier(min_samples_leaf=i),
                                iris.data, iris.target)
             for i in range(1, 6)
         }
         cv_results = test_client.gather(list(_cv_results.values()))
         for cv_result in cv_results:
             self.assertGreaterEqual(cv_result.mean(), 0)
     finally:
         # Don't leak the cluster/client across tests.
         test_client.close()
         test_cluster.close()
Beispiel #10
0
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets.base import load_iris
import numpy as np
"""pca运用的例子"""
# 加载鸢尾花数据,x表示样本的属性数据,y表示样本标签
x, y = load_iris(return_X_y=True)
print("original_x size:", x.shape)
print("y的取值:", set(y), "y size: ", y.shape)
pca = PCA(n_components=2) # 将到2维
reduced_x = pca.fit_transform(x) # 降维操作
# print("reduced_x:\n", reduced_x)
print("reduced_x size: ", reduced_x.shape)

red_x = reduced_x[np.where(y == 0), 0].T
red_y = reduced_x[np.where(y == 0), 1].T
blue_x = reduced_x[np.where(y == 1), 0].T
blue_y = reduced_x[np.where(y == 1), 1].T
green_x = reduced_x[np.where(y == 2), 0].T
green_y = reduced_x[np.where(y == 2), 1].T
# 降维数据画散点图查看效果
plt.scatter(red_x, red_y,c='r',marker="*")
plt.scatter(blue_x, blue_y,c='b',marker="D")
plt.scatter(green_x, green_y,c='g',marker=".")
plt.savefig("./pca_demo.png")
plt.show()
Test code for the KNNClassifier implementation

@author: rajajosh
'''
from sklearn.datasets.base import load_iris
from sklearn.cross_validation import train_test_split

from sklearn.neighbors.classification import KNeighborsClassifier

from sklearn.metrics.classification import accuracy_score
from KNNClassifier import KNNClassifier
import time
from MyRandomClassifier import MyRandomClassifier

# Load the iris dataset and pull out features (x) and labels (y).
iris = load_iris()

x = iris.data
y = iris.target

# NOTE(review): test_size=0.95 trains on only ~5% of the data (about 7
# samples), presumably to stress the classifiers -- confirm this is intended.
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.95)
print(x_test,x_train,y_test,y_train)

# Baseline: a random classifier, to compare accuracy against.
print("Using random classifier:")
clsf = MyRandomClassifier()
clsf.fit(x_train, y_train)
predictions = clsf.predict(x_test)
print(predictions,y_test)
print(accuracy_score(y_test,predictions))

# The KNN comparison continues past this point (truncated in this view).
print("Using KNN classifier:")
Beispiel #12
0
# -*- coding: GBK -*-
'''
Created on 2013-11-05

@author: asus
'''

from sklearn.datasets.base import load_iris
from matplotlib import pyplot as plt
import numpy as np

# Load the iris Bunch and dump its raw contents (Python 2 print statements).
data = load_iris()
print data['data']
print data['feature_names']
print data['target']
print data['target_names']

# Pull out the pieces used for plotting further below.
features = data['data']
feature_names = data['feature_names']
target = data['target']
for x in range(3):
    print x
    for t, marker, c in zip(xrange(3), ">ox", "rgb"):
        # We plot each class on its own to get different colored markers
        plt.subplot(2, 2, x + 1)

        color = c, marker
        plt.scatter(features[target == t, x],
                    features[target == t, x + 1],
                    c=c,
Beispiel #13
0
'''
Created on 2019-10-23

@author: havery
'''
# train_test_split is a utility that splits data into two independent sets.
from sklearn.model_selection import train_test_split
from sklearn.datasets.base import load_iris
# stratify forces the train/test class distributions to match the full dataset.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, stratify = iris.target, random_state=42)

# Logistic-regression classifier.
from sklearn.linear_model import LogisticRegression
clf=LogisticRegression(solver='lbfgs', multi_class='ovr', max_iter=5000, random_state=42)
# Learn the model with fit().
clf.fit(X_train, y_train)
# Evaluate with score(), which uses the default accuracy metric.
accuracy=clf.score(X_test, y_test)
print('{}逻辑回归的精确度得分是:{}'.format(clf.__class__.__name__, accuracy))

# Random-forest classifier, same train/test split.
from sklearn.ensemble import RandomForestClassifier
clf2 = RandomForestClassifier(n_estimators=100, n_jobs=1, random_state=42)
clf2.fit(X_train, y_train)
accuracy2 = clf2.score(X_test, y_test)
print('{}随机森林的精确度得分是:{}'.format(clf2.__class__.__name__, accuracy2))

# Gradient-boosting section: switch to the breast-cancer dataset.
from sklearn.datasets import load_breast_cancer
X_breast, y_breast = load_breast_cancer(return_X_y=True)
Beispiel #14
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import base
from sklearn.neighbors import KNeighborsClassifier

# Fetch the iris Bunch and flatten it into one DataFrame whose last
# column ('y') holds the class label.
scikit_iris = base.load_iris()
feature_matrix = np.column_stack((scikit_iris['data'], scikit_iris['target']))
column_names = list(scikit_iris.feature_names) + ['y']
iris = pd.DataFrame(data=feature_matrix, columns=column_names)

# Peek at the first three rows.
iris.head(3)
Beispiel #15
0
    def setup(self):
        """Load the fixture datasets used by the benchmarks/tests."""
        # Binary-classification fixture.
        self.Xb, self.yb = load_breast_cancer(return_X_y=True)
        # Multiclass fixture.
        self.Xmc, self.ymc = load_iris(return_X_y=True)
Beispiel #16
0
def read_iris():
    """Return the iris dataset as a DataFrame with a trailing 'target' column."""
    bunch = load_iris()
    # Stack the label vector onto the feature matrix as one extra column.
    combined = np.column_stack((bunch['data'], bunch['target']))
    return pd.DataFrame(data=combined,
                        columns=bunch['feature_names'] + ['target'])
'''

from sklearn.datasets.base import load_iris
from sklearn.model_selection._split import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mglearn
#from IPython.display import display
from pandas.plotting import scatter_matrix

if __name__ == '__main__':
    pass

# load_iris() returns a sklearn Bunch object (dict-like with attribute access).
iris_ds = load_iris()

print("Keys: \n{}".format(iris_ds.keys()))

# First 700 characters of the dataset description.
print()
print(iris_ds['DESCR'][:700] + "\n ...")

print()
print("Target names: {}".format(iris_ds['target_names']))
# BUG FIX: label read "Feaure names" -- corrected the typo.
print("Feature names: {}".format(iris_ds['feature_names']))

# The data itself is a numpy array of shape (n_samples, n_features).
print()
print("Type of data: {}".format(type(iris_ds['data'])))
print("DS shape: {}".format(iris_ds['data'].shape))

print()
Beispiel #18
0
#!/usr/bin/env python
# encoding=gbk

import matplotlib.pyplot as plt
import sklearn.decomposition as dp
from sklearn.datasets.base import load_iris

# Load the data: x is the feature matrix, y the class labels.
x, y = load_iris(return_X_y=True)
# Configure PCA to keep two principal components and project the data.
pca = dp.PCA(n_components=2)
reduced_x = pca.fit_transform(x)

# Bucket the projected points by iris class.  Any label other than 0 or 1
# falls into the "green" bucket, mirroring the original else-branch.
buckets = {'red': ([], []), 'blue': ([], []), 'green': ([], [])}
for label, point in zip(y, reduced_x):
    if label == 0:
        name = 'red'
    elif label == 1:
        name = 'blue'
    else:
        name = 'green'
    buckets[name][0].append(point[0])
    buckets[name][1].append(point[1])

# One scatter call per class, each with its own colour and marker.
plt.scatter(buckets['red'][0], buckets['red'][1], c='r', marker='x')
plt.scatter(buckets['blue'][0], buckets['blue'][1], c='b', marker='D')
plt.scatter(buckets['green'][0], buckets['green'][1], c='g', marker='.')
plt.show()
Beispiel #19
0
# -*- coding: GBK -*-
'''
Created on 2013-11-05

@author: asus
'''

from sklearn.datasets.base import load_iris
from matplotlib import pyplot as plt
import numpy as np

# Load the iris Bunch and dump its raw contents (Python 2 print statements).
data = load_iris();
print data['data']
print data['feature_names']
print data['target']
print data['target_names']

# Pieces used for the plotting loop below.
features = data['data']
feature_names = data['feature_names']
target = data['target']



# For each of the first three feature columns, scatter feature x against
# feature x+1 in its own sub-plot, one marker/colour per class.
for x in range(3):
    print x
    for t,marker,c in zip(xrange(3),">ox","rgb"):
        # We plot each class on its own to get different colored markers
        plt.subplot(2,2,x+1)
     
        color = c,marker
        plt.scatter(features[target == t,x],features[target == t,x+1],c=c,marker=marker)