from matplotlib import pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from threshold import fit_model, accuracy

# Load the iris dataset and map numeric targets to their species names.
data = load_iris()
features = data['data']
labels = data['target_names'][data['target']]

# Drop the setosa examples: that class is trivially separable.
keep = labels != 'setosa'
features = features[keep]
labels = labels[keep]

# Binary target: virginica vs. everything else
target = labels == 'virginica'

# Alternate every other example between testing and training:
# testing = [True, False, True, False, ...]
testing = np.tile([True, False], 50)
training = ~testing

model = fit_model(features[training], target[training])
train_accuracy = accuracy(features[training], target[training], model)
test_accuracy = accuracy(features[testing], target[testing], model)

print('''\
Training accuracy was {0:.1%}.
Testing accuracy was {1:.1%} (N = {2}).
'''.format(train_accuracy, test_accuracy, testing.sum()))
from sklearn.datasets import load_iris
from threshold import fit_model, accuracy

# Fetch the iris data and translate class indices into species names.
data = load_iris()
features = data['data']
labels = data['target_names'][data['target']]

# Remove the setosa examples, which are too easy to classify.
is_setosa = labels == 'setosa'
features, labels = features[~is_setosa], labels[~is_setosa]

# Classify virginica against non-virginica
is_virginica = labels == 'virginica'

# Even positions become the test set, odd positions the training set:
# testing = [True, False, True, False, True, False, ...]
testing = np.tile([True, False], 50)

# Training is the negation of testing: i.e., datapoints not used for testing,
# will be used for training
training = ~testing

model = fit_model(features[training], is_virginica[training])
train_accuracy = accuracy(features[training], is_virginica[training], model)
test_accuracy = accuracy(features[testing], is_virginica[testing], model)

print('''\
Training accuracy was {0:.1%}.
Testing accuracy was {1:.1%} (N = {2}).
'''.format(train_accuracy, test_accuracy, testing.sum()))
# Example #3 (score: 0)
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 22 11:51:00 2017

@author: jabong
"""

import numpy as np
from load import load_dataset
from threshold import fit_model, accuracy

# Ten-fold cross-validation of the threshold model on the seeds dataset.
features, labels = load_dataset('seeds')
# Binary target: True for the 'Canadian' variety, False otherwise.
labels = (labels == 'Canadian')

# NOTE: `accuracy()` returns the fraction of correct predictions (see its use
# for train/test accuracy elsewhere in this file), so what this loop averages
# is accuracy — the original code called it `error` and mislabeled the output.
mean_accuracy = 0.0
for fold in range(10):
    training = np.ones(len(features), bool)
    # Start from index `fold`, hold out every 10th element for testing.
    training[fold::10] = 0
    testing = ~training

    model = fit_model(features[training], labels[training])
    mean_accuracy += accuracy(features[testing], labels[testing], model)

# Average over the 10 folds.
mean_accuracy /= 10.0
print('Ten fold cross-validation accuracy was {0:.1%}.'.format(mean_accuracy))
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

from load import load_dataset
import numpy as np
from threshold import fit_model, accuracy

# Ten-fold cross-validation of the threshold model on the seeds dataset.
features, labels = load_dataset('seeds')

# Store the labels as a boolean array: True for the 'Canadian' variety.
labels = (labels == 'Canadian')

# NOTE: `accuracy()` returns the fraction of correct predictions, so the
# quantity averaged below is accuracy; the original code named it `error`
# and printed it with a misleading "error" label.
mean_accuracy = 0.0
for fold in range(10):
    training = np.ones(len(features), bool)

    # Hold out every 10th example, offset by `fold`, for this fold's test set.
    training[fold::10] = 0

    # The test set is the complement of the training set.
    testing = ~training

    model = fit_model(features[training], labels[training])
    mean_accuracy += accuracy(features[testing], labels[testing], model)

# Average over the 10 folds.
mean_accuracy /= 10.0

print('Ten fold cross-validated accuracy was {0:.1%}.'.format(mean_accuracy))