def buildFirstClassificationModel(self):
    global features, feature_names, target, target_names
    labels = target_names[target]
    is_setosa = labels == "setosa"

    # get the max petal length of setosa and the min petal length of non-setosa
    plength_setosa = features[is_setosa, 2]
    plength_non_setosa = features[~is_setosa, 2]
    maxSetosa = plength_setosa.max()
    min_nonSetosa = plength_non_setosa.min()
    logging.info("max plength for setosa : %f", maxSetosa)
    logging.info("min plength for non-setosa : %f", min_nonSetosa)

    # select features & labels for the non-setosa examples:
    features_nonSetosa = features[~is_setosa]
    labels_nonSetosa = labels[~is_setosa]
    is_virginica = (labels_nonSetosa == "virginica")

    # search for the best (feature, threshold) model on the remaining classes
    accuracy_results, best_fi, best_t, best_reverse = getModel(
        features_nonSetosa, is_virginica, feature_names)
    self.drawAccuracyComparisionChart(accuracy_results)

    # test the threshold on a sample
    example = np.array([1.6, 2.5, 4.3, 2.6])
    self.is_virginica_test(best_fi, best_t, best_reverse, example)
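# ---------------------------------------------------------------------------
# getModel and predict are imported from main.ch02.utils and are not shown in
# this file. As a reference only, here is a minimal sketch of a compatible
# per-feature threshold search (hypothetical names getModel_sketch /
# predict_sketch; the real utils implementation may differ in details such as
# return values and tie-breaking). Note that buildFirstClassificationModel
# unpacks four values from getModel while cross_validation and train_test_50
# treat its return value as a single model; the real utils API presumably
# reconciles this.
# ---------------------------------------------------------------------------
def getModel_sketch(features, labels, feature_names):
    # Try every observed value of every feature as a threshold, in both
    # directions, and keep the (feature index, threshold, reverse) triple
    # with the best training accuracy. Also record the best accuracy per
    # feature so it can be drawn as a comparison chart.
    accuracy_results = {}
    best_acc, best_fi, best_t, best_reverse = -1.0, None, None, None
    for fi in range(features.shape[1]):
        feature_best = -1.0
        for t in features[:, fi]:
            for reverse in (False, True):
                pred = features[:, fi] > t
                if reverse:
                    pred = ~pred
                acc = float(np.mean(pred == labels))
                feature_best = max(feature_best, acc)
                if acc > best_acc:
                    best_acc, best_fi, best_t, best_reverse = acc, fi, t, reverse
        accuracy_results[feature_names[fi]] = feature_best
    return accuracy_results, best_fi, best_t, best_reverse

def predict_sketch(model, features):
    # Apply a (feature index, threshold, reverse) model to a feature matrix.
    fi, t, reverse = model
    pred = features[:, fi] > t
    return ~pred if reverse else pred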
def cross_validation(self):
    global features, feature_names, target, target_names
    is_setosa = target_names[target] == "setosa"
    features_wo_setosa = features[~is_setosa]
    labels_wo_setosa = target_names[target][~is_setosa]
    is_virginica = labels_wo_setosa == "virginica"

    # leave-one-out: hold each example out in turn and train on the rest
    correct = 0.0
    for ei in range(len(features_wo_setosa)):
        # build a boolean mask of the size of features_wo_setosa that is
        # True everywhere except at position ei
        train = np.ones(len(features_wo_setosa), bool)
        train[ei] = False
        testing = ~train
        model = getModel(features_wo_setosa[train], is_virginica[train], feature_names)
        correct += np.sum(predict(model, features_wo_setosa[testing]) == is_virginica[testing])
    acc = correct / float(len(features_wo_setosa))
    logging.info("Accuracy on cross-validation : %f", acc)
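# For comparison, the same leave-one-out scheme can be written with
# scikit-learn's LeaveOneOut splitter. This is a sketch, assuming the same
# getModel/predict helpers from main.ch02.utils as above (hypothetical
# function name cross_validation_loo_sketch):
def cross_validation_loo_sketch(features, labels, feature_names):
    from sklearn.model_selection import LeaveOneOut
    correct = 0.0
    for train_idx, test_idx in LeaveOneOut().split(features):
        model = getModel(features[train_idx], labels[train_idx], feature_names)
        correct += np.sum(predict(model, features[test_idx]) == labels[test_idx])
    return correct / float(len(features))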
def train_test_50(self):
    global features, feature_names, target, target_names
    labels = target_names[target]
    is_setosa = labels == "setosa"
    features_wo_setosa = features[~is_setosa]
    labels_wo_setosa = labels[~is_setosa]
    is_virginica = (labels_wo_setosa == "virginica")

    # split the 100 remaining examples into alternating train and test halves
    training = np.tile([False, True], 50)
    testing = ~training

    # fit on the training half and display the resulting accuracy for both halves
    model_training = getModel(features_wo_setosa[training], is_virginica[training], feature_names)
    accuracy_training = accuracy(model_training, features_wo_setosa[training], is_virginica[training])
    accuracy_testing = accuracy(model_training, features_wo_setosa[testing], is_virginica[testing])
    logging.info("Training Accuracy : %f ---- Testing Accuracy : %f",
                 accuracy_training, accuracy_testing)
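# The alternating np.tile mask above assumes the 100 remaining samples are
# ordered so that each half gets a balanced class mix. A shuffled, stratified
# split via scikit-learn is a more robust alternative; a sketch under that
# assumption (hypothetical function name split_50_50_sketch):
def split_50_50_sketch(features, labels):
    from sklearn.model_selection import train_test_split
    # stratify=labels keeps the virginica/versicolor ratio equal in both halves
    return train_test_split(features, labels, test_size=0.5,
                            stratify=labels, random_state=0)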
import numpy as np
from sklearn.datasets import load_iris

from main.ch02.utils import getModel, accuracy

data = load_iris()
features = data['data']
labels = data['target_names'][data['target']]

# We are going to remove the setosa examples as they are too easy:
is_setosa = (labels == 'setosa')
features = features[~is_setosa]
labels = labels[~is_setosa]

# Now we classify virginica vs non-virginica
is_virginica = (labels == 'virginica')

# Split the data in two: testing and training
testing = np.tile([True, False], 50)  # testing = [True, False, True, False, ...]

# Training is the negation of testing: i.e., datapoints not used for testing
# will be used for training
training = ~testing

model = getModel(features[training], is_virginica[training])
train_accuracy = accuracy(features[training], is_virginica[training], model)
test_accuracy = accuracy(features[testing], is_virginica[testing], model)

print('''\
Training accuracy was {0:.1%}.
Testing accuracy was {1:.1%} (N = {2}).
'''.format(train_accuracy, test_accuracy, testing.sum()))
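# The accuracy helper is also defined in main.ch02.utils and not shown here.
# Note that its argument order in this script (features, labels, model)
# differs from the call in train_test_50 above (model, features, labels).
# A minimal sketch matching this script's order, under the (feature index,
# threshold, reverse) model convention assumed earlier (hypothetical name
# accuracy_sketch):
def accuracy_sketch(features, labels, model):
    fi, t, reverse = model
    pred = features[:, fi] > t
    if reverse:
        pred = ~pred
    return float(np.mean(pred == labels))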