    def buildFirstClassificationModel(self):
        global features, feature_names, target, target_names
        labels = target_names[target]
        is_setosa = labels == "setosa"

        # get the max petal length of setosa and the min petal length of non-setosa
        plength_setosa = features[is_setosa, 2]
        plength_non_setosa = features[~is_setosa, 2]
        maxSetosa = plength_setosa.max()
        min_nonSetosa = plength_non_setosa.min()
        logging.info("max plength for setosa :  %f", maxSetosa)
        logging.info("min plength for non-setosa :  %f", min_nonSetosa)


        # select features & labels for the non-setosa examples, reusing the mask:
        features_nonSetosa = features[~is_setosa]
        labels_nonSetosa = labels[~is_setosa]
        is_virginica = (labels_nonSetosa == "virginica")

        # getModel is expected to try every feature/threshold pair and return the
        # per-feature accuracies plus the best (feature index, threshold, reverse) triple
        accuracy_results, best_fi, best_t, best_reverse = getModel(features_nonSetosa, is_virginica, feature_names)

        self.drawAccuracyComparisionChart(accuracy_results)

        # test the threshold on a sample
        example = np.array([1.6, 2.5, 4.3, 2.6])
        self.is_virginica_test(best_fi, best_t, best_reverse, example)
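
    def is_virginica_sketch(self, fi, t, reverse, example):
        # Hypothetical illustration (not the imported helper): assuming the
        # (best_fi, best_t, best_reverse) triple returned by getModel encodes a
        # single-feature threshold model, applying it to one example reduces to
        # a comparison, optionally flipped when reverse is True.
        prediction = example[fi] > t
        return not prediction if reverse else prediction
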
    def cross_validation(self):

        global features, feature_names, target, target_names
        is_setosa = target_names[target] == "setosa"
        features_wo_setosa = features[~is_setosa]

        labels_wo_setosa = target_names[target][~is_setosa]

        is_virginica = labels_wo_setosa == "virginica"

        # leave-one-out: for each example ei, build a boolean mask that is True
        # everywhere except at ei, train on those examples, and test on the held-out one
        correct = 0.0
        for ei in range(len(features_wo_setosa)):
            train = np.ones(len(features_wo_setosa), bool)
            train[ei] = False
            testing = ~train
            model = getModel(features_wo_setosa[train], is_virginica[train], feature_names)
            correct += np.sum(predict(model, features_wo_setosa[testing]) == is_virginica[testing])
        acc = correct / float(len(features_wo_setosa))
        logging.info("Accuracy on cross-validation : %f", acc)
    def train_test_50(self):
        global features, feature_names, target, target_names

        labels = target_names[target]
        is_setosa = labels == "setosa"

        features_wo_setosa = features[~is_setosa]

        labels_wo_setosa = labels[~is_setosa]

        is_virginica = (labels_wo_setosa == "virginica")

        # split the 100 non-setosa examples in half by alternating them:
        # training = [False, True, False, True, ...]
        training = np.tile([False, True], 50)
        testing = ~training

        # fit on the training half, then measure accuracy on both halves
        model_training = getModel(features_wo_setosa[training], is_virginica[training], feature_names)

        accuracy_training = accuracy(features_wo_setosa[training], is_virginica[training], model_training)
        accuracy_testing = accuracy(features_wo_setosa[testing], is_virginica[testing], model_training)

        logging.info("Training Accuracy : %f ---- Testing Accuracy : %f", accuracy_training, accuracy_testing)
from sklearn.datasets import load_iris
import numpy as np
from main.ch02.utils import getModel, accuracy, predict

data = load_iris()
features = data['data']
feature_names = data['feature_names']
target = data['target']
target_names = data['target_names']
labels = target_names[target]

# We are going to remove the setosa examples as they are too easy:
is_setosa = (labels == 'setosa')
features = features[~is_setosa]
labels = labels[~is_setosa]

# Now we classify virginica vs non-virginica
is_virginica = (labels == 'virginica')

# Split the data in two: testing and training
testing = np.tile([True, False], 50)  # testing = [True,False,True,False,True,False...]

# Training is the negation of testing: i.e., datapoints not used for testing,
# will be used for training
training = ~testing

model = getModel(features[training], is_virginica[training])
train_accuracy = accuracy(features[training], is_virginica[training], model)
test_accuracy = accuracy(features[testing], is_virginica[testing], model)

print('''\
Training accuracy was {0:.1%}.
Testing accuracy was {1:.1%} (N = {2}).
'''.format(train_accuracy, test_accuracy, testing.sum()))
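
# A follow-up sketch: swap the roles of the two halves and average the two
# testing accuracies, which amounts to a cheap 2-fold cross-validation estimate
# (this reuses the getModel/accuracy helpers imported above and assumes the
# same calling conventions as the split just shown).
model_swapped = getModel(features[testing], is_virginica[testing])
swapped_accuracy = accuracy(features[training], is_virginica[training], model_swapped)
print('Swapped-halves testing accuracy was {0:.1%}; 2-fold estimate: {1:.1%}.'.format(
    swapped_accuracy, (test_accuracy + swapped_accuracy) / 2.0))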