Example 1: logistic regression on the breast cancer dataset
import sys

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# The author's LogisticRegression class; this import path is an assumption
# (example 3 below imports it from logisticRegression.py).
from logisticRegression import LogisticRegression


def main():
    rawData = load_breast_cancer()
    trainingData = np.matrix(rawData.data, dtype='float')
    x = trainingData[:, 0:10]   # keep the first 10 features
    x = x / x.max(axis=0)       # scale each feature column to [0, 1]
    y = np.matrix(rawData.target, dtype='float').T
    print("Data loaded successfully...")

    def ask_yes_no(question):
        """Keep prompting until the user answers y/n; 'q' quits the program."""
        while True:
            userInput = input(question + " press y or n... q for quit... ")
            if userInput in ('y', 'Y'):
                return True
            elif userInput in ('n', 'N'):
                return False
            elif userInput in ('q', 'Q'):
                print("Quitting the program...")
                sys.exit()
            else:
                print("Invalid input... Please provide a valid input.")

    shuffle = ask_yes_no("Do you want to shuffle the data while splitting for training and testing?")
    animation = ask_yes_no("Do you want to show the animation for gradient descent?")

            
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, shuffle=shuffle)
    print("Training and testing data generated...")

    logReg = LogisticRegression(x_train, y_train)
    logReg.train(animation=animation, alpha=0.001)
    y_pred = logReg.test(x_test)
    score = logReg.scores(y_test)
    print("precision: ", score['precision'])
    print("recall: ", score['recall'])
    print("f1 score: ", score['f1'])
    print("accuaracy: ", score['accuaracy'])
    print("confusionMatrix: ", score['confusionMatrix'])
print("train_set_x_flatten shape: " + str(train_set_x_flatten.shape))
print("train_set_y shape: " + str(train_set_y.shape))
print("test_set_x_flatten shape: " + str(test_set_x_flatten.shape))
print("test_set_y shape: " + str(test_set_y.shape))
print("sanity check after reshaping: " + str(train_set_x_flatten[0:5, 0]))

# Standardize the dataset (pixel values lie in [0, 255])
train_set_x = train_set_x_flatten / 255.
test_set_x = test_set_x_flatten / 255.
###################################################################################################

# Fit Logistic Model (dim = 12288, e.g. 64 x 64 x 3 images flattened)
LR = LogisticRegression(dim=12288)
grads, costs = LR.train(X=train_set_x,
                        Y=train_set_y,
                        num_iterations=2000,
                        learning_rate=0.005,
                        print_cost=True,
                        plot_cost=True)

# Prediction Accuracy for training data
y_pred = LR.predict(X=train_set_x)
LR.accuracy_stats(train_set_y, y_pred)

# Prediction Accuracy for test data
y_test_pred = LR.predict(X=test_set_x)
LR.accuracy_stats(test_set_y, y_test_pred)

###################################################################################################

# Experiment with different learning rates
learning_rates = [0.01, 0.001, 0.0001]
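
# The comparison loop itself is not shown here, so below is a minimal sketch,
# assuming the same LogisticRegression API as above (train returns
# (grads, costs)); the iteration count is an arbitrary choice.
import matplotlib.pyplot as plt

for lr in learning_rates:
    print("Training with learning rate: " + str(lr))
    model = LogisticRegression(dim=12288)
    grads, costs = model.train(X=train_set_x,
                               Y=train_set_y,
                               num_iterations=1500,
                               learning_rate=lr,
                               print_cost=False,
                               plot_cost=False)
    plt.plot(costs, label="lr = " + str(lr))

plt.xlabel("iterations (as recorded by train)")
plt.ylabel("cost")
plt.legend()
plt.show()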
###################################################################################################

# Example 3: logistic regression on a non-linearly-separable "moons" dataset
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

# set current directory
os.chdir("F:\\neuralnetworksanddeeplearning\\codes")
from logisticRegression import LogisticRegression
from generalFunctions import plot_decision_boundary

######################################################################################

## Generate a dataset and plot it
np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)
plt.scatter(x = X[:,0], y = X[:,1], c = y, cmap = plt.cm.Spectral)

# The data is not linearly separable, which means that linear classifiers like logistic regression
# won't be able to fit the data unless you hand-engineer non-linear features (such as polynomials);
# a short sketch at the end of this example illustrates this. In fact, that's one of the major
# advantages of Neural Networks: you don't need to worry about feature engineering. The hidden
# layer of a neural network will learn the features for you.
######################################################################################

## Logistic Regression
num_features = 2
LR = LogisticRegression(dim = num_features)
grads, costs = LR.train(X = X.T,
                        Y = y.reshape(1, 200),
                        num_iterations = 5000,
                        learning_rate = 0.01,
                        print_cost = True,
                        plot_cost = True)
plot_decision_boundary(lambda x: LR.predict(x), X = X, y = y) 
plt.title("Logistic Regression") 

# The graph shows the decision boundary learned by our logistic regression classifier. It separates
# the data as well as it can using a straight line, but it's unable to capture the "moon shape" of
# our data.
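
# As promised above: with hand-engineered polynomial features, even a linear
# classifier can bend its boundary around the moons. This is a minimal sketch
# using scikit-learn's pipeline and its built-in LogisticRegression (not the
# author's class), purely for comparison.
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression as SkLogisticRegression

# Degree-3 polynomial expansion of the two input features, then a linear model.
poly_model = make_pipeline(PolynomialFeatures(degree=3), SkLogisticRegression())
poly_model.fit(X, y)
print("Training accuracy with degree-3 polynomial features:", poly_model.score(X, y))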