Example #1
def main():
    """ Example of how to load and parse MNIST data. """
    
    train_set, test_set = load_data()

    # train_set is a two-element tuple. The first element, i.e.,
    # train_set[0] is a 60,000 x 784 numpy matrix. There are 60k
    # rows in the matrix, each row corresponding to a single example.
    # There are 784 columns, each corresponding to the value of a
    # single pixel in the 28x28 image.
    print "\nDimensions of training set feature matrix:", 
    print train_set[FEATURE].shape

    # The labels for each example are maintained separately in train_set[1].
    # This is a 60,000 x 1 numpy matrix, where each element is the label
    # for the corresponding training example.
    print "\nDimensions of training set label matrix:", train_set[LABEL].shape

    # Example of how to access an individual training example (in this case,
    # the third example, i.e., the training example at index 2). We could 
    # also just use print to output it to the screen, but pretty_print formats 
    # the data in a nicer way: if you squint, you should be able to make out 
    # the number 4 in the matrix data.
    print "\nFeatures of third training example:\n"
    pretty_print(train_set[FEATURE][2])

    # And here's the label that goes with that training example
    print "\nLabel of first training example:", train_set[LABEL][2], "\n"
Example #2
def final_test():
    """ Train a logistic-regression SGD classifier and score it on the held-out test set. """
    train_set, test_set = load_data()
    X = train_set[FEATURE]
    Y = train_set[LABEL]
    X_test = test_set[FEATURE]
    Y_test = test_set[LABEL]
    clf = SGDClassifier(loss='log', alpha=0.0002, shuffle=False, n_iter=50)
    print "Training..."
    clf.fit(X, Y)
    print "Scoring..."
    score = clf.score(X_test, Y_test, sample_weight=None)
    print score
Example #3
def main():
    train_set, test_set = load_data()
    train_set_size = len(train_set[FEATURE])

    # Accumulate, for each digit class, the per-pixel sums and the number of
    # training examples seen for that class.
    heat_map = dict((d, [0.0] * 784) for d in xrange(10))
    count_map = [0] * 10
    for feature, label in zip(train_set[FEATURE], train_set[LABEL]):
        count_map[label] += 1
        for i in xrange(784):
            heat_map[label][i] += feature[i]

    # Divide each pixel sum by the class count to get a mean image per digit.
    for num, psum in heat_map.items():
        heat_map[num] = [k / count_map[num] for k in psum]

    # Assumes `import pickle` at the top of the module.
    afile = open(r'heatMap.pkl', 'wb')
    pickle.dump(heat_map, afile)
    afile.close()
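
The pickled averages can be reloaded and rendered later. A minimal sketch, assuming matplotlib is available (the 28x28 reshape mirrors the pixel layout described in Example #1):

import pickle
import numpy as np
import matplotlib.pyplot as plt

with open('heatMap.pkl', 'rb') as afile:
    heat_map = pickle.load(afile)

# Show the mean image for the digit 4 as a 28x28 grayscale heat map.
plt.imshow(np.array(heat_map[4]).reshape(28, 28), cmap='gray')
plt.title('Average pixel intensities for the digit 4')
plt.show()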
Example #4
def main():
    """ Example of how to load and parse MNIST data. """

    train_set, test_set = load_data()

    # train_set is a two-element tuple. The first element, i.e.,
    # train_set[0] is a 60,000 x 784 numpy matrix. There are 60k
    # rows in the matrix, each row corresponding to a single example.
    # There are 784 columns, each corresponding to the value of a
    # single pixel in the 28x28 image.
    print "\nDimensions of training set feature matrix:",
    print train_set[FEATURE].shape

    # The labels for each example are maintained separately in train_set[1].
    # This is a 60,000 x 1 numpy matrix, where each element is the label
    # for the corresponding training example.
    print "\nDimensions of training set label matrix:", train_set[LABEL].shape

    # Example of how to access an individual training example (in this case,
    # the third example, i.e., the training example at index 2). We could
    # also just use print to output it to the screen, but pretty_print formats
    # the data in a nicer way: if you squint, you should be able to make out
    # the number 4 in the matrix data.
    print "\nFeatures of third training example:\n"
    #pretty_print(train_set[FEATURE][2])

    # And here's the label that goes with the example rendered below
    print "\nLabel of the training example at index 10:", train_set[LABEL][10], "\n"


    # Render the training example at index 10 as a 28x28 grayscale PNG.
    # Assumes `from PIL import Image` at the top of the module.
    img = Image.new("RGB", (28, 28))
    px = img.load()
    a = train_set[FEATURE][10]
    #pretty_print(a)
    for x in xrange(28):
        for y in xrange(28):
            # Features are floats in [0, 1]; scale to 0-255 for display.
            v = int(a[y*28 + x] * 255)
            px[x, y] = (v, v, v)
    img.save('10.png')
Example #5
# -*- coding: utf-8 -*-
"""
Created on Fri Feb  9 12:42:35 2018

@author: Nancy
"""

from read_mnist import load_data
# make sure the right loader in read_mnist is uncommented so that load_data
# returns (train, valid, test) splits
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

#loading MNIST digits dataset
train_set, valid_set, test_set = load_data()

best_k = 0
max_accuracy = 0

# loop over various values of `k` for the k-Nearest Neighbor classifier
for k in range(1, 30, 2):
    # train the k-Nearest Neighbor classifier with the current value of `k`
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(train_set[0], train_set[1])

    # evaluate the model and update the best accuracy and corresponding k
    score = model.score(valid_set[0], valid_set[1])
    if score > max_accuracy:
        max_accuracy = score
        best_k = k

# report the value of k that achieved the highest validation accuracy
print("Best k: {} (validation accuracy: {:.4f})".format(best_k, max_accuracy))
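
With best_k in hand, the classification_report imported above can be used to score a final model on the held-out test split; a minimal sketch:

model = KNeighborsClassifier(n_neighbors=best_k)
model.fit(train_set[0], train_set[1])
predictions = model.predict(test_set[0])
print(classification_report(test_set[1], predictions))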
Example #6
import time

import numpy as np
import matplotlib.pyplot as plt

from read_mnist import load_data
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state

FEATURE = 0
LABEL = 1

# Author: Arthur Mensch <*****@*****.**>
# License: BSD 3 clause

# Turn down for faster convergence
t0 = time.time()
train_samples = 5000

train_set, test_set = load_data()
X_train, y_train = train_set[FEATURE][:train_samples], train_set[LABEL][:train_samples]
X_test, y_test = test_set[FEATURE][-10000:], test_set[LABEL][-10000:]
# Turn up tolerance for faster convergence
clf = LogisticRegression(C=50. / train_samples,
                         multi_class='multinomial',
                         penalty='l1', solver='saga', tol=0.1)
clf.fit(X_train, y_train)
sparsity = np.mean(clf.coef_ == 0) * 100
score = clf.score(X_test, y_test)
# print('Best C % .4f' % clf.C_)
print("Sparsity with L1 penalty: %.2f%%" % sparsity)
print("Test score with L1 penalty: %.4f" % score)

coef = clf.coef_.copy()
plt.figure(figsize=(10, 5))
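
The figure set up above is evidently meant to display the ten per-class weight vectors as 28x28 images, as in the scikit-learn sparse-logistic-regression example this snippet follows. A sketch of that plotting loop, under that assumption:

scale = np.abs(coef).max()
for i in range(10):
    sub = plt.subplot(2, 5, i + 1)
    # Each row of coef holds the 784 weights for one digit class.
    sub.imshow(coef[i].reshape(28, 28), interpolation='nearest',
               cmap=plt.cm.RdBu, vmin=-scale, vmax=scale)
    sub.set_xticks(())
    sub.set_yticks(())
    sub.set_xlabel('Class %i' % i)
plt.show()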
Example #7
import numpy as np
from utils import visualise
from read_mnist import load_data
import random

y_train, x_train, y_test, x_test = load_data()
print("Train data label dim: {}".format(y_train.shape))
print("Train data features dim: {}".format(x_train.shape))
print("Test data label dim: {}".format(y_test.shape))
print("Test data features dim: {}".format(x_test.shape))

# uncomment to visualise dataset
# visualise(x_train)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_grad(x):
    # Elementwise derivative of the sigmoid: s(x) * (1 - s(x)).
    return sigmoid(x) * (1 - sigmoid(x))

def softmax(x):
    # Row-wise softmax, subtracting each row's max for numerical stability.
    for i, f in enumerate(x):
        f -= np.max(f)
        p = np.exp(f) / np.sum(np.exp(f))
        x[i, :] = p
    return x

def cross_entropy(X, y):
    """
    X is the output from the fully connected layer (num_examples x num_classes)
    y is labels (num_examples x 1)
    """
    # Mean negative log-likelihood of the true class for each example.
    # Note that softmax above modifies X in place.
    m = y.shape[0]
    p = softmax(X)
    log_likelihood = -np.log(p[np.arange(m), y.ravel()])
    return np.sum(log_likelihood) / m
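
As a quick sanity check of the helpers above, a small hand-built batch (hypothetical values, not MNIST data) should yield a low loss when the logits favor the true classes; note the .copy(), since softmax mutates its argument in place:

logits = np.array([[2.0, 0.1, 0.1],
                   [0.2, 3.0, 0.3]])
labels = np.array([0, 1])
print("Cross-entropy loss: {:.4f}".format(cross_entropy(logits.copy(), labels)))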
Example #8
def main():
    """ Example of how to load and parse MNIST data. """
    
    train_set, test_set = load_data()

    # train_set is a two-element tuple. The first element, i.e.,
    # train_set[0] is a 60,000 x 784 numpy matrix. There are 60k
    # rows in the matrix, each row corresponding to a single example.
    # There are 784 columns, each corresponding to the value of a
    # single pixel in the 28x28 image.
    print "\nDimensions of training set feature matrix:", 
    print train_set[FEATURE].shape

    # The labels for each example are maintained separately in train_set[1].
    # This is a 60,000 x 1 numpy matrix, where each element is the label
    # for the corresponding training example.
    #print "\nDimensions of training set label matrix:", train_set[LABEL].shape

    # Example of how to access an individual training example (in this case,
    # the third example, i.e., the training example at index 2). We could 
    # also just use print to output it to the screen, but pretty_print formats 
    # the data in a nicer way: if you squint, you should be able to make out 
    # the number 4 in the matrix data.
    #print "\nFeatures of third training example:\n"
    #pretty_print(train_set[FEATURE][2])

    # And here's the label that goes with that training example
    #print "\nLabel of first training example:", train_set[LABEL][2], "\n"

    
    # The test_set is organized in the same way, but only contains 10k
    # examples. Don't touch this data until your model is frozen! Perform all
    # cross-validation, model selection, hyperparameter tuning etc. on the 60k
    # training set. Use the test set simply for reporting performance.

    # cross validation
    # http://scikit-learn.org/stable/modules/cross_validation.html
    
    
    #Nearest Neighbor
    #http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html#sklearn.neighbors.KNeighborsClassifier.predict_proba
    
    X = train_set[FEATURE]
    Y = train_set[LABEL]
    kf = KFold(60000, 6)  # old-style sklearn.cross_validation.KFold(n, n_folds)
    #print(kf)
    #print len(kf)
    print "creating cross validation sets...."
    for train_index, test_index in kf:
        #print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]     
        #print X_train        
        #print X_train.shape
        #print X_test.shape
        print "Making classifier...."
        neigh = KNeighborsClassifier(n_neighbors=3)
        print "Fitting Classifier..."
        neigh.fit(X_train, y_train)
        "Calculating test scores..."
        print neigh.score(X_test, y_test, sample_weight=None)
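
For reference, the KFold call above uses the old sklearn.cross_validation API, where the constructor takes the number of examples and the number of folds and the object itself is iterable. Under the newer sklearn.model_selection API the equivalent would be roughly:

from sklearn.model_selection import KFold

kf = KFold(n_splits=6)
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]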
Example #9
def main():
    """ Example of how to load and parse MNIST data. """
    
    train_set, test_set = load_data()

    # train_set is a two-element tuple. The first element, i.e.,
    # train_set[0] is a 60,000 x 784 numpy matrix. There are 60k
    # rows in the matrix, each row corresponding to a single example.
    # There are 784 columns, each corresponding to the value of a
    # single pixel in the 28x28 image.
    print "\nDimensions of training set feature matrix:", 
    print train_set[FEATURE].shape

    # The labels for each example are maintained separately in train_set[1].
    # This is a 60,000 x 1 numpy matrix, where each element is the label
    # for the corresponding training example.
    #print "\nDimensions of training set label matrix:", train_set[LABEL].shape

    # Example of how to access an individual training example (in this case,
    # the third example, i.e., the training example at index 2). We could 
    # also just use print to output it to the screen, but pretty_print formats 
    # the data in a nicer way: if you squint, you should be able to make out 
    # the number 4 in the matrix data.
    #print "\nFeatures of third training example:\n"
    #pretty_print(train_set[FEATURE][2])

    # And here's the label that goes with that training example
    #print "\nLabel of first training example:", train_set[LABEL][2], "\n"

    
    # The test_set is organized in the same way, but only contains 10k
    # examples. Don't touch this data until your model is frozen! Perform all
    # cross-validation, model selection, hyperparameter tuning etc. on the 60k
    # training set. Use the test set simply for reporting performance.

    # cross validation
    # http://scikit-learn.org/stable/modules/cross_validation.html
    
    
    #Nearest Neighbor
    #http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html#sklearn.neighbors.KNeighborsClassifier.predict_proba
    
    X = train_set[FEATURE]
    Y = train_set[LABEL]
    kf = KFold(60000, 6)  # old-style sklearn.cross_validation.KFold(n, n_folds)
    #print(kf)
    #print len(kf)
    print "creating cross validation sets...."
    alpha_scores = [[],[]]
    for mult in [0.0001]:
        for const in [2]:
            alph = mult * const
            print "Testing alpha = " + str(alph)
            k = 0
            scores = []
            for train_index, test_index in kf:
                print "    k = " + str(k) 
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = Y[train_index], Y[test_index]     
                #clf = LogisticRegression(penalty='l2', dual=False, solver="lbfgs")
                clf = SGDClassifier(loss='log',alpha=alph, shuffle=False, n_iter=50)
                clf.fit(X_train, y_train)
                score = clf.score(X_test, y_test, sample_weight=None)
                print score
                scores.append(score)
                k+=1
            avg_score = sum(scores)/(float(len(scores)))
            print "    Average Score: "+ str(avg_score)
            alpha_scores[0].append(alph)
            alpha_scores[1].append(avg_score)

    for i in range(len(alpha_scores[0])):
        print "alpha: " + str(alpha_scores[0][i]) + "    score: " + str(alpha_scores[1][i])

    fig = plt.figure()
    ax = plt.gca()
    ax.plot(alpha_scores[0],alpha_scores[1], c='red')
    ax.set_xscale('log')
    plt.title("Alpha Exploration - Log Regression")
    plt.xlabel("Alpha")
    plt.ylabel("Score")
    plt.show()
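
A portability note: the SGDClassifier arguments above come from an older scikit-learn. In recent releases the logistic loss is spelled 'log_loss' and n_iter was replaced by max_iter, so the equivalent construction would be roughly:

clf = SGDClassifier(loss='log_loss', alpha=alph, shuffle=False, max_iter=50)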