Example #1
def softmax_cross_entropy_loss(Z, Y=np.array([])):
    '''
    Computes the softmax activation of the inputs Z
    Estimates the cross entropy loss

    Inputs: 
        Z - numpy.ndarray (n, m)
        Y - numpy.ndarray (1, m) of labels
            when y=[] loss is set to []
    
    Returns:
        A - numpy.ndarray (n, m) of softmax activations
        cache -  a dictionary to store the activations later used to estimate derivatives
        loss - cost of prediction
    '''
    ### CODE HERE
    # Numerically stable softmax: shift by the column-wise max before exponentiating
    maxz = np.max(Z, axis=0, keepdims=True)
    A = np.exp(Z - maxz)
    A = A / np.sum(A, axis=0, keepdims=True)

    cache_Activation = {}
    cache_Activation["Activation"] = A

    # As documented above, the loss is set to [] when no labels are provided
    if Y.size == 0:
        return A, cache_Activation, []

    one_hot_vector = one_hot(Y.astype(int), 10)
    loss = -np.sum(one_hot_vector.T * np.log(A)) / Y.shape[1]

    return A, cache_Activation, loss
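
A minimal usage sketch for the function above. The one_hot stub below is a hypothetical stand-in for the project's own helper (not shown in this excerpt); it only needs to map (1, m) labels to an (m, 10) indicator matrix, which is the orientation the loss transposes. Subtracting the column-wise maximum before exponentiating does not change the softmax output but avoids overflow for large scores.

import numpy as np

def one_hot(Y, n_classes):
    # Hypothetical stand-in for the one_hot helper used above: maps labels of
    # shape (1, m) to an (m, n_classes) indicator matrix.
    return np.eye(n_classes)[Y.ravel()]

Z = np.random.randn(10, 4)      # raw scores: 10 classes, 4 samples
Y = np.array([[3, 1, 7, 0]])    # labels, shape (1, m)
A, cache, loss = softmax_cross_entropy_loss(Z, Y)
print(A.sum(axis=0))            # every column of A sums to 1
print(loss)                     # scalar cross-entropy loss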
Example #2
def neuralNetwork(X, Y, lDim, nEpoch, alpha, Xdev, Ydev, k, miniBatchSize):
    '''
    Trains the network with mini-batch gradient descent and the Adam
    optimizer, recording the training and dev-set cost once per epoch.
    '''
    parameters = initParameter(lDim)

    v, s = initParameterAdam(parameters)

    costTrain = []
    costDev = []
    tAdam = 0

    for i in range(nEpoch):

        miniBatch = initRandomMiniBatch(X, Y, miniBatchSize)

        for (miniBatchX, miniBatchY) in miniBatch:

            AL, cache = forwardPropagation(miniBatchX, parameters)

            costTr = costNN(AL, one_hot(miniBatchY.astype(int), 10))

            gradients = backPropagation(AL, one_hot(miniBatchY.astype(int),
                                                    10), parameters, cache, k)

            tAdam = tAdam + 1
            parameters, v, s = optimizerAdam(parameters, gradients, alpha, v,
                                             s, tAdam)

        costTrain.append(costTr)

        costD = errorDev(Xdev, one_hot(Ydev.astype(int), 10), parameters)

        costDev.append(costD)

        Ypred, _ = predictClass(Xdev, parameters)

        print((accuracy(Ydev, Ypred)))

    return parameters, costTrain, costDev
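
The helpers initParameter, initParameterAdam, forwardPropagation, backPropagation, optimizerAdam, and initRandomMiniBatch are defined elsewhere in this project. As an illustration of the contract the loop relies on, here is a hypothetical sketch of initRandomMiniBatch: shuffle the sample columns of X and Y together, then slice them into (miniBatchX, miniBatchY) tuples.

import numpy as np

def initRandomMiniBatch(X, Y, miniBatchSize):
    # Hypothetical sketch, not the original helper: shuffle the sample
    # columns of X and Y together, then cut them into mini-batches.
    m = X.shape[1]
    perm = np.random.permutation(m)
    Xshuf, Yshuf = X[:, perm], Y[:, perm]
    miniBatch = []
    for start in range(0, m, miniBatchSize):
        end = start + miniBatchSize
        miniBatch.append((Xshuf[:, start:end], Yshuf[:, start:end]))
    return miniBatch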
Example #3
def softmax_cross_entropy_loss_der(Y, cache):
    '''
    Computes the derivative of softmax activation and cross entropy loss

    Inputs: 
        Y - numpy.ndarray (1, m) of labels
        cache -  a dictionary with cached activations A of size (n,m)

    Returns:
        dZ - numpy.ndarray (n, m) derivative for the previous layer
    '''
    ### CODE HERE
    one_hot_vector = one_hot(Y.astype(int), 10)

    # Combined softmax + cross-entropy derivative: dL/dZ = (A - Y_one_hot) / m
    dZ = (cache["Activation"] - one_hot_vector.T) / Y.shape[1]

    return dZ
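
Because the softmax Jacobian and the cross-entropy gradient cancel, the combined derivative reduces to (A - Y_one_hot) / m. A quick finite-difference check of that claim, assuming the two functions above and a one_hot helper (such as the stub sketched earlier) are in scope:

import numpy as np

np.random.seed(0)
Z = np.random.randn(10, 3)
Y = np.array([[2, 5, 9]])
A, cache, loss = softmax_cross_entropy_loss(Z, Y)
dZ = softmax_cross_entropy_loss_der(Y, cache)

eps = 1e-6
i, j = 4, 1                               # perturb one entry of Z
Zp, Zm = Z.copy(), Z.copy()
Zp[i, j] += eps
Zm[i, j] -= eps
_, _, lp = softmax_cross_entropy_loss(Zp, Y)
_, _, lm = softmax_cross_entropy_loss(Zm, Y)
print(dZ[i, j], (lp - lm) / (2 * eps))    # the two numbers should agree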
Example #4
def main():

    trainDevX, trainDevY, testX, testY = \
            mnist(noTrSamples=50000,noTsSamples=5000,\
            digit_range=[0,1,2,3,4,5,6,7,8,9],\
            noTrPerClass=5000, noTsPerClass=500)

    perm = np.random.permutation(trainDevX.shape[1])
    trainDevX = trainDevX[:, perm]
    trainDevY = trainDevY[:, perm]
    devX = trainDevX[:, 45000:50000]
    devY = trainDevY[:, 45000:50000]
    trainX = trainDevX[:, 0:45000]
    trainY = trainDevY[:, 0:45000]

    #trainY = one_hot(trainY.astype(int), 10)
    #devY = one_hot(devY.astype(int), 10)
    #testY = one_hot(testY.astype(int), 10)

    lDim = [784, 500, 10]
    miniBatchSize = 100
    k = [[50], [100]]
    nEpoch = 200
    alpha = 0.001
    costHL = []
    accDev = []
    accTest = []
    errDev = []
    errTest = []

    for i in range(len(k)):
        print(k[i])
        parameters, costTrain, costDev = neuralNetwork(
            trainX, trainY, lDim, nEpoch, alpha, devX, devY, k[i],
            miniBatchSize)
        costHL.append((costTrain, costDev))
        errDev.append(costDev[nEpoch - 1])
        Ypred, _ = predictClass(devX, parameters)
        accDev.append(accuracy(devY, Ypred))
        Ypred, AL = predictClass(testX, parameters)
        accTest.append(accuracy(testY, Ypred))
        errTest.append(costNN(AL, one_hot(testY.astype(int), 10)))
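
predictClass and accuracy are not shown in these excerpts. A hypothetical stand-in for accuracy that is consistent with how it is called above (two (1, m) label row vectors in, percent correct out) could be as simple as:

import numpy as np

def accuracy(Y, Ypred):
    # Hypothetical stand-in, not the original helper: both arguments are
    # assumed to be (1, m) label row vectors; returns the percent correct.
    return np.mean(Y.astype(int) == Ypred.astype(int)) * 100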
Example #5
def main():
    '''
    Trains a multilayer network for MNIST digit classification (all 10 digits)
    To create a network with 1 hidden layer of dimensions 800
    Run the program as:
        python deepMultiClassNetwork_starter.py "[784,800]"
    The network will have the dimensions [784,800,10]
    784 is the input size of digit images (28pix x 28pix = 784)
    10 is the number of digits

    To create a network with 2 hidden layers of dimensions 800 and 500
    Run the program as:
        python deepMultiClassNetwork_starter.py "[784,800,500]"
    The network will have the dimensions [784,800,500,10]
    784 is the input size of digit images (28pix x 28pix = 784)
    10 is the number of digits
    '''

    net_dims = ast.literal_eval(sys.argv[1])
    net_dims.append(10)  # Adding the digits layer with dimensionality = 10
    print("Network dimensions are:" + str(net_dims))

    # getting the subset dataset from MNIST
    train_data, train_label, test_data, test_label = \
            mnist(noTrSamples=6000,noTsSamples=1000,\
            digit_range=[0,1,2,3,4,5,6,7,8,9],\
            noTrPerClass=600, noTsPerClass=100)

    valid_data, valid_label = train_data[:, 5000:6000], train_label[:,
                                                                    5000:6000]

    # initialize learning rate and num_iterations
    learning_rate = 10.0
    num_iterations = 1000
    num_train_samples = 5000
    num_validate_samples = 1000
    num_test_samples = 1000


    costs, parameters ,costs_valid = multi_layer_network(train_data, train_label,valid_data,valid_label, net_dims, \
            num_iterations=num_iterations, learning_rate= learning_rate, decay_rate = 0.0)

    # compute the accuracy for training set and testing set
    train_Pred = classify(train_data, parameters)
    test_Pred = classify(test_data, parameters)

    train_label_new = one_hot(train_label.astype(int), 10)
    test_label_new = one_hot(test_label.astype(int), 10)

    result_train = np.sum(abs(train_Pred - train_label_new.T))
    print("Train error is : %f" % (result_train))
    result_test = np.sum(abs(test_Pred - test_label_new.T))
    print("Test error is : %f" % (result_test))

    trAcc = 1 / num_train_samples * np.sum(num_train_samples -
                                           result_train) * 100

    teAcc = 1 / num_test_samples * np.sum(num_test_samples - result_test) * 100

    print("Accuracy for training set is {0:0.3f} %".format(trAcc))
    print("Accuracy for testing set is {0:0.3f} %".format(teAcc))

    ### CODE HERE to plot costs
    x = range(0, int(num_iterations / 10))
    plt.plot(x, costs)
    plt.plot(x, costs_valid)
    plt.xlabel('iterations')
    plt.ylabel('Costs')
    plt.title('Training and validation')
    plt.show()
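
classify is defined elsewhere; the subtraction against the transposed one-hot labels above implies it returns (10, m) one-hot predictions. A hypothetical sketch consistent with that usage, reusing multi_layer_forward and the label-free mode of softmax_cross_entropy_loss:

import numpy as np

def classify(X, parameters):
    # Hypothetical sketch, not the original helper: forward-propagate, take
    # the per-column argmax, and return (10, m) one-hot predictions so that
    # classify(X, parameters) - one_hot(labels, 10).T counts mismatches.
    AL, _ = multi_layer_forward(X, parameters)
    A, _, _ = softmax_cross_entropy_loss(AL)
    YPred = np.zeros_like(A)
    YPred[np.argmax(A, axis=0), np.arange(A.shape[1])] = 1
    return YPred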
Example #6
def multi_layer_network(X,
                        Y,
                        valid_data,
                        valid_label,
                        net_dims,
                        num_iterations,
                        learning_rate,
                        decay_rate=0.01):  # changed the default values
    '''
    Creates the multilayer network and trains the network

    Inputs:
        X - numpy.ndarray (n,m) of training data
        Y - numpy.ndarray (1,m) of training data labels
        valid_data - numpy.ndarray (n,m') of validation data
        valid_label - numpy.ndarray (1,m') of validation data labels
        net_dims - tuple of layer dimensions
        num_iterations - num of epochs to train
        learning_rate - step size for gradient descent
        decay_rate - learning rate decay passed on to update_parameters

    Returns:
        costs - list of training costs, recorded every 10 iterations
        parameters - dictionary of trained network parameters
        costs_valid - list of validation costs, recorded every 10 iterations
    '''
    parameters = initialize_multilayer_weights(net_dims)
    #print("shape of the parameters")
    #print (len(parameters))
    A0 = X
    costs = []
    costs_valid = []
    for ii in range(num_iterations):
        ### CODE HERE
        # Forward Prop
        ## call to multi_layer_forward to get activations
        AL, caches = multi_layer_forward(X, parameters)

        ## call to softmax cross entropy loss
        A, cache_Activation, cost = softmax_cross_entropy_loss(AL, Y)

        # Backward Prop
        ## call to softmax cross entropy loss der
        dZ = softmax_cross_entropy_loss_der(Y, cache_Activation)

        ## call to multi_layer_backward to get gradients
        grad = multi_layer_backward(dZ, caches, parameters)

        ## call to update the parameters
        parameters, alpha = update_parameters(parameters,
                                              grad,
                                              num_iterations,
                                              learning_rate,
                                              decay_rate=decay_rate)

        if ii % 10 == 0:
            costs.append(cost)

            AL_valid, caches_valid = multi_layer_forward(
                valid_data, parameters)
            valid_pred = classify(valid_data, parameters)
            x1, cache_Activation, valid_loss = softmax_cross_entropy_loss(
                AL_valid, valid_label)
            costs_valid.append(valid_loss)
            valid_label_new = one_hot(valid_label.astype(int), 10)
            result_valid = np.sum(np.abs(valid_pred - valid_label_new.T))
            print("validation error is : %f" % (result_valid))

            print("Cost at iteration %i is: %.05f, learning rate: %.05f" %
                  (ii, cost, learning_rate))

    return costs, parameters, costs_valid
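
update_parameters returns both the parameters and the step size alpha that was actually used, which suggests it applies a learning-rate decay internally. Its implementation is not part of this excerpt; a hypothetical sketch under the usual "W1"/"b1", "W2"/"b2", ... and "dW1"/"db1", ... key conventions might look like this:

def update_parameters(parameters, gradients, epoch, learning_rate, decay_rate=0.0):
    # Hypothetical sketch, not the original implementation: decay the step
    # size, then take one plain gradient-descent step per layer.
    alpha = learning_rate / (1 + decay_rate * epoch)
    L = len(parameters) // 2          # assumes only W/b pairs are stored
    for l in range(1, L + 1):
        parameters["W" + str(l)] -= alpha * gradients["dW" + str(l)]
        parameters["b" + str(l)] -= alpha * gradients["db" + str(l)]
    return parameters, alpha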