import numpy as np


def forward_backward_prop(data, labels, params):
    """ Forward and backward propagation for a two-layer sigmoidal network """
    ###################################################################
    # Compute the forward propagation and the cross entropy cost, and #
    # the backward propagation for the gradients of all parameters.   #
    ###################################################################

    ### Unpack network parameters (do not modify)
    t = 0
    W1 = np.reshape(params[t:t + dimensions[0] * dimensions[1]],
                    (dimensions[0], dimensions[1]))
    t += dimensions[0] * dimensions[1]
    b1 = np.reshape(params[t:t + dimensions[1]], (1, dimensions[1]))
    t += dimensions[1]
    W2 = np.reshape(params[t:t + dimensions[1] * dimensions[2]],
                    (dimensions[1], dimensions[2]))
    t += dimensions[1] * dimensions[2]
    b2 = np.reshape(params[t:t + dimensions[2]], (1, dimensions[2]))

    ### YOUR CODE HERE: forward propagation
    N, D = data.shape
    h = sigmoid(data.dot(W1) + b1)        # hidden layer activations
    scores = softmax(h.dot(W2) + b2)      # output class probabilities
    cost = np.sum(-np.log(scores[labels == 1])) / N   # mean cross entropy over the batch
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    dscores = scores - labels             # gradient of cross entropy w.r.t. softmax inputs
    dscores /= N
    gradb2 = np.sum(dscores, axis=0)
    gradW2 = np.dot(h.T, dscores)
    gradh = np.dot(dscores, W2.T)
    gradh = sigmoid_grad(h) * gradh       # backprop through the sigmoid
    gradb1 = np.sum(gradh, axis=0)
    gradW1 = np.dot(data.T, gradh)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
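# A minimal sketch of exercising forward_backward_prop. The sigmoid,
# sigmoid_grad and softmax definitions below are plausible stand-ins for the
# assignment's own helpers, and the `dimensions` setting, random data and
# one-hot labels are illustrative values used only to check shapes.

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_grad(s):
    # gradient of the sigmoid expressed in terms of its output s = sigmoid(x)
    return s * (1.0 - s)

def softmax(x):
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

dimensions = [10, 5, 3]                       # input, hidden and output sizes
N = 20
data = np.random.randn(N, dimensions[0])
labels = np.zeros((N, dimensions[2]))
labels[np.arange(N), np.random.randint(0, dimensions[2], N)] = 1   # one-hot rows

n_params = (dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2]
params = np.random.randn(n_params)

cost, grad = forward_backward_prop(data, labels, params)
assert grad.shape == params.shape             # stacked gradient matches stacked params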
def softmaxRegression(features, labels, weights, regularization=0.0,
                      nopredictions=False):
    """ Softmax Regression """
    ###################################################################
    # Implement softmax regression with weight regularization.        #
    # Inputs:                                                         #
    # - features: feature vectors, each row is a feature vector       #
    # - labels: labels corresponding to the feature vectors           #
    # - weights: weights of the regressor                             #
    # - regularization: L2 regularization constant                    #
    # Output:                                                         #
    # - cost: cost of the regressor                                   #
    # - grad: gradient of the regressor cost with respect to its      #
    #   weights                                                       #
    # - pred: label predictions of the regressor (you might find      #
    #   np.argmax helpful)                                            #
    ###################################################################

    prob = softmax(features.dot(weights))
    if len(features.shape) > 1:
        N = features.shape[0]
    else:
        N = 1

    # A vectorized implementation of
    # 1/N * sum(cross_entropy(x_i, y_i)) + regularization/2 * |w|^2
    cost = np.sum(-np.log(prob[range(N), labels])) / N
    cost += 0.5 * regularization * np.sum(weights ** 2)

    ### YOUR CODE HERE: compute the gradients and predictions
    dscores = prob.copy()
    dscores[range(N), labels] -= 1        # subtract 1 at each example's true class
    dscores /= N
    grad = features.T.dot(dscores) + regularization * weights
    pred = np.argmax(prob, axis=1)
    ### END YOUR CODE

    if nopredictions:
        return cost, grad
    else:
        return cost, grad, pred
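# A minimal usage sketch for softmaxRegression, reusing the stand-in softmax
# helper above. The feature dimension, number of classes and regularization
# strength are illustrative values, not part of the original assignment.
N, D, C = 50, 5, 3
features = np.random.randn(N, D)
labels = np.random.randint(0, C, N)           # one integer class label per example
weights = 0.01 * np.random.randn(D, C)

cost, grad, pred = softmaxRegression(features, labels, weights,
                                     regularization=1e-3)
assert grad.shape == weights.shape            # one gradient entry per weight
assert pred.shape == (N,)                     # one predicted class per example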
def softmaxCostAndGradient(predicted, target, outputVectors):
    """ Softmax cost function for word2vec models """
    ###################################################################
    # Implement the cost and gradients for one predicted word vector  #
    # and one target word vector as a building block for word2vec     #
    # models, assuming the softmax prediction function and cross      #
    # entropy loss.                                                   #
    # Inputs:                                                         #
    # - predicted: numpy ndarray, predicted word vector (\hat{r} in   #
    #   the written component) (V_wi)                                 #
    # - target: integer, the index of the target word                 #
    # - outputVectors: "output" vectors for all tokens                #
    # Outputs:                                                        #
    # - cost: cross entropy cost for the softmax word prediction      #
    # - gradPred: the gradient with respect to the predicted word     #
    #   vector                                                        #
    # - grad: the gradient with respect to all the other word         #
    #   vectors                                                       #
    # We will not provide starter code for this function, but feel    #
    # free to reference the code you previously wrote for this        #
    # assignment!                                                     #
    ###################################################################

    ### YOUR CODE HERE
    V, D = outputVectors.shape
    scores = softmax(outputVectors.dot(predicted).reshape(1, V)).reshape(V,)
    cost = -np.log(scores[target])

    labels = np.zeros(V)
    labels[target] = 1
    dscores = scores - labels
    gradPred = dscores.dot(outputVectors)
    grad = dscores.reshape(V, 1).dot(predicted.reshape(D, 1).T)
    ### END YOUR CODE

    return cost, gradPred, grad
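# A minimal sketch of calling softmaxCostAndGradient, again reusing the
# stand-in softmax helper above. The vocabulary size, vector dimension and
# target index are illustrative values.
V, D = 8, 4
outputVectors = np.random.randn(V, D)
predicted = np.random.randn(D)
target = 2

cost, gradPred, grad = softmaxCostAndGradient(predicted, target, outputVectors)
assert gradPred.shape == (D,)                 # gradient w.r.t. the predicted (center) vector
assert grad.shape == outputVectors.shape      # gradient w.r.t. every output vector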
def conv(image, label, params, conv_s, pool_f, pool_s):
    '''
    Combine the forward and backward propagation to build a method that takes
    the input parameters and hyperparameters as inputs and outputs the
    gradients and the loss.
    '''
    [f1, f2, w3, w4, b1, b2, b3, b4] = params  # filters, weights and biases

    #############################################
    ############# Forward operation #############
    #############################################
    conv1 = convolution(image, f1, b1, conv_s)  # first convolution layer
    conv1[conv1 <= 0] = 0                       # apply ReLU non-linearity

    conv2 = convolution(conv1, f2, b2, conv_s)  # second convolution layer
    conv2[conv2 <= 0] = 0                       # apply ReLU non-linearity

    pooled = maxpool(conv2, pool_f, pool_s)     # maxpooling
    (nf2, dim2, _) = pooled.shape
    fc = pooled.reshape((nf2 * dim2 * dim2, 1)) # flatten pooled layer

    z = w3.dot(fc) + b3                         # pass flattened pool through first fully connected layer
    z[z <= 0] = 0                               # pass through ReLU function

    out = w4.dot(z) + b4                        # pass through second fully connected layer
    probs = softmax(out)                        # apply softmax activation function to find predicted probabilities

    #############################################
    #################### Loss ###################
    #############################################
    loss = categoricalCrossEntropy(probs, label)

    #############################################
    ############# Backward operation ############
    #############################################
    d_out = probs - label                       # derivative of loss w.r.t. final dense layer output
    dw4 = d_out.dot(z.T)                        # loss gradient of final dense layer weights
    db4 = np.sum(d_out, axis=1).reshape(b4.shape)   # loss gradient of final dense layer biases

    dz = w4.T.dot(d_out)                        # loss gradient of first dense layer outputs
    dz[z <= 0] = 0                              # backprop through ReLU
    dw3 = dz.dot(fc.T)                          # loss gradient of first dense layer weights
    db3 = np.sum(dz, axis=1).reshape(b3.shape)  # loss gradient of first dense layer biases

    dfc = w3.T.dot(dz)                          # loss gradient of fully connected pooling layer
    dpool = dfc.reshape(pooled.shape)           # reshape into dimensions of pooling layer

    dconv2 = maxpoolBackward(dpool, conv2, pool_f, pool_s)   # backprop through the maxpooling layer
    dconv2[conv2 <= 0] = 0                      # backprop through ReLU

    dconv1, df2, db2 = convolutionBackward(dconv2, conv1, f2, conv_s)  # backprop through second conv layer
    dconv1[conv1 <= 0] = 0                      # backprop through ReLU

    dimage, df1, db1 = convolutionBackward(dconv1, image, f1, conv_s)  # backprop through first conv layer

    grads = [df1, df2, dw3, dw4, db1, db2, db3, db4]

    return grads, loss
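# A hedged sketch of a single SGD update using the gradients returned by conv.
# `image`, `label` and the hypothetical `cnn_params` list (ordered
# [f1, f2, w3, w4, b1, b2, b3, b4], matching the gradient order above) are
# assumed to come from the surrounding training code, and conv itself relies
# on the convolution, maxpool, maxpoolBackward, convolutionBackward,
# categoricalCrossEntropy and softmax helpers defined elsewhere. The stride,
# pooling and learning-rate values are illustrative.
lr = 0.01
grads, loss = conv(image, label, cnn_params, conv_s=1, pool_f=2, pool_s=2)
cnn_params = [p - lr * g for p, g in zip(cnn_params, grads)]   # vanilla gradient descent step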