import numpy as np

import grads  # project-local helper module (grads.pump is used below)
import sgd    # project-local module providing softmax (referenced in predict)


def softmax_objective(X, W, C):
    """Numerically stable softmax cross-entropy objective (negative mean log-likelihood)."""
    bias_row = np.ones(X.shape[1])
    X = np.vstack([X, bias_row])
    m = X.shape[1]
    l = W.shape[1]
    Xt = X.transpose()
    XtW = np.matmul(Xt, W)
    eta = XtW.max(axis=1)
    # pumping into matrix with shape (m, l)
    eta = grads.pump(eta, m, l)
    num = np.exp(XtW - eta)
    # summing rows to get denominator
    den = num.sum(axis=1)
    # pumping into matrix with shape (m, l)
    den = grads.pump(den, m, l)
    logDXtW = np.log(num / den)
    res = 0
    for i in range(l):
        res += np.sum(C[i, :] * logDXtW[:, i])
    return res * (-1 / m)
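
# `grads.pump` is a project-local helper not shown in this excerpt. Judging only from
# its call sites here (a length-m vector expanded to match an (m, l) matrix), a
# plausible equivalent is the column-tiling sketch below. This is an assumption for
# illustration, not the repo's actual implementation; NumPy broadcasting (v[:, None])
# achieves the same effect without materialising the tiled matrix.
def pump_sketch(v, rows, cols):
    """Tile the 1-D array v (len(v) == rows) into a (rows, cols) matrix."""
    return np.tile(np.asarray(v).reshape(rows, 1), (1, cols))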

def rearrange_labels(C, c_valid):
    """Convert one-hot label matrices (classes x samples) into integer label vectors."""
    labels = np.arange(C.shape[0])
    training_labels = grads.pump(labels, C.shape[0], C.shape[1])
    c_training = (C * training_labels).sum(axis=0)
    validation_labels = grads.pump(labels, c_valid.shape[0], c_valid.shape[1])
    c_validation = (c_valid * validation_labels).sum(axis=0)
    return c_training, c_validation
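
# Sanity-check sketch: rearrange_labels recovers the class index of each one-hot
# column, which is equivalent to an argmax along the class axis (assuming grads.pump
# tiles a vector column-wise as sketched above). Hypothetical demo helper, kept as a
# function so importing the listing stays side-effect free.
def _rearrange_labels_demo():
    C = np.array([[0, 1, 0],
                  [1, 0, 0],
                  [0, 0, 1]])                    # one-hot, shape (classes, samples)
    labels = np.arange(C.shape[0])
    via_sum = (C * labels[:, None]).sum(axis=0)  # same idea as rearrange_labels
    via_argmax = np.argmax(C, axis=0)
    assert np.array_equal(via_sum, via_argmax)   # both give [1, 0, 2]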

def predict(W, X, B):
    x_i = X
    for i in range(B.shape[0]):
        # running forward pass
        mul_res = np.matmul(W[i], x_i)
        x_i = ReLU(mul_res + grads.pump(B[i], mul_res.shape[0], mul_res.shape[1]))
    prob = sgd.softmax(x_i, np.transpose(W[-1]))
    res = prob.argmax(axis=1)
    return res
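
# `ReLU` is referenced above but not defined in this excerpt; the standard elementwise
# definition below is assumed. This is a hypothetical stand-in sketch, not necessarily
# the repo's exact helper.
def relu_sketch(x):
    """Elementwise rectified linear unit: max(x, 0)."""
    return np.maximum(x, 0)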

def forward_propagation(W, X, B):
    relu_derivatives = []
    x_history = []
    x_i = X
    x_history.append(x_i)
    for i in range(B.shape[0]):
        mul_res = np.matmul(W[i], x_i)
        # activation function
        x_i = ReLU(mul_res + grads.pump(B[i], mul_res.shape[0], mul_res.shape[1]))
        x_history.append(x_i)  # used later for back propagation
        relu_derivatives.append(x_i > 0)
    return relu_derivatives, x_history
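
# Minimal self-contained sketch of the same bookkeeping as forward_propagation, with
# grads.pump replaced by broadcasting and ReLU inlined, to illustrate the shapes
# collected in x_history. All names and dimensions here are made-up toy values.
def _forward_propagation_shape_demo():
    rng = np.random.default_rng(0)
    n, m, h = 4, 6, 3                              # input dim, batch size, hidden width
    W_demo = [rng.standard_normal((h, n)), rng.standard_normal((h, h))]
    B_demo = rng.standard_normal((2, h))           # one bias vector per layer
    x_i = rng.standard_normal((n, m))
    x_history, relu_derivatives = [x_i], []
    for i in range(B_demo.shape[0]):
        mul_res = W_demo[i] @ x_i
        x_i = np.maximum(mul_res + B_demo[i][:, None], 0)   # ReLU(Wx + b)
        x_history.append(x_i)
        relu_derivatives.append(x_i > 0)
    return [x.shape for x in x_history]            # [(4, 6), (3, 6), (3, 6)]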

def softmax(X, W):
    bias_row = np.ones(X.shape[1])
    X = np.vstack([X, bias_row])
    m = X.shape[1]
    l = W.shape[1]
    Xt = X.transpose()
    XtW = np.matmul(Xt, W)
    eta = XtW.max(axis=1)
    # pumping into matrix with shape (m, l)
    eta = grads.pump(eta, m, l)
    num = np.exp(XtW - eta)
    # summing rows to get denominator
    den = num.sum(axis=1)
    # pumping into matrix with shape (m, l)
    den = grads.pump(den, m, l)
    return num / den
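
# Standalone cross-check of the stabilised softmax above, using keepdims broadcasting
# in place of grads.pump. Shapes follow the convention used above: X is
# (features, samples), W is (features + 1, classes). Toy sizes are made up.
def _softmax_rows_sum_to_one_demo():
    rng = np.random.default_rng(0)
    n, m, l = 4, 6, 3
    X_demo = rng.standard_normal((n, m))
    W_demo = rng.standard_normal((n + 1, l))
    XtW = np.vstack([X_demo, np.ones(m)]).T @ W_demo        # (m, l) scores
    num = np.exp(XtW - XtW.max(axis=1, keepdims=True))      # subtract row max for stability
    P = num / num.sum(axis=1, keepdims=True)
    return np.allclose(P.sum(axis=1), 1.0)                  # each row is a distribution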