def softmax_with_cross_entropy(preds, target_index):
    """
    Computes softmax and cross-entropy loss for model predictions,
    including the gradient

    Arguments:
      preds: np array, shape is either (N) or (batch_size, N) -
          classifier output
      target_index: np array of int, shape is (1) or (batch_size) -
          index of the true class for given sample(s)

    Returns:
      loss, single value - cross-entropy loss
      d_preds, np array same shape as predictions - gradient of predictions by loss value
    """
    # Work on a 2D view so single samples and batches share one code path
    preds_2d = np.atleast_2d(preds)
    probs = softmax(preds_2d)
    loss = np.mean(cross_entropy_loss(probs, target_index))

    # One-hot mask of the true classes
    mask = np.zeros_like(preds_2d)
    mask[np.arange(mask.shape[0]), target_index] = 1

    # Gradient of the mean cross-entropy w.r.t. the raw predictions
    d_preds = -(mask - probs) / mask.shape[0]
    return loss, d_preds.reshape(preds.shape)
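# Hedged sanity check (a sketch, not part of the assignment code): verify the
# analytic gradient above against a central finite difference on one entry.
# Assumes softmax and cross_entropy_loss from this file behave as used above.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    x = rng.normal(size=(3, 4))
    targets = np.array([0, 2, 1])
    _, grad = softmax_with_cross_entropy(x, targets)

    eps = 1e-5
    x_plus, x_minus = x.copy(), x.copy()
    x_plus[0, 1] += eps
    x_minus[0, 1] -= eps
    loss_plus = softmax_with_cross_entropy(x_plus, targets)[0]
    loss_minus = softmax_with_cross_entropy(x_minus, targets)[0]
    numeric = (loss_plus - loss_minus) / (2 * eps)
    assert np.isclose(grad[0, 1], numeric, atol=1e-6)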
def predict(self, X):
    # You can probably copy the code from previous assignment
    out = self.conv1.forward(X)
    out = self.relu1.forward(out)
    out = self.maxpool1.forward(out)
    out = self.conv2.forward(out)
    out = self.relu2.forward(out)
    out = self.maxpool2.forward(out)
    out = self.flatten.forward(out)
    out = self.fc.forward(out)
    probs = softmax(out)
    y_pred = np.argmax(probs, axis=1)
    return y_pred
def test_cross_entropy_loss(self):
    f = cross_entropy_loss
    num_classes = 4
    batch_size = 5
    probs = softmax(np.random.randint(-100, 100, (batch_size, num_classes)))
    # randint's upper bound is exclusive, so use num_classes to allow every class
    targets = np.random.randint(0, num_classes, (batch_size, ), dtype=int)

    # The batch loss should equal the mean of the per-sample losses
    total = 0
    for i in range(batch_size):
        total += f(probs[i], targets[i])

    output = f(probs, targets)
    self.assertAlmostEqual(total / batch_size, output)
    self.assertIsInstance(output, float)
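# Hedged sketch of a cross_entropy_loss consistent with the test above (the
# real implementation in linear_classifer.py may differ): the mean negative
# log-probability of the true class, returned as a plain float so the batch
# value equals the average of the per-sample values.
def cross_entropy_loss_sketch(probs, target_index):
    probs = np.atleast_2d(probs)
    targets = np.atleast_1d(target_index)
    # Probability assigned to the true class of each sample
    true_probs = probs[np.arange(probs.shape[0]), targets]
    return float(np.mean(-np.log(true_probs)))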
def predict(self, X):
    """
    Produces classifier predictions on the set

    Arguments:
      X, np array (test_samples, num_features)

    Returns:
      y_pred, np.array of int (test_samples)
    """
    # Reuse the forward pass from compute_loss_and_gradients,
    # then take the most probable class for each sample
    predictions = self.forward_pass(X)
    probs = lc.softmax(predictions)
    y_pred = np.argmax(probs, axis=1)
    return y_pred
def predict(self, X):
    """
    Produces classifier predictions on the set

    Arguments:
      X, np array (test_samples, num_features)

    Returns:
      y_pred, np.array of int (test_samples)
    """
    # Forward pass through both dense layers, then pick the argmax class
    X1 = self.Dense1.forward(X)
    X_relu = self.Relu.forward(X1)
    X2 = self.Dense2.forward(X_relu)
    probs = softmax(X2)
    pred = np.argmax(probs, axis=1)
    return pred
def predict(self, X):
    """
    Produces classifier predictions on the set

    Arguments:
      X, np array (test_samples, num_features)

    Returns:
      y_pred, np.array of int (test_samples)
    """
    # Forward pass through layer1 -> ReLU -> layer2, then argmax over classes
    out1 = self.layer1.forward(X)
    out_relu = self.relu_layer.forward(out1)
    predictions = self.layer2.forward(out_relu)
    probs = softmax(predictions)
    y_pred = np.argmax(probs, axis=1)
    return y_pred
# $$
# \sigma(z)_j = \frac{e^{z_j}}{\displaystyle\sum_{k=1}^K e^{z_k}}
# $$
#
# **Important:** A practical aspect of computing this function is that it involves exponentiating potentially very large numbers - this can produce huge values in the numerator and denominator, outside the float range.
#
# Fortunately, this problem has a simple fix -- before computing softmax, subtract the maximum score from all the scores:
# ```
# predictions -= np.max(predictions)
# ```
# ([more details here](http://cs231n.github.io/linear-classify/#softmax), section `Practical issues: Numeric stability`)

#%%
from linear_classifer import softmax

probs = softmax(np.array([[-10, 0, 10]]))
assert np.isclose(np.sum(probs), 1.0)

# Make sure it works for big numbers too!
probs = softmax(np.array([[1000, 0, 0]]))
assert np.isclose(probs[0][0], 1.0)

#%% [markdown]
# Besides that, we will implement the cross-entropy loss, which we will use as the error function.
# In its general form, cross-entropy is defined as follows:
#
# $$
# H(p,q) = -\displaystyle\sum_x p(x)\,\log q(x).
# $$
#
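#%% [markdown]
# A minimal sketch of a numerically stable softmax along these lines (an illustration, not necessarily the assignment's `linear_classifer.softmax`): subtract the per-row maximum before exponentiating, which leaves the result unchanged because softmax is shift-invariant.

#%%
import numpy as np

def softmax_stable(predictions):
    # Work on a float copy; handle both a single vector and a batch
    preds = np.atleast_2d(predictions).astype(float)
    # Shift so the largest logit is 0, preventing overflow in np.exp
    preds = preds - np.max(preds, axis=1, keepdims=True)
    exps = np.exp(preds)
    return exps / np.sum(exps, axis=1, keepdims=True)

assert np.isclose(softmax_stable(np.array([[1000, 0, 0]]))[0][0], 1.0)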
def array_sum(x):
    assert x.shape == (2, ), x.shape
    return np.sum(x), np.ones_like(x)

check_gradient(array_sum, np.array([3.0, 2.0]))

def array_2d_sum(x):
    assert x.shape == (2, 2)
    return np.sum(x), np.ones_like(x)

check_gradient(array_2d_sum, np.array([[3.0, 2.0], [1.0, 0.0]]))

# TODO Implement softmax and cross-entropy for single sample
probs = linear_classifer.softmax(np.array([-10, 0, 10]))

# Make sure it works for big numbers too!
probs = linear_classifer.softmax(np.array([1000, 0, 0]))
assert np.isclose(probs[0], 1.0)

# My test batch softmax
probs = linear_classifer.softmax(
    np.array([[-10, 0, 10], [30, 4, 5], [2, 6, 8]]))

probs = linear_classifer.softmax(np.array([-5, 0, 5]))
linear_classifer.cross_entropy_loss(probs, 1)
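# Extra check (an assumption about the intended behaviour, not required by the
# assignment): for a 2D input, each row of the softmax output should sum to 1.
batch_probs = linear_classifer.softmax(
    np.array([[-10, 0, 10], [30, 4, 5], [2, 6, 8]]))
assert np.allclose(np.sum(batch_probs, axis=1), 1.0)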
    return np.sum(x), np.ones_like(x)

check_gradient(array_sum, np.array([3.0, 2.0]))

def array_2d_sum(x):
    assert x.shape == (2, 2)
    return np.sum(x), np.ones_like(x)

check_gradient(array_2d_sum, np.array([[3.0, 2.0], [1.0, 0.0]]))

# TODO Implement softmax and cross-entropy for single sample
probs = linear_classifer.softmax(np.array([-10, 0, 10]))

# Make sure it works for big numbers too!
probs = linear_classifer.softmax(np.array([1000, 0, 0]))
assert np.isclose(probs[0], 1.0)

probs = linear_classifer.softmax(np.array([-5, 0, 5]))
print(linear_classifer.cross_entropy_loss(probs, 1))

loss, grad = linear_classifer.softmax_with_cross_entropy(
    np.array([1, 0, 0]), 1
)
check_gradient(
    lambda x: linear_classifer.softmax_with_cross_entropy(x, 1),