def test_static_softmax_integer():
    # reuse the test cases from below with integer arrays
    skew = np.array([0.87566484, 0.53596079, 0.85693981, 0.09526036])
    x = Tensor([0, 1, 2, 3])

    f = (softmax(x, constant=False) * skew).sum()
    out = np.array(0.33911235096116465)
    assert_allclose(actual=f.data, desired=out)

    f.backward()
    dx = np.array([0.01720112, 0.01715422, 0.12266443, -0.15701977])
    assert_allclose(x.grad, dx, atol=1e-5, rtol=1e-5)

    skew = np.array(
        [
            [0.87566484, 0.53596079, 0.85693981, 0.09526036],
            [0.32024455, 0.81532148, 0.2480434, 0.85119342],
            [0.57943085, 0.33958252, 0.95864464, 0.22881712],
        ]
    )
    x = Tensor([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]])

    f = (softmax(x, constant=False) * skew).sum()
    out = np.array(1.449875865467131)
    assert_allclose(actual=f.data, desired=out)

    f.backward()
    dx = np.array(
        [
            [0.01720112, 0.01715422, 0.12266443, -0.15701977],
            [-0.01179518, 0.01108053, -0.10425844, 0.10497309],
            [0.00502799, -0.00723393, 0.12698131, -0.12477536],
        ]
    )
    assert_allclose(x.grad, dx, atol=1e-5, rtol=1e-5)
def test_focal_loss(num_datum, num_classes, alpha, gamma, data, grad, target_type):
    scores = data.draw(
        hnp.arrays(shape=(num_datum, num_classes), dtype=float, elements=st.floats(1, 100))
    )
    assume((abs(scores.sum(axis=1)) > 0.001).all())

    scores_mygrad = Tensor(scores)
    scores_nn = Tensor(scores)

    truth = np.zeros((num_datum, num_classes))
    targets = data.draw(
        st.tuples(*(st.integers(0, num_classes - 1) for i in range(num_datum)))
    )
    truth[range(num_datum), targets] = 1
    targets = target_type(targets)

    fl = focal_loss(softmax(scores_mygrad), targets, alpha=alpha, gamma=gamma).mean()
    fl.backward(grad)

    nn_loss = softmax_focal_loss(scores_nn, targets, alpha=alpha, gamma=gamma).mean()
    nn_loss.backward(grad)

    assert isinstance(nn_loss, Tensor) and nn_loss.ndim == 0
    assert_allclose(nn_loss.data, fl.data, atol=1e-4, rtol=1e-4)
    assert_allclose(scores_nn.grad, scores_mygrad.grad, atol=1e-4, rtol=1e-4)

    nn_loss.null_gradients()
    assert scores_nn.grad is None
def test_static_softmax2d():
    # Verified against theano.tensor.softmax
    skew = np.array(
        [
            [0.87566484, 0.53596079, 0.85693981, 0.09526036],
            [0.32024455, 0.81532148, 0.2480434, 0.85119342],
            [0.57943085, 0.33958252, 0.95864464, 0.22881712],
        ]
    )
    x = np.array([[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0]])
    x = Tensor(x)

    f = (softmax(x, constant=False) * skew).sum()
    out = np.array(1.449875865467131)
    assert_allclose(actual=f.data, desired=out)

    f.backward()
    dx = np.array(
        [
            [0.01720112, 0.01715422, 0.12266443, -0.15701977],
            [-0.01179518, 0.01108053, -0.10425844, 0.10497309],
            [0.00502799, -0.00723393, 0.12698131, -0.12477536],
        ]
    )
    assert_allclose(x.grad, dx, atol=1e-5, rtol=1e-5)
def step(self, tetrisBoards, executor, done):
    # calculates and stores derivatives
    for i, tetrisBoard in enumerate(tetrisBoards):
        if done[i]:
            continue
        # temp = time.time()
        data = self.preprocess(tetrisBoard)
        # print("Preprocess: {}".format(time.time() - temp))
        # temp = time.time()
        outNeurons = self.model.policyForward(data)
        probabilities = softmax(outNeurons)
        # print("Forward: {}".format(time.time() - temp))
        # temp = time.time()
        choice = np.random.choice(self.cache['arange'], p=probabilities)
        # print("Choice: {}".format(time.time() - temp))
        # temp = time.time()
        loss = softmax_crossentropy(outNeurons.reshape(1, len(outNeurons)), [choice])
        loss.backward()
        # print("Backprop: {}".format(time.time() - temp))
        # temp = time.time()
        self.derivatives[i].append(self.model.getDerivatives())
        loss.null_gradients()
        # print("Store: {}".format(time.time() - temp))
        executor(tetrisBoard, choice)
def test_softmax_crossentropy(data: st.DataObject, labels_as_tensor: bool):
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    y_true = data.draw(
        hnp.arrays(
            shape=(s.shape[0],),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        ).map(Tensor if labels_as_tensor else lambda x: x)
    )
    scores = Tensor(s)
    softmax_cross = softmax_crossentropy(scores, y_true, constant=False)
    softmax_cross.backward()

    mygrad_scores = Tensor(s)
    probs = softmax(mygrad_scores)

    correct_labels = (range(len(y_true)), y_true.data if labels_as_tensor else y_true)
    truth = np.zeros(mygrad_scores.shape)
    truth[correct_labels] = 1

    mygrad_cross = (-1 / s.shape[0]) * (log(probs) * truth).sum()
    mygrad_cross.backward()

    assert_allclose(softmax_cross.data, mygrad_cross.data, atol=1e-5, rtol=1e-5)
    assert_allclose(scores.grad, mygrad_scores.grad, atol=1e-5, rtol=1e-5)
def test_softmax_crossentropy(data):
    """Test the built-in implementation of softmax cross-entropy
    against the pure pygrad version"""
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    labels = data.draw(
        hnp.arrays(
            shape=(s.shape[0],),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        )
    )
    scores = Tensor(s)
    softmax_cross = softmax_crossentropy(scores, labels, constant=False)
    softmax_cross.backward()

    pygrad_scores = Tensor(s)
    probs = softmax(pygrad_scores)

    correct_labels = (range(len(labels)), labels)
    truth = np.zeros(pygrad_scores.shape)
    truth[correct_labels] = 1

    pygrad_cross = (-1 / s.shape[0]) * (log(probs) * truth).sum()
    pygrad_cross.backward()

    assert_allclose(softmax_cross.data, pygrad_cross.data, atol=1e-5, rtol=1e-5)
    assert_allclose(scores.grad, pygrad_scores.grad, atol=1e-5, rtol=1e-5)
def numpy_softmax_focal_loss(
    scores: np.ndarray, targets: np.ndarray, alpha: float, gamma: float
) -> np.ndarray:
    if isinstance(targets, mg.Tensor):
        targets = targets.data

    scores = softmax(scores).data
    rows = np.arange(len(scores))
    pc = scores[rows, targets]
    return -alpha * np.clip(1 - pc, a_min=0, a_max=1) ** gamma * np.log(pc)
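# Usage sketch (not from the original source): `numpy_softmax_focal_loss` above can serve
# as a plain-NumPy oracle for the library op. The import paths below are assumptions based
# on mygrad's `nnet` module layout; the example data is illustrative only.
import numpy as np
import mygrad as mg
from mygrad.nnet.losses import softmax_focal_loss

example_scores = np.array([[1.0, 2.0, 0.5],
                           [0.2, -1.0, 3.0]])
example_targets = np.array([2, 0])

# per-datum focal loss from the NumPy reference vs. the differentiable mygrad op
expected = numpy_softmax_focal_loss(example_scores, example_targets, alpha=0.25, gamma=2.0)
actual = softmax_focal_loss(mg.Tensor(example_scores), example_targets, alpha=0.25, gamma=2.0)
np.testing.assert_allclose(actual.data, expected, atol=1e-6, rtol=1e-6)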
def softmax_focal_loss(scores, targets, *, alpha=1, gamma=0, constant=False):
    r"""
    Applies the softmax normalization to the input scores before computing the
    per-datum focal loss.

    Parameters
    ----------
    scores : mygrad.Tensor, shape=(N, C)
        The C class scores for each of the N pieces of data.

    targets : Sequence[int], shape=(N,)
        The correct class indices, in [0, C), for each datum.

    alpha : Real, optional (default=1)
        The ɑ weighting factor in the loss formulation.

    gamma : Real, optional (default=0)
        The ɣ focusing parameter. Note that for ɣ=0 and ɑ=1, this is
        cross-entropy loss. Must be a non-negative value.

    constant : bool, optional (default=False)
        If ``True``, the returned tensor is a constant (it does not
        back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor, shape=(N,)
        The per-datum focal loss.

    Notes
    -----
    The focal loss was introduced in https://arxiv.org/abs/1708.02002; it is
    given by -ɑ(1-p)ˠlog(p).

    The focal loss for datum-:math:`i` is given by

    .. math::
        -\alpha \hat{y}_i(1-p_i)^\gamma\log(p_i)

    where :math:`\hat{y}_i` is one in correspondence to the label associated with the
    datum and 0 elsewhere. That is, if the label :math:`y_k` is 2 and there are four
    possible label values, then :math:`\hat{y}_k = (0, 0, 1, 0)`.

    It is recommended in the paper that you normalize by the number of foreground samples.
    """
    return focal_loss(
        softmax(scores), targets=targets, alpha=alpha, gamma=gamma, constant=constant
    )
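# Worked check (not from the original source): the docstring notes that ɣ=0 and ɑ=1 reduce
# the focal loss to cross-entropy. A minimal sketch of that claim, assuming the mygrad
# import paths below; `softmax_crossentropy` already returns the mean over the batch, so
# the per-datum focal losses are averaged to match.
import numpy as np
import mygrad as mg
from mygrad.nnet.losses import softmax_crossentropy, softmax_focal_loss

scores = np.array([[2.0, -1.0, 0.5],
                   [0.1, 3.0, -0.2]])
targets = np.array([0, 1])

fl = softmax_focal_loss(mg.Tensor(scores), targets, alpha=1, gamma=0).mean()
ce = softmax_crossentropy(mg.Tensor(scores), targets)
np.testing.assert_allclose(fl.data, ce.data, atol=1e-6, rtol=1e-6)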
def test_static_softmax1d():
    # Verified against theano.tensor.softmax
    skew = np.array([0.87566484, 0.53596079, 0.85693981, 0.09526036])
    x = np.array([0.0, 1.0, 2.0, 3.0])
    x = Tensor(x)

    f = (softmax(x, constant=False) * skew).sum()
    out = np.array(0.33911235096116465)
    assert_allclose(actual=f.data, desired=out)

    f.backward()
    dx = np.array([0.01720112, 0.01715422, 0.12266443, -0.15701977])
    assert_allclose(x.grad, dx, atol=1e-5, rtol=1e-5)
def step(self, tetrisBoards, done, executor):
    boards = [(i, board) for i, board in enumerate(tetrisBoards) if not done[i]]
    for i, board in boards:
        outNeurons = self.model.policyForward(
            self.preprocess(board)[np.newaxis, np.newaxis, :, :]
        )
        probabilities = softmax(outNeurons)
        choice = np.random.choice(5, p=probabilities.reshape(5))

        loss = softmax_crossentropy(outNeurons, np.array([choice]))
        loss.backward()

        self.derivatives.append(self.model.getDerivatives())
        if board not in self.rewardMap:
            self.rewardMap[board] = []
        self.rewardMap[board].append(len(self.rewards))
        self.rewards.append(None)  # Temp placeholder

        loss.null_gradients()
        executor(board, choice)
def test_static_softmax():
    # Verified against theano.tensor.softmax
    skew = np.array(
        [
            [0.87566484, 0.53596079, 0.85693981, 0.09526036],
            [0.32024455, 0.81532148, 0.2480434, 0.85119342],
            [0.57943085, 0.33958252, 0.95864464, 0.22881712],
        ]
    )
    x = np.array([[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0]])
    x = Tensor(x)

    f = (softmax(x) * skew).sum()
    out = np.array(1.449875865467131)
    assert np.allclose(f.data, out)

    f.backward()
    dx = np.array(
        [
            [0.01720112, 0.01715422, 0.12266443, -0.15701977],
            [-0.01179518, 0.01108053, -0.10425844, 0.10497309],
            [0.00502799, -0.00723393, 0.12698131, -0.12477536],
        ]
    )
    assert np.allclose(x.grad, dx)
def step(self, tetrisBoards, done, executor):
    boards = [(i, board) for i, board in enumerate(tetrisBoards) if not done[i]]
    stacked = np.stack(
        [self.preprocess(board) for i, board in boards]
    )[:, np.newaxis, :, :]

    outNeurons = self.model.policyForward(stacked)
    probabilities = softmax(outNeurons)
    choices = self.sample(probabilities.data)
    assert len(choices) == len(boards)

    loss = softmax_crossentropy(outNeurons, choices)
    loss.backward()

    # Store gradients
    converted = self.convert(self.model.getDerivatives())
    print(len(converted))
    self.derivatives.extend(converted)  # Derivatives are batched
    self.rewards.append([1 if choice == 0 else 0 for choice in choices])
    print(len(self.model.getDerivatives()))
    print(len(self.rewards[-1]))
    assert len(self.derivatives[-1]) == len(self.rewards[-1])

    # Calculate rewards array
    loss.null_gradients()
    for choice, (i, board) in zip(choices, boards):
        executor(board, choice)
def test_softmax_numerical_stability(x: np.ndarray, data: st.DataObject):
    axis = data.draw(valid_axes(x.ndim), label="axis")
    out = softmax(x, axis=axis).data
    assert np.all(np.logical_and(0 <= out, out <= 1))
    assert_allclose(out.sum(axis=axis), 1.0)
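# Illustration (not from the original source) of the property exercised above, assuming
# mygrad's softmax applies the usual max-subtraction trick so that very large logits do
# not overflow to nan/inf. Import paths are assumptions based on mygrad's module layout.
import numpy as np
from mygrad import Tensor
from mygrad.nnet.activations import softmax

big = Tensor(np.array([[1000.0, 1001.0, 1002.0]]))
probs = softmax(big, axis=-1).data
assert np.all(np.isfinite(probs))                   # no overflow despite huge logits
np.testing.assert_allclose(probs.sum(axis=-1), 1.0)  # still a valid probability vector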