def smooth_svm_py(x, y, tau):
    # Smooth multiclass SVM loss: tau * logsumexp of the margin-augmented scores.
    x, y = to_numpy(x), to_numpy(y)
    n_samples, n_classes = x.shape
    # Margin term: add 1 to every class score except the true class, subtract x_y.
    scores = x + np.not_equal(np.arange(n_classes)[None, :], y[:, None]) \
        - x[np.arange(n_samples), y][:, None]
    loss = tau * np.mean(sp.logsumexp(scores / tau, axis=1))
    return loss
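# Standalone sanity sketch (uses only numpy/scipy, not this module's helpers;
# the _demo name is illustrative): as tau -> 0, tau * logsumexp(scores / tau)
# approaches max(scores), so the smooth loss should converge to the hard
# multiclass hinge loss.
def _demo_smooth_svm_limit():
    import numpy as np
    from scipy.special import logsumexp
    rng = np.random.default_rng(0)
    x = rng.standard_normal((4, 10))                 # scores: (n_samples, n_classes)
    y = rng.integers(0, 10, size=4)                  # labels
    delta = (np.arange(10)[None, :] != y[:, None]).astype(float)
    scores = x + delta - x[np.arange(4), y][:, None]
    hard = scores.max(axis=1).mean()                 # hard hinge loss
    for tau in (1.0, 0.1, 0.01):
        smooth = tau * np.mean(logsumexp(scores / tau, axis=1))
        print(tau, smooth, hard)                     # smooth -> hard as tau -> 0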
def _test_backward(input_shape, reduction, axis):
    layer = SoftmaxCrossEntropyLossLayer(reduction=reduction)
    data = np.random.random(input_shape) * 2 - 1
    labels_shape = list(data.shape)
    labels_shape.pop(axis)
    labels = np.random.randint(0, data.shape[axis], labels_shape).astype(np.int64)
    loss = layer(data, labels, axis=axis)
    # PyTorch's cross_entropy expects the class axis at dim 1.
    if axis == 1:
        torch_input = utils.from_numpy(data).requires_grad_(True)
    else:
        torch_input = utils.from_numpy(np.moveaxis(data, axis, 1)).requires_grad_(True)
    pytorch_loss = F.cross_entropy(torch_input, utils.from_numpy(labels), reduction=reduction)
    if len(pytorch_loss.shape) > 0:
        pytorch_loss.sum().backward()
    else:
        pytorch_loss.backward()
    utils.assert_close(loss, utils.to_numpy(pytorch_loss))
    grad = layer.backward()
    torch_grad = utils.to_numpy(torch_input.grad)
    if axis != 1:
        # Move the class axis back so the gradient layouts match.
        torch_grad = np.moveaxis(torch_grad, 1, axis)
    utils.assert_close(grad, torch_grad, atol=0.001)
def backward(self, previous_partial_gradient):
    gradients = utils.from_numpy(previous_partial_gradient)
    # torch.nn.grad gives the conv gradients w.r.t. input and weight directly.
    input_grad = grad.conv2d_input(self.data.shape, self.weight_tensor, gradients,
                                   self.stride, self.padding)
    weight_grad = grad.conv2d_weight(self.data, self.weight_tensor.shape, gradients,
                                     self.stride, self.padding)
    # Bias gradient: sum the upstream gradient over batch and spatial dims.
    bias_grad = gradients.sum((0, 2, 3))
    # Undo the (in, out) -> (out, in) axis swap applied to the weights in forward().
    self.weight.grad = utils.to_numpy(weight_grad.transpose(1, 0))
    self.bias.grad = utils.to_numpy(bias_grad)
    data_gradient = utils.to_numpy(input_grad)
    return data_gradient
def forward(self, data):
    # PyTorch expects conv weights as (out, in, kh, kw); this layer stores them
    # with the first two axes swapped, so swap before calling conv2d.
    self.weight_tensor = utils.from_numpy(self.weight.data.swapaxes(0, 1))
    self.bias_tensor = utils.from_numpy(self.bias.data)
    self.data = utils.from_numpy(data)
    self.output = F.conv2d(self.data, self.weight_tensor, self.bias_tensor,
                           self.stride, self.padding)
    return utils.to_numpy(self.output)
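# Standalone check of the torch.nn.grad identities used in backward() above:
# conv2d_input / conv2d_weight should reproduce what autograd computes for the
# same upstream gradient. A minimal sketch, independent of this layer class;
# the _check name and shapes are illustrative only.
def _check_conv2d_grad_helpers():
    import torch
    import torch.nn.functional as F
    from torch.nn import grad
    x = torch.randn(2, 3, 8, 8, requires_grad=True)
    w = torch.randn(5, 3, 3, 3, requires_grad=True)
    out = F.conv2d(x, w, stride=1, padding=1)
    g = torch.randn_like(out)                        # upstream gradient
    out.backward(g)
    gx = grad.conv2d_input(x.shape, w, g, stride=1, padding=1)
    gw = grad.conv2d_weight(x, w.shape, g, stride=1, padding=1)
    assert torch.allclose(gx, x.grad, atol=1e-5)
    assert torch.allclose(gw, w.grad, atol=1e-4)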
def sum_product_py(x, k):
    # k-th elementary symmetric polynomial of each row: sum over all size-k
    # index subsets of the product of the selected entries.
    x = to_numpy(x)
    n_samples, n_classes = x.shape
    res = np.zeros(n_samples)
    for indices in itertools.combinations(range(n_classes), k):
        res += np.prod(x[:, indices], axis=1)
    return res
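# Independent cross-check (assumes this module's to_numpy accepts numpy input;
# the _check name is illustrative): for a single vector, np.poly(-x) returns the
# coefficients of prod(t + x_i) = sum_k e_k(x) t^(n-k), so coefficient k is
# exactly the k-th elementary symmetric polynomial that sum_product_py computes.
def _check_sum_product():
    import numpy as np
    x = np.array([1.0, 2.0, 3.0, 4.0])
    coeffs = np.poly(-x)                   # [1, e_1, e_2, ..., e_n]
    row = sum_product_py(x[None, :], 2)    # e_2 for the single row
    assert np.isclose(coeffs[2], row[0])   # e_2(1,2,3,4) = 35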
def backward(self, previous_partial_gradient):
    gradients = utils.from_numpy(previous_partial_gradient)
    # Vector-Jacobian product of the saved forward output w.r.t. its input.
    new_gradients = torch.autograd.grad(self.output, self.data, gradients,
                                        retain_graph=False)[0]
    return utils.to_numpy(new_gradients)
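# What torch.autograd.grad does here, in isolation: given a forward output and
# its input, it returns the vector-Jacobian product with the upstream gradient.
# A minimal sketch on y = x**2, where the VJP with v is 2*x*v (the _demo name
# is illustrative):
def _demo_autograd_vjp():
    import torch
    x = torch.randn(3, requires_grad=True)
    y = x ** 2
    v = torch.ones_like(y)                       # upstream "vector"
    (g,) = torch.autograd.grad(y, x, v)
    assert torch.allclose(g, 2 * x)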
def svm_topk_smooth_py_1(x, y, tau, k):
    x, y = to_numpy(x), to_numpy(y)
    x = x.astype(np.float128)  # extra precision for the exhaustive subset sums
    tau = float(tau)
    n_samples, n_classes = x.shape
    exp = np.exp(x / (k * tau))
    # Numerator: sum over all k-subsets, with margin delta = 0 if the subset
    # contains the true label and delta = 1 otherwise.
    term_1 = np.zeros(n_samples)
    for indices in itertools.combinations(range(n_classes), k):
        delta = 1. - np.sum(np.array(indices)[None, :] == y[:, None], axis=1)
        term_1 += np.prod(exp[:, indices], axis=1) * np.exp(delta / tau)
    # Denominator: sum over the k-subsets that contain the true label.
    term_2 = np.zeros(n_samples)
    for i in range(n_samples):
        all_but_y = [j for j in range(n_classes) if j != y[i]]
        for indices in itertools.combinations(all_but_y, k - 1):
            term_2[i] += np.prod(exp[i, indices]) * exp[i, y[i]]
    loss = tau * (np.log(term_1) - np.log(term_2))
    return loss
def svm_topk_smooth_py_2(x, y, tau, k):
    # Same loss as svm_topk_smooth_py_1, factored so the true-label score is
    # pulled out of the subset sums.
    x, y = to_numpy(x), to_numpy(y)
    n_samples, n_classes = x.shape
    exp = np.exp(x / (k * tau))
    # Sums of products over subsets of the non-true classes, sizes k-1 and k.
    term_1 = np.zeros(n_samples)
    for i in range(n_samples):
        all_but_y = [j for j in range(n_classes) if j != y[i]]
        for indices in itertools.combinations(all_but_y, k - 1):
            term_1[i] += np.prod(exp[i, indices])
    term_2 = np.zeros(n_samples)
    for i in range(n_samples):
        all_but_y = [j for j in range(n_classes) if j != y[i]]
        for indices in itertools.combinations(all_but_y, k):
            term_2[i] += np.prod(exp[i, indices])
    all_ = np.arange(n_samples)
    loss = tau * (np.log(term_1 * exp[all_, y] + np.exp(1. / tau) * term_2)
                  - np.log(term_1 * exp[all_, y]))
    return loss
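# Hypothetical smoke test (run in this module's namespace; the _check name is
# illustrative): the two top-k formulations are algebraically identical --
# term_1 in version 1 splits into subsets containing y (delta = 0) and subsets
# excluding y (delta = 1), which is exactly term_1 * exp_y + exp(1/tau) * term_2
# in version 2, and version 1's term_2 equals version 2's term_1 * exp_y.
def _check_topk_versions_agree():
    rng = np.random.default_rng(0)
    x = rng.standard_normal((5, 8))
    y = rng.integers(0, 8, size=5)
    l1 = svm_topk_smooth_py_1(x, y, tau=1.0, k=3)
    l2 = svm_topk_smooth_py_2(x, y, tau=1.0, k=3)
    assert np.allclose(np.asarray(l1, dtype=np.float64),
                       np.asarray(l2, dtype=np.float64))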
def _test_max_pool_backward(input_shape, kernel_size, stride):
    np.random.seed(0)
    torch.manual_seed(0)
    padding = (kernel_size - 1) // 2
    input = np.random.random(input_shape).astype(np.float32) * 20
    layer = MaxPoolLayer(kernel_size, stride)
    torch_layer = nn.MaxPool2d(kernel_size, stride, padding)
    output = layer.forward(input)
    # Upstream gradient of 2 * mean(output), matching the PyTorch loss below.
    out_grad = layer.backward(2 * np.ones_like(output) / output.size)
    torch_input = utils.from_numpy(input).requires_grad_(True)
    torch_out = torch_layer(torch_input)
    (2 * torch_out.mean()).backward()
    torch_out_grad = utils.to_numpy(torch_input.grad)
    utils.assert_close(out_grad, torch_out_grad, atol=TOLERANCE)
def _test_max_pool_forward(input_shape, kernel_size, stride):
    np.random.seed(0)
    torch.manual_seed(0)
    padding = (kernel_size - 1) // 2
    input = np.random.random(input_shape).astype(np.float32) * 20
    original_input = input.copy()
    layer = MaxPoolLayer(kernel_size, stride)
    torch_layer = nn.MaxPool2d(kernel_size, stride, padding)
    output = layer.forward(input)
    torch_data = utils.from_numpy(input)
    torch_out = utils.to_numpy(torch_layer(torch_data))
    output[np.abs(output) < 1e-4] = 0
    torch_out[np.abs(torch_out) < 1e-4] = 0
    assert np.all(input == original_input)
    assert output.shape == torch_out.shape
    utils.assert_close(output, torch_out, atol=TOLERANCE)
def _test_forward(input_shape, reduction, axis):
    layer = SoftmaxCrossEntropyLossLayer(reduction=reduction)
    data = np.random.random(input_shape) * 2 - 1
    labels_shape = list(data.shape)
    labels_shape.pop(axis)
    labels = np.random.randint(0, data.shape[axis], labels_shape).astype(np.int64)
    loss = layer(data, labels, axis=axis)
    if axis == 1:
        pytorch_loss = F.cross_entropy(utils.from_numpy(data), utils.from_numpy(labels),
                                       reduction=reduction)
    else:
        pytorch_loss = F.cross_entropy(utils.from_numpy(data.swapaxes(1, axis)),
                                       utils.from_numpy(labels), reduction=reduction)
    pytorch_loss = utils.to_numpy(pytorch_loss)
    utils.assert_close(loss, pytorch_loss, atol=0.001)
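# Why the swapaxes/moveaxis in these tests: F.cross_entropy requires the class
# scores on dim 1, i.e. shape (N, C, d1, ...). A minimal sketch with classes on
# the last axis (the _demo name and shapes are illustrative only):
def _demo_class_axis():
    import torch
    import torch.nn.functional as F
    data = torch.randn(2, 7, 5)                  # class scores on the last axis
    labels = torch.randint(0, 5, (2, 7))
    loss = F.cross_entropy(data.movedim(2, 1), labels)
    print(loss)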
def log1mexp_py(x):
    # log(1 - exp(x)) for x < 0, using expm1 to stay accurate near x = 0.
    x = to_numpy(x).astype(np.float128)
    res = np.log(-np.expm1(x))
    return res
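# Why expm1 instead of the naive log(1 - exp(x)): for x close to 0 from below,
# exp(x) rounds to 1.0 in floating point and the naive form collapses to log(0).
# A minimal comparison in float64 (the _demo name is illustrative):
def _demo_log1mexp_stability():
    import numpy as np
    x = np.array([-1e-18, -1e-5, -1.0, -20.0])
    with np.errstate(divide="ignore"):
        naive = np.log(1.0 - np.exp(x))      # -inf for the first entry
    stable = np.log(-np.expm1(x))            # finite and accurate throughout
    print(naive)
    print(stable)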
def forward(self, data):
    self.data = utils.from_numpy(data)
    self.data.requires_grad_(True)
    self.output = F.max_pool2d(self.data, self.kernel_size, self.stride, self.padding)
    return utils.to_numpy(self.output)
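# forward() marks self.data with requires_grad_ so an autograd-based backward
# (like the torch.autograd.grad pattern above) can route gradients through the
# pooling argmax. A standalone sketch (the _demo name is illustrative):
def _demo_max_pool_grad():
    import torch
    import torch.nn.functional as F
    x = torch.randn(1, 1, 4, 4, requires_grad=True)
    out = F.max_pool2d(x, kernel_size=2, stride=2, padding=0)
    (g,) = torch.autograd.grad(out, x, torch.ones_like(out))
    print(g)   # ones at each window's argmax, zeros elsewhere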