def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None):
    """
    Derivative of the classifier's discriminant function f(xk),
    computed on a set of points xk, w.r.t. a single poisoning point xc.

    This is the classifier-specific implementation of the method that
    the base class delegates to its subclasses.
    """
    # TODO: add a check here. convert_binary_labels should not be
    # called when y is continuous (regression problems)
    yc = convert_binary_labels(yc)

    xc0 = xc.deepcopy()

    # take validation points
    xk = self._val.X.atleast_2d()

    x = tr.X.atleast_2d()

    H = clf.hessian_tr_params(x)

    grad_loss_fk = CArray(loss_grad.ravel()).T  # column vector

    # handle normalizer, if present
    xc = xc if clf.preprocess is None else clf.preprocess.transform(xc)
    xc = xc.ravel().atleast_2d()  # row vector

    d = xk.shape[1]  # num features

    # outer product: clf.w.T is a column vector, xc a row vector
    M = clf.w.T.dot(xc)
    M += (clf.w.dot(xc.T) + clf.b - yc) * CArray.eye(d)
    db_xc = clf.w.T
    G = M.append(db_xc, axis=1)

    # add diagonal noise to the matrix that we are going to invert
    H += 1e-9 * CArray.eye(d + 1)

    # compute the derivatives of the classifier discriminant function
    fd_params = self.classifier.grad_f_params(xk)
    grad_loss_params = fd_params.dot(grad_loss_fk)

    # gt is the gradient in feature space
    gt = self._compute_grad_inv(G, H, grad_loss_params)
    # gt = self._compute_grad_solve(G, H, grad_loss_params)
    # gt = self._compute_grad_solve_iterative(G, H, grad_loss_params)

    # propagate the gradient back to the input space
    if clf.preprocess is not None:
        return clf.preprocess.gradient(xc0, w=gt)

    return gt
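# The block below is a hedged, numpy-only sketch (outside secml) of the
# implicit-gradient computation used above, for a ridge objective
# L = ||Xw + b - y||^2 + alpha*||w||^2. At the optimum the parameter
# gradient vanishes, so d(theta)/d(xc) = -G * H^-1 (the common factor 2
# in G and H cancels). All names here are illustrative.
import numpy as np

rng = np.random.default_rng(0)
n, d, alpha = 20, 3, 0.5
X, y = rng.normal(size=(n, d)), rng.normal(size=n)
xc, yc = X[0], y[0]  # the poisoning point

def train_ridge(X, y):
    # closed-form ridge solution for theta = (w, b), bias unregularized
    Xb = np.hstack([X, np.ones((len(X), 1))])
    R = np.diag([alpha] * d + [0.0])
    return np.linalg.solve(Xb.T @ Xb + R, Xb.T @ y)

theta = train_ridge(X, y)
w, b = theta[:d], theta[d]

H = np.block([[X.T @ X + alpha * np.eye(d), X.sum(0)[:, None]],
              [X.sum(0)[None, :], np.array([[float(n)]])]])
M = np.outer(w, xc) + (w @ xc + b - yc) * np.eye(d)
G = np.hstack([M, w[:, None]])
dtheta_dxc = -G @ np.linalg.inv(H)  # implicit function theorem, (d, d+1)

# finite-difference check on the first coordinate of xc
eps = 1e-6
Xp = X.copy(); Xp[0, 0] += eps
print(np.allclose(dtheta_dxc[0], (train_ridge(Xp, y) - theta) / eps,
                  atol=1e-4))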
def hessian_tr_params(self, x, y=None):
    """Hessian of the training objective w.r.t. the classifier parameters.

    Parameters
    ----------
    x : CArray
        Features of the dataset on which the training objective is computed.
    y : CArray
        Dataset labels.
    """
    alpha = self.alpha

    x = x.atleast_2d()
    n = x.shape[0]

    # handle normalizer, if present
    x = x if self.preprocess is None else self.preprocess.transform(x)

    d = x.shape[1]  # number of features in the normalized space

    H = CArray.zeros(shape=(d + 1, d + 1))
    Sigma = x.T.dot(x)  # d x d Gram matrix
    dww = Sigma + alpha * CArray.eye(d)  # second derivative w.r.t. w
    dwb = x.sum(axis=0)  # mixed derivative w.r.t. w and b
    H[:-1, :-1] = dww
    H[-1, -1] = n  # second derivative w.r.t. b (bias is not regularized)
    H[-1, :-1] = dwb
    H[:-1, -1] = dwb.T
    H *= 2.0

    return H
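# Hedged numpy sketch checking the closed form above,
# H = 2*[[X'X + alpha*I, X'1], [1'X, n]], against a numerical Hessian of
# L(w, b) = ||Xw + b - y||^2 + alpha*||w||^2 (second differences are
# exact for a quadratic, up to rounding). Names are illustrative.
import numpy as np

rng = np.random.default_rng(1)
n, d, alpha = 10, 2, 0.1
X, y = rng.normal(size=(n, d)), rng.normal(size=n)

def loss(t):
    w, b = t[:d], t[d]
    r = X @ w + b - y
    return r @ r + alpha * (w @ w)

H = 2 * np.block([[X.T @ X + alpha * np.eye(d), X.sum(0)[:, None]],
                  [X.sum(0)[None, :], np.array([[float(n)]])]])

# numerical Hessian via second differences
eps, t0, I = 1e-4, rng.normal(size=d + 1), np.eye(d + 1)
num = np.array([[(loss(t0 + eps * (I[i] + I[j])) - loss(t0 + eps * I[i])
                  - loss(t0 + eps * I[j]) + loss(t0)) / eps ** 2
                 for j in range(d + 1)] for i in range(d + 1)])
print(np.allclose(H, num, atol=1e-5))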
def _backward(self, w=None):
    """Compute the gradient w.r.t. the input cached during the forward pass.

    Parameters
    ----------
    w : CArray or None, optional
        If CArray, will be left-multiplied to the gradient
        of the preprocessor.

    Returns
    -------
    gradient : CArray
        Gradient of the normalizer w.r.t. input data.
        Its shape is (w.shape[0], x.shape[1]) if `w` is passed as input,
        (x.shape[1], x.shape[1]) otherwise.
    """
    x = self._cached_x
    d = self._cached_x.size  # get the number of features

    # compute the norm of x: ||x||
    x_norm = self._compute_x_norm(x)
    # compute the gradient of the given norm: d||x||/dx
    grad_norm_x = self._compute_norm_gradient(x, x_norm)

    # this is the derivative of the ratio x/||x||
    grad = CArray.eye(d, d) * x_norm.item() - grad_norm_x.T.dot(x)
    grad /= (x_norm ** 2)

    return grad if w is None else w.dot(grad)
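# Hedged numpy check of the quotient-rule formula above, for the L2 case:
# d(x/||x||)/dx = (||x|| * I - (x/||x||) x') / ||x||^2.
import numpy as np

rng = np.random.default_rng(2)
x = rng.normal(size=4)
nrm = np.linalg.norm(x)
grad = (nrm * np.eye(4) - np.outer(x / nrm, x)) / nrm ** 2

# finite-difference check, one input coordinate at a time
eps, num = 1e-6, np.empty((4, 4))
for j in range(4):
    e = np.zeros(4); e[j] = eps
    num[:, j] = ((x + e) / np.linalg.norm(x + e) - x / nrm) / eps
print(np.allclose(grad, num, atol=1e-5))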
def test_eye(self):
    """Test for CArray.eye() classmethod."""
    self.logger.info("Test for CArray.eye() classmethod.")

    for dtype in [None, float, int, bool]:
        for sparse in [False, True]:
            for n_rows in [0, 1, 2, 3]:
                for n_cols in [None, 0, 1, 2, 3]:
                    for k in [0, 1, 2, 3, -1, -2, -3]:
                        res = CArray.eye(n_rows=n_rows, n_cols=n_cols, k=k,
                                         dtype=dtype, sparse=sparse)
                        self.logger.info(
                            "CArray.eye(n_rows={:}, n_cols={:}, k={:}, "
                            "dtype={:}, sparse={:}):\n{:}".format(
                                n_rows, n_cols, k, dtype, sparse, res))

                        self.assertIsInstance(res, CArray)
                        self.assertEqual(res.isdense, not sparse)
                        self.assertEqual(res.issparse, sparse)

                        if dtype is None:  # Default dtype is float
                            self.assertIsSubDtype(res.dtype, float)
                        else:
                            self.assertIsSubDtype(res.dtype, dtype)

                        # n_cols defaults to n_rows if None; use a new
                        # name to avoid rebinding the loop variable for
                        # the next values of k
                        n_cols_exp = n_rows if n_cols is None else n_cols

                        self.assertEqual(res.shape, (n_rows, n_cols_exp))

                        # Resulting array has no elements, skip more checks
                        if res.size == 0:
                            continue

                        # Check that the diagonal is shifted according to k
                        if k > 0:
                            self.assertEqual(
                                0, res[0, min(n_cols_exp - 1, k - 1)].item())
                        elif k < 0:
                            self.assertEqual(
                                0, res[min(n_rows - 1, abs(k) - 1), 0].item())
                        else:  # The top-left corner is a one
                            self.assertEqual(1, res[0, 0].item())

                        # Check the number of ones
                        n_ones = (res == 1).sum()
                        if k >= 0:
                            self.assertEqual(
                                max(0, min(n_rows, n_cols_exp - k)), n_ones)
                        else:
                            self.assertEqual(
                                max(0, min(n_cols_exp, n_rows - abs(k))),
                                n_ones)

                        # Check that all elements are either 0 or 1
                        self.assertFalse(((res != 0).logical_and(
                            (res == 1).logical_not()).any()))
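# Usage sketch for the classmethod under test (assuming secml is installed;
# the import path below is secml's own):
from secml.array import CArray

print(CArray.eye(2, n_cols=3, k=1))      # ones on the first superdiagonal
print(CArray.eye(3, k=-1, sparse=True))  # sparse, first subdiagonal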
def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None):
    """
    Derivative of the classifier's discriminant function f(xk),
    computed on a set of points xk, w.r.t. a single poisoning point xc.

    This is the classifier-specific implementation of the method that
    the base class delegates to its subclasses.
    """
    xc0 = xc.deepcopy()
    d = xc.size

    if hasattr(clf, 'C'):
        C = clf.C
    elif hasattr(clf, 'alpha'):
        C = 1.0 / clf.alpha
    else:
        raise ValueError("The classifier has neither C nor alpha")

    H = clf.hessian_tr_params(tr.X, tr.Y)

    # reshape vectors to match the mathematical formulation
    yc = convert_binary_labels(yc)
    xc = CArray(xc.ravel()).atleast_2d()  # xc is a row vector
    w = CArray(clf.w.ravel()).T  # column vector
    b = clf.b
    grad_loss_fk = CArray(loss_grad.ravel()).T  # column vector

    # validation points
    xk = self.val.X.atleast_2d()

    # handle normalizer, if present
    xc = xc if clf.preprocess is None else clf.preprocess.transform(xc)

    s_c = self._s(xc, w, b)
    sigm_c = self._sigm(yc, s_c)
    z_c = sigm_c * (1 - sigm_c)

    dbx_c = z_c * w  # column vector
    # matrix d*d
    dwx_c = ((yc * (-1 + sigm_c)) * CArray.eye(d, d)) + z_c * (w.dot(xc))
    G = C * (dwx_c.append(dbx_c, axis=1))

    fd_params = self.classifier.grad_f_params(xk)
    grad_loss_params = fd_params.dot(grad_loss_fk)

    gt = self._compute_grad_inv(G, H, grad_loss_params)
    # gt = self._compute_grad_solve(G, H, grad_loss_params)
    # gt = self._compute_grad_solve_iterative(G, H, grad_loss_params)

    # propagate the gradient back to the input space
    if clf.preprocess is not None:
        return clf.preprocess.gradient(xc0, w=gt)

    return gt
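# Hedged numpy/scipy sketch of the same implicit gradient for a logistic
# objective L = C*sum_i log(1 + exp(-y_i*(w.x_i + b))) + 0.5*||w||^2,
# trained numerically rather than via secml; all names are illustrative.
import numpy as np
from scipy.optimize import minimize

rng = np.random.default_rng(3)
n, d, C = 30, 2, 1.0
X = rng.normal(size=(n, d))
y = np.where(rng.normal(size=n) > 0, 1.0, -1.0)

def train(X):
    def f(t):
        m = y * (X @ t[:d] + t[d])
        return C * np.log1p(np.exp(-m)).sum() + 0.5 * t[:d] @ t[:d]
    def g(t):  # analytic gradient: dl/ds = -y / (1 + exp(y*s))
        u = -y / (1.0 + np.exp(y * (X @ t[:d] + t[d])))
        return np.append(C * (X.T @ u) + t[:d], C * u.sum())
    return minimize(f, np.zeros(d + 1), jac=g, method="BFGS",
                    options={"gtol": 1e-12}).x

t = train(X)
w, b = t[:d], t[d]
sig = 1.0 / (1.0 + np.exp(-y * (X @ w + b)))
z = sig * (1.0 - sig)
sw = (z[:, None] * X).sum(0)

H = np.block([[C * (X.T * z) @ X + np.eye(d), C * sw[:, None]],
              [C * sw[None, :], np.array([[C * z.sum()]])]])
xc, yc = X[0], y[0]  # the poisoning point
G = np.hstack([C * (yc * (sig[0] - 1) * np.eye(d) + z[0] * np.outer(w, xc)),
               (C * z[0] * w)[:, None]])
dt_dxc = -G @ np.linalg.inv(H)  # as above, via the inverse Hessian

# finite-difference check by retraining with a perturbed poisoning point
eps = 1e-5
Xp = X.copy(); Xp[0, 0] += eps
print(np.allclose(dt_dxc[0], (train(Xp) - t) / eps, atol=1e-3))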
def test_2D(self):
    """Plot of a 2D example."""
    grid_limits = [(-4, 4), (-4, 4)]

    A = CArray.eye(2, 2)
    b = CArray.zeros(2).T
    # f(x) = x' A x + b' x + c = ||x||^2, so the level sets are circles
    circle = CFunction.create('quadratic', A, b, 0)

    self._test_2D(circle, grid_limits, levels=[16])
def hessian_tr_params(self, x, y):
    """Hessian of the training objective w.r.t. the classifier parameters.

    Parameters
    ----------
    x : CArray
        Features of the dataset on which the training objective is computed.
    y : CArray
        Dataset labels.
    """
    y = y.ravel()
    y = convert_binary_labels(y)
    y = CArray(y).astype(float).T  # column vector

    C = self.C

    x = x.atleast_2d()
    n = x.shape[0]

    # NB: compute the scores before normalizing x, as the decision
    # function normalizes x itself
    s = self.decision_function(x, y=1).T
    sigm = self._sigm(y, s)
    z = sigm * (1 - sigm)

    # handle normalizer, if present
    x = x if self.preprocess is None else self.preprocess.transform(x)

    d = x.shape[1]  # number of features in the normalized space

    # second derivative w.r.t. w (the identity comes from the regularizer)
    diag = z * CArray.eye(n_rows=n, n_cols=n)
    dww = C * (x.T.dot(diag).dot(x)) + CArray.eye(d, d)  # matrix d*d
    # mixed derivative w.r.t. b and w
    dbw = C * ((z * x).sum(axis=0)).T  # column vector
    # second derivative w.r.t. b
    dbb = C * (z.sum(axis=None))  # scalar

    H = CArray.zeros((d + 1, d + 1))
    H[:d, :d] = dww
    H[:d, d] = dbw
    H[d, :d] = dbw.T
    H[d, d] = dbb

    return H
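# Hedged check that z = sigm*(1 - sigm) used above is the second derivative
# of the logistic loss l(s) = log(1 + exp(-y*s)) w.r.t. the score s.
import numpy as np

y, s, eps = 1.0, 0.3, 1e-4
l = lambda s: np.log1p(np.exp(-y * s))
sig = 1.0 / (1.0 + np.exp(-y * s))
num = (l(s + eps) - 2 * l(s) + l(s - eps)) / eps ** 2  # central difference
print(abs(sig * (1 - sig) - num) < 1e-6)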
def _dts_function(self, X):
    """Return, for each pattern in X, `self.bias` plus a weighted sum of
    isotropic Gaussian pdfs centered in `self.centers`, with covariance
    `self.cluster_std[i] * I` and weights `self.w`.
    """
    from secml.ml.stats import CDistributionGaussian
    d = X.shape[1]  # number of features
    Y = self.bias
    for gauss_idx in range(len(self.centers)):
        Y += self.w[gauss_idx] * \
            CDistributionGaussian(
                mean=self.centers[gauss_idx],
                cov=self.cluster_std[gauss_idx] * CArray.eye(d, d)).pdf(X)
    return Y
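# Hedged scipy sketch of the same weighted sum of isotropic Gaussians, with
# illustrative weights/centers (not taken from this class):
import numpy as np
from scipy.stats import multivariate_normal

centers, stds = [np.zeros(2), np.ones(2)], [1.0, 0.5]
w, bias = [1.0, -0.5], 0.1
X = np.array([[0.0, 0.0], [1.0, 1.0]])
Y = bias + sum(wi * multivariate_normal(mean=c, cov=s * np.eye(2)).pdf(X)
               for wi, c, s in zip(w, centers, stds))
print(Y)  # one score per row of X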
def _backward(self, w=None):
    """Compute the gradient w.r.t. the input cached during the forward pass.

    Parameters
    ----------
    w : CArray or None, optional
        If CArray, will be left-multiplied to the gradient
        of the preprocessor.

    Returns
    -------
    gradient : CArray
        Gradient of the normalizer w.r.t. input data.
        Its shape is (w.shape[0], x.shape[1]) if `w` is passed as input,
        (x.shape[1], x.shape[1]) otherwise.
    """
    x = self._cached_x
    if x.shape[0] > 1:
        raise ValueError("Parameter 'x' passed to the forward() method "
                         "needs to be a single pattern, i.e. a one-row "
                         "array (passed an array with {:} rows)".format(
                             x.shape[0]))

    d = self._cached_x.size  # get the number of features

    if w is not None:
        if (w.ndim != 1) or (w.size != d):
            raise ValueError("Parameter 'w' needs to be a one dimensional "
                             "vector with the same number of elements "
                             "as parameter 'x' of the forward method "
                             "(passed a {:} dimensional vector with {:} "
                             "elements)".format(w.ndim, w.size))

    # compute the norm of x: ||x||
    x_norm = self._compute_x_norm(x)
    # compute the gradient of the given norm: d||x||/dx
    grad_norm_x = self._compute_norm_gradient(x, x_norm)

    # this is the derivative of the ratio x/||x||
    grad = CArray.eye(d, d) * x_norm.item() - grad_norm_x.T.dot(x)
    grad /= (x_norm ** 2)

    return grad if w is None else w.dot(grad)
def _quadratic_fun(d):
    """Create a quadratic function in d dimensions."""

    def _quadratic_fun_min(A, b):
        from scipy import linalg
        # the gradient 2*A*x + b vanishes at the (global) minimum;
        # assume_a='pos' replaces the sym_pos flag removed in SciPy 1.11
        min_x_scipy = linalg.solve(
            (2 * A).tondarray(), -b.tondarray(), assume_a='pos')
        return CArray(min_x_scipy).ravel()

    A = CArray.eye(d, d)
    b = CArray.ones((d, 1)) * 2

    discr_fun = CFunction.create('quadratic', A, b, c=0)

    min_x = _quadratic_fun_min(A, b)
    min_val = discr_fun.fun(min_x)

    discr_fun.global_min = lambda: min_val
    discr_fun.global_min_x = lambda: min_x

    return discr_fun
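# Hedged numpy check of the minimizer used above: for f(x) = x'Ax + b'x + c
# with A positive definite, the gradient 2Ax + b vanishes at
# x* = solve(2A, -b).
import numpy as np

A, b = np.eye(3), 2 * np.ones(3)
x_min = np.linalg.solve(2 * A, -b)
f = lambda x: x @ A @ x + b @ x
# any small step away from x_min increases f
print(all(f(x_min) < f(x_min + 0.1 * e) for e in np.eye(3)))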
def explain(self, x, y, return_grad=False):
    """Compute the influence of the test sample x on each training sample.

    Parameters
    ----------
    x : CArray
        Input sample.
    y : int
        Class w.r.t. which the classifier gradient is computed.
    return_grad : bool, optional
        If True, also return the damped Hessian H used to compute
        the influence. Default False.
    """
    H = self.hessian(x, y)
    p = H.shape[0]
    # add small diagonal damping for numerical stability
    H += 1e-9 * CArray.eye(p)

    if self._inv_H is None:
        # compute the Hessian inverse (pseudo-inverse if near-singular)
        det = linalg.det(H.tondarray())
        if abs(det) < 1e-6:
            self._inv_H = CArray(linalg.pinv(H.tondarray()))
        else:
            self._inv_H = CArray(linalg.inv(H.tondarray()))

    x = x.atleast_2d()

    if self._grad_inner_loss_params is None:
        self._grad_inner_loss_params = self.grad_inner_loss_params(
            self.tr_ds.X, self.tr_ds.Y)

    v = self.grad_outer_loss_params(x, y).T.dot(self._inv_H).dot(
        self._grad_inner_loss_params)

    return (v, H) if return_grad is True else v
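# Hedged numpy sketch of the influence-function product this method builds
# (cf. Koh & Liang, 2017): v = g_test' * H^-1 * G_train, one score per
# training sample. The gradients below are random stand-ins.
import numpy as np

rng = np.random.default_rng(4)
p, n_tr = 3, 5
H = np.eye(p) + 1e-9 * np.eye(p)   # damped Hessian, as above
g_test = rng.normal(size=(p, 1))   # gradient of the outer loss at x
G_tr = rng.normal(size=(p, n_tr))  # per-sample inner-loss gradients
v = g_test.T @ np.linalg.inv(H) @ G_tr
print(v.shape)  # (1, n_tr)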
def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None):
    """
    Derivative of the classifier's discriminant function f(xk),
    computed on a set of points xk, w.r.t. a single poisoning point xc.
    """
    svm = clf  # classifier is an SVM

    xc0 = xc.deepcopy()
    d = xc.size

    grad = CArray.zeros(shape=(d,))  # gradient in input space

    alpha_c = self._alpha_c(clf)

    if abs(alpha_c) == 0:  # xc is not an error vector: no contribution
        # self.logger.debug("Warning: xc is not an error vector.")
        return grad

    # take only validation points with non-null loss
    xk = self._val.X[abs(loss_grad) > 0, :].atleast_2d()
    grad_loss_fk = CArray(loss_grad[abs(loss_grad) > 0]).T

    # gt is the derivative of the loss computed on the validation set
    # w.r.t. xc, in feature space; this component is the only one if the
    # margin SV set is empty
    Kd_xc = self._Kd_xc(svm, alpha_c, xc, xk)
    gt = Kd_xc.dot(grad_loss_fk).ravel()  # gradient of the loss w.r.t. xc

    xs, sv_idx = clf.sv_margin()  # these points are already normalized

    if xs is None:
        self.logger.debug("Warning: xs is empty "
                          "(all points are error vectors).")
        return gt if svm.preprocess is None else \
            svm.preprocess.gradient(xc0, w=gt)

    s = xs.shape[0]

    # derivative of the loss computed on the validation set w.r.t. the
    # classifier params
    fd_params = svm.grad_f_params(xk)
    grad_loss_params = fd_params.dot(grad_loss_fk)

    H = clf.hessian_tr_params()
    H += 1e-9 * CArray.eye(s + 1)  # diagonal damping before inversion

    # handle normalizer, if present
    xc = xc if clf.preprocess is None else clf.preprocess.transform(xc)

    G = CArray.zeros(shape=(gt.size, s + 1))
    svm.kernel.rv = xs  # kernel gradient of xc w.r.t. the margin SVs
    G[:, :s] = svm.kernel.gradient(xc).T
    G *= alpha_c

    # NB: warm start should be disabled whenever the set of margin SVs
    # changes across iterations; alternative solvers are available via
    # _compute_grad_solve / _compute_grad_solve_iterative
    v = self._compute_grad_inv(G, H, grad_loss_params)
    gt += v

    # propagate the gradient back to the input space
    if clf.preprocess is not None:
        return clf.preprocess.gradient(xc0, w=gt)

    return gt
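# Hedged numpy check of the kernel-gradient block that fills G above, for an
# RBF kernel k(x, x') = exp(-gamma*||x - x'||^2): dk/dx = -2*gamma*(x - x')*k.
import numpy as np

rng = np.random.default_rng(5)
gamma = 0.5
xc, xj = rng.normal(size=3), rng.normal(size=3)
k = lambda u, v: np.exp(-gamma * ((u - v) @ (u - v)))
grad = -2 * gamma * (xc - xj) * k(xc, xj)

eps, e = 1e-6, np.eye(3)
num = np.array([(k(xc + eps * e[i], xj) - k(xc, xj)) / eps for i in range(3)])
print(np.allclose(grad, num, atol=1e-5))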
def _g(self, d): return CArray.eye(d)
def setUp(self):
    # f(x) = x' A x + b' x + c, with A = I, b = 0, c = 0
    A = CArray.eye(2, 2)
    b = CArray.zeros((2, 1))
    c = 0

    self.fun = CFunction.create('quadratic', A, b, c)