def __init__(self, C, kernel=LinearKernel(), eps=1e-4,
             max_iter=consts.MAX_ITER, min_iter=1, info=[]):

    super(SequentialMinimalOptimization, self).__init__(kernel=kernel,
                                                        info=info)

    self.C = max(0, float(C))
    self.eps = max(consts.FLOAT_EPSILON, float(eps))
    self.min_iter = max(1, int(min_iter))
    self.max_iter = max(self.min_iter, int(max_iter))
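# A minimal construction sketch for the solver above. The import path and the
# default arguments (consts, LinearKernel) are assumed to be available from
# this module's context; only behaviour visible in the constructor is shown.
smo = SequentialMinimalOptimization(C=1.0, eps=1e-4, max_iter=100)

# Arguments are sanitised on construction: C is floored at 0, eps at
# consts.FLOAT_EPSILON, and max_iter is never smaller than min_iter.
assert smo.C == 1.0
assert smo.max_iter >= smo.min_iter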
class LinearSVM(properties.Function, properties.SubGradient):
    """The regularised primal hinge loss function for linear support vector
    machines, i.e.

        f(w) = (1 / n) . \sum_{i=1}^n max{0, 1 - y_i . <w, x_i>}
             + (l / 2) . ||w||²_2.

    Note that we assume that the bias (if any!) is included in the first
    penalty_start columns of X, and those columns will not be penalised.
    """
    def __init__(self, X, y, l, kernel=None, penalty_start=0, mean=False):
        """
        Parameters
        ----------
        X : numpy array (n, p)
            The data matrix.

        y : numpy array (n, 1)
            The output vector. Must only contain values of -1 and 1.

        l : float
            Must be non-negative. The ridge parameter.

        kernel : kernel object, optional
            The kernel for non-linear SVM, of type
            parsimony.algorithms.utils.Kernel. Default is a linear kernel.

        penalty_start : int
            Must be non-negative. The number of columns, variables etc., to
            exempt from penalisation. Equivalently, the first index to be
            penalised. Default is 0, all columns are included.

        mean : bool
            Whether to compute the mean loss or just the loss. Default is
            False, the loss.
        """
        self.X = X
        self.y = y

        if np.size(l) != 1:
            raise ValueError("Not vectorized yet: parameters should be "
                             "scalars.")
        self.l = max(0.0, float(l))

        if kernel is None:
            from parsimony.algorithms.utils import LinearKernel
            self.kernel = LinearKernel(X=self.X, use_cache=True)
            self._reset_kernel = True
        else:
            self.kernel = kernel
            self._reset_kernel = False

        self.penalty_start = max(0, int(penalty_start))
        self.mean = bool(mean)

        self.reset()

    def reset(self):
        """Free any cached computations from previous use of this Function.

        From the interface "Function".
        """
        if self._reset_kernel:
            self.kernel.reset()

    def f(self, w):
        """Function value.

        From the interface "Function".

        Parameters
        ----------
        w : numpy array (p, 1)
            The coefficient vector. The point at which to evaluate the
            function.
        """
        n = self.X.shape[0]

        # Hinge loss.
        f = 0.0
        for i in range(n):
            f += np.maximum(0.0,
                            1.0 - self.y[i, 0] * self.kernel(self.X[i, :],
                                                             w))

        # Mean loss or just the loss.
        if self.mean:
            f = f / float(n)

        # Add the l2 penalty.
        if self.penalty_start > 0:
            w_ = w[self.penalty_start:, :]
        else:
            w_ = w
        f += (self.l / 2.0) * np.sum(w_ ** 2.0)

        return f

    def subgrad(self, w, clever=True, random_state=None, **kwargs):
        """Subgradient of the function.

        From the interface "SubGradient".

        Parameters
        ----------
        w : numpy array (p-by-1)
            The point at which to evaluate the subgradient.

        clever : bool, optional
            Whether or not to try to be "clever" when computing the
            subgradient. If True, be "clever", i.e. use favourable values of
            the subgradient; if False, use random uniform values. Default is
            True.

        random_state : numpy.random.RandomState, optional
            An instance of numpy.random.RandomState that can be used to draw
            random samples. Default is None, do not use a particular random
            state.
        """
        if random_state is None:
            random_state = np.random.RandomState()

        n = self.X.shape[0]
        grad = np.zeros((w.shape[0], 1))
        for i in range(n):
            xi = self.X[[i], :].T  # Sample i as a column vector, (p, 1).
            f = 1.0 - self.y[i, 0] * self.kernel(xi, w)
            if f > 0.0:
                grad -= self.y[i, 0] * xi  # Minus, because it's -y.xi.
            # The case when f <= 0.0 is handled through initialising grad to
            # zero.

            # Being clever here amounts to only handling the case when f > 0,
            # and selecting a subgradient with only zeros otherwise. This
            # means less computational work, but also, since we are on the
            # right side of the margin, there is no need to go anywhere.
            if not clever:
                if abs(f) < consts.FLOAT_EPSILON:
                    # Exactly on the margin: any point on the segment between
                    # 0 and -y.xi is a valid subgradient; pick one at random.
                    a = random_state.uniform(0, 1)
                    grad -= (a * self.y[i, 0]) * xi

        # Add the gradient of the l2 regularisation.
        if self.penalty_start > 0:
            w_ = w[self.penalty_start:, :]
        else:
            w_ = w
        grad[self.penalty_start:, :] += self.l * w_

        return grad
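# A minimal usage sketch of LinearSVM above: plain subgradient descent on a
# toy two-class problem. It assumes numpy and the parsimony package (for the
# default LinearKernel) are importable; the 1/sqrt(t) step size is
# illustrative only and is not part of the class.
import numpy as np

np.random.seed(42)
n, p = 100, 5
X = np.random.randn(n, p)
y = np.sign(X[:, [0]] - X[:, [1]] + 0.1 * np.random.randn(n, 1))
y[y == 0.0] = 1.0  # Labels must be -1 or +1.

svm = LinearSVM(X, y, l=0.1, mean=True)

w = np.zeros((p, 1))
for t in range(1, 501):
    w = w - (1.0 / np.sqrt(t)) * svm.subgrad(w)

print("Final objective value:", svm.f(w))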