Example #1
    def __init__(self, X, y, l, kernel=None, penalty_start=0, mean=False):
        """
        Parameters
        ----------
        X : numpy array (n, p)
            The data matrix.

        y : numpy array (n, 1)
            The output vector. Must only contain values of -1 and 1.

        l : float
            Must be non-negative. The ridge parameter.

        kernel : kernel object, optional
            The kernel for non-linear SVM, of type
            parsimony.algorithms.utils.Kernel. Default is a linear kernel.

        penalty_start : int
            Must be non-negative. The number of columns, variables etc., to
            exempt from penalisation. Equivalently, the first index to be
            penalised. Default is 0, all columns are included.

        mean : bool
            Whether to compute the mean loss or the sum of the losses over
            the samples. Default is False, the sum.
        """
        self.X = X
        self.y = y

        self.l = max(0.0, float(l))

        if kernel is None:
            from parsimony.algorithms.utils import LinearKernel
            self.kernel = LinearKernel(X=self.X, use_cache=True)
            self._reset_kernel = True
        else:
            self.kernel = kernel
            self._reset_kernel = False

        self.penalty_start = max(0, int(penalty_start))
        self.mean = bool(mean)

        self.reset()
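A hypothetical usage sketch for this constructor: X is an (n, p) data matrix, y is an (n, 1) column containing only -1 and +1, and l is a non-negative ridge parameter. The class name LinearSVM is taken from the later examples; the instantiation lines are commented out because the import path is not shown in this excerpt.

import numpy as np

n, p = 50, 10
rng = np.random.RandomState(0)
X = rng.randn(n, p)
y = np.where(X[:, [0]] > 0, 1.0, -1.0)   # labels must be exactly -1 or +1

# svm_loss = LinearSVM(X, y, l=0.1)                  # default cached linear kernel
# svm_loss_mean = LinearSVM(X, y, l=0.1, mean=True)  # mean hinge loss instead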
Example #2
    def __init__(self, C, kernel=LinearKernel(), eps=1e-4,
                 max_iter=consts.MAX_ITER, min_iter=1, info=[]):

        super(SequentialMinimalOptimization, self).__init__(kernel=kernel,
                                                            info=info)

        self.C = max(0, float(C))
        self.eps = max(consts.FLOAT_EPSILON, float(eps))
        self.min_iter = max(1, int(min_iter))
        self.max_iter = max(self.min_iter, int(max_iter))
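The constructor above defensively clamps its parameters instead of raising. A minimal sketch of the same pattern in plain Python, with sys.float_info.epsilon standing in for consts.FLOAT_EPSILON (an assumption; the real constant lives in the parsimony library):

import sys

FLOAT_EPSILON = sys.float_info.epsilon   # stand-in for consts.FLOAT_EPSILON

C = max(0, float(-5.0))                  # negative C is clamped to 0
eps = max(FLOAT_EPSILON, float(0.0))     # eps is floored at the epsilon
min_iter = max(1, int(0))                # at least one iteration
max_iter = max(min_iter, int(-10))       # never fewer than min_iter iterations
print(C, eps, min_iter, max_iter)        # 0 2.220446049250313e-16 1 1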
Example #3
    def __init__(self, X, y, l, kernel=None, penalty_start=0, mean=False):
        """
        Parameters
        ----------
        X : numpy array (n, p)
            The data matrix.

        y : numpy array (n, 1)
            The output vector. Must only contain values of -1 and 1.

        l : float
            Must be non-negative. The ridge parameter.

        kernel : kernel object, optional
            The kernel for non-linear SVM, of type
            parsimony.algorithms.utils.Kernel. Default is a linear kernel.

        penalty_start : int
            Must be non-negative. The number of columns, variables etc., to
            exempt from penalisation. Equivalently, the first index to be
            penalised. Default is 0, all columns are included.

        mean : bool
            Whether to compute the mean loss or the sum of the losses over
            the samples. Default is False, the sum.
        """
        self.X = X
        self.y = y

        if np.size(l) != 1:
            raise ValueError("Not vectorized yet: parameters should be scalars")

        self.l = max(0.0, float(l))

        if kernel is None:
            from parsimony.algorithms.utils import LinearKernel
            self.kernel = LinearKernel(X=self.X, use_cache=True)
            self._reset_kernel = True
        else:
            self.kernel = kernel
            self._reset_kernel = False

        self.penalty_start = max(0, int(penalty_start))
        self.mean = bool(mean)

        self.reset()
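The only addition in this variant is the scalar check on l: np.size returns 1 for a scalar (or a one-element array), so any vector-valued parameter trips the ValueError.

import numpy as np

print(np.size(0.1))          # 1 -> accepted
print(np.size([0.1, 0.2]))   # 2 -> would raise the ValueError above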
Example #4
class LinearSVM(properties.Function,
                properties.SubGradient):
    """The regularised primal hinge loss function for linear support vector
    machines, i.e.

        f(w) = (1/n) \sum_{i=1}^n max{0, 1 - y_i <w, x_i>} + (l/2) ||w||²_2.

    Note that we assume that the bias (if any!) is included in the first
    penalty_start columns of X, and those columns will not be penalised.
    """
    def __init__(self, X, y, l, kernel=None, penalty_start=0, mean=False):
        """
        Parameters
        ----------
        X : numpy array (n, p)
            The data matrix.

        y : numpy array (n, 1)
            The output vector. Must only contain values of -1 and 1.

        l : float
            Must be non-negative. The ridge parameter.

        kernel : kernel object, optional
            The kernel for non-linear SVM, of type
            parsimony.algorithms.utils.Kernel. Default is a linear kernel.

        penalty_start : int
            Must be non-negative. The number of columns, variables etc., to
            exempt from penalisation. Equivalently, the first index to be
            penalised. Default is 0, all columns are included.

        mean : bool
            Whether to compute the mean loss or the sum of the losses over
            the samples. Default is False, the sum.
        """
        self.X = X
        self.y = y

        if np.size(l) != 1:
            raise ValueError("Not vectorized yet: parameters should be scalars")

        self.l = max(0.0, float(l))

        if kernel is None:
            from parsimony.algorithms.utils import LinearKernel
            self.kernel = LinearKernel(X=self.X, use_cache=True)
            self._reset_kernel = True
        else:
            self.kernel = kernel
            self._reset_kernel = False

        self.penalty_start = max(0, int(penalty_start))
        self.mean = bool(mean)

        self.reset()

    def reset(self):
        """Free any cached computations from previous use of this Function.

        From the interface "Function".
        """
        if self._reset_kernel:
            self.kernel.reset()

    def f(self, w):
        """Function value.

        From the interface "Function".

        Parameters
        ----------
        w : ndarray, (p, 1)
            The coefficient vector. The point at which to evaluate the
            function.
        """
        n = self.X.shape[0]

        # Hinge loss.
        f = 0.0
        for i in xrange(n):
            f += np.maximum(0.0,
                            1.0 - self.y[i, 0] * self.kernel(self.X[i, :], w))

        # Mean loss or just the loss.
        if self.mean:
            f = f / float(n)

        # Add the l2 penalty.
        if self.penalty_start > 0:
            w_ = w[self.penalty_start:, :]
        else:
            w_ = w
        f += (self.l / 2.0) * np.sum(w_ ** 2.0)

        return f

    def subgrad(self, w, clever=True, random_state=None, **kwargs):
        """Subgradient of the function.

        From the interface "SubGradient".

        Parameters
        ----------
        w : numpy array (p-by-1)
            The point at which to evaluate the subgradient.

        clever : bool, optional
            Whether or not to try to be "clever" when computing the
            subgradient. If True, be "clever", i.e. use favourable values of
            the subgradient; if False, use random uniform values. Default is
            True.

        random_state : numpy.random.RandomState, optional
            An instance of numpy.random.RandomState that can be used to draw
            random samples. Default is None, do not use a particular random
            state.
        """
        if random_state is None:
            random_state = np.random.RandomState()

        n = self.X.shape[0]

        grad = np.zeros((w.shape[0], 1))
        for i in xrange(n):
            xi = self.X[[i], :].T
            f = 1.0 - self.y[i, 0] * self.kernel(xi, w)
            if f > 0.0:
                grad -= self.y[i, 0] * xi  # Minus, because it is -y_i * x_i.
            # The case when f <= 0.0 is handled through initialising grad to
            # zero.
            # Being clever here amounts to only handling the case when f > 0,
            # and selecting a subgradient with only zeros otherwise. This means
            # less computational work, but also since we are on the right side
            # of the margin, there is no need to go anywhere.
            if not clever:
                if abs(f) < consts.FLOAT_EPSILON:
                    a = random_state.uniform(0, 1)
                    grad -= (a * self.y[i, 0]) * xi  # Use the (p, 1) column xi.

        # Add the gradient of the l2 regularisation.
        if self.penalty_start > 0:
            w_ = w[self.penalty_start:, :]
        else:
            w_ = w
        grad[self.penalty_start:, :] += self.l * w_

        return grad
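To make the arithmetic of f and subgrad concrete, here is a NumPy-only sketch for the default linear kernel, i.e. kernel(x_i, w) = <x_i, w>, with penalty_start = 0. It does not import parsimony and is only an illustration under those assumptions, not the library's implementation.

import numpy as np

def hinge_f(X, y, w, l, mean=False):
    margins = 1.0 - y * X.dot(w)             # (n, 1) of 1 - y_i <w, x_i>
    f = np.sum(np.maximum(0.0, margins))     # hinge loss
    if mean:
        f /= float(X.shape[0])
    return f + (l / 2.0) * np.sum(w ** 2.0)  # add the l2 penalty

def hinge_subgrad(X, y, w, l):
    active = (1.0 - y * X.dot(w)) > 0.0      # samples inside the margin
    grad = -X.T.dot(y * active)              # sum of -y_i * x_i over active samples
    return grad + l * w                      # add the l2 gradient

rng = np.random.RandomState(42)
X = rng.randn(20, 3)
y = np.where(X[:, [0]] > 0, 1.0, -1.0)
w = np.zeros((3, 1))
print(hinge_f(X, y, w, l=0.1))               # 20.0 at w = 0: every margin is violated
print(hinge_subgrad(X, y, w, l=0.1).ravel())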
Example #5
class LinearSVM(properties.Function, properties.SubGradient):
    """The regularised primal hinge loss function for linear support vector
    machines, i.e.

        f(w) = (1/n) \sum_{i=1}^n max{0, 1 - y_i <w, x_i>} + (l/2) ||w||²_2.

    Note that we assume that the bias (if any!) is included in the first
    penalty_start columns of X, and those columns will not be penalised.
    """
    def __init__(self, X, y, l, kernel=None, penalty_start=0, mean=False):
        """
        Parameters
        ----------
        X : numpy array (n, p)
            The data matrix.

        y : numpy array (n, 1)
            The output vector. Must only contain values of -1 and 1.

        l : float
            Must be non-negative. The ridge parameter.

        kernel : kernel object, optional
            The kernel for non-linear SVM, of type
            parsimony.algorithms.utils.Kernel. Default is a linear kernel.

        penalty_start : int
            Must be non-negative. The number of columns, variables etc., to
            exempt from penalisation. Equivalently, the first index to be
            penalised. Default is 0, all columns are included.

        mean : bool
            Whether to compute the mean loss or the sum of the losses over
            the samples. Default is False, the sum.
        """
        self.X = X
        self.y = y

        self.l = max(0.0, float(l))

        if kernel is None:
            from parsimony.algorithms.utils import LinearKernel
            self.kernel = LinearKernel(X=self.X, use_cache=True)
            self._reset_kernel = True
        else:
            self.kernel = kernel
            self._reset_kernel = False

        self.penalty_start = max(0, int(penalty_start))
        self.mean = bool(mean)

        self.reset()

    def reset(self):
        """Free any cached computations from previous use of this Function.

        From the interface "Function".
        """
        if self._reset_kernel:
            self.kernel.reset()

    def f(self, w):
        """Function value.

        From the interface "Function".

        Parameters
        ----------
        w : ndarray, (p, 1)
            The coefficient vector. The point at which to evaluate the
            function.
        """
        n = self.X.shape[0]

        # Hinge loss.
        f = 0.0
        for i in xrange(n):
            f += np.maximum(0.0,
                            1.0 - self.y[i, 0] * self.kernel(self.X[i, :], w))

        # Mean loss or just the loss.
        if self.mean:
            f = f / float(n)

        # Add the l2 penalty.
        if self.penalty_start > 0:
            w_ = w[self.penalty_start:, :]
        else:
            w_ = w
        f += (self.l / 2.0) * np.sum(w_**2.0)

        return f

    def subgrad(self, w, clever=True, random_state=None, **kwargs):
        """Subgradient of the function.

        From the interface "SubGradient".

        Parameters
        ----------
        w : numpy array (p-by-1)
            The point at which to evaluate the subgradient.

        clever : bool, optional
            Whether or not to try to be "clever" when computing the
            subgradient. If True, be "clever", i.e. use favourable values of
            the subgradient; if False, use random uniform values. Default is
            True.

        random_state : numpy.random.RandomState, optional
            An instance of numpy.random.RandomState that can be used to draw
            random samples. Default is None, do not use a particular random
            state.
        """
        if random_state is None:
            random_state = np.random.RandomState()

        n = self.X.shape[0]

        grad = np.zeros((w.shape[0], 1))
        for i in xrange(n):
            xi = self.X[[i], :].T
            f = 1.0 - self.y[i, 0] * self.kernel(xi, w)
            if f > 0.0:
                grad -= self.y[i, 0] * xi  # Minus, because it is -y_i * x_i.
            # The case when f <= 0.0 is handled through initialising grad to
            # zero.
            # Being clever here amounts to only handling the case when f > 0,
            # and selecting a subgradient with only zeros otherwise. This means
            # less computational work, but also since we are on the right side
            # of the margin, there is no need to go anywhere.
            if not clever:
                if abs(f) < consts.FLOAT_EPSILON:
                    a = random_state.uniform(0, 1)
                    grad -= (a * self.y[i, 0]) * xi  # Use the (p, 1) column xi.

        # Add the gradient of the l2 regularisation.
        if self.penalty_start > 0:
            w_ = w[self.penalty_start:, :]
        else:
            w_ = w
        grad[self.penalty_start:, :] += self.l * w_

        return grad
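Any object exposing f(w) and subgrad(w) with the interface above can be minimised by plain subgradient descent. The loop below is a hypothetical sketch; the diminishing step size and the iteration budget are illustrative choices, not the parsimony library's solver.

import numpy as np

def subgradient_descent(function, p, num_iter=500, step0=1.0):
    """Minimise a non-smooth function via its f(w) and subgrad(w) methods."""
    w = np.zeros((p, 1))
    best_w, best_f = w.copy(), function.f(w)
    for k in range(1, num_iter + 1):
        w = w - (step0 / k) * function.subgrad(w)   # diminishing step size
        fk = function.f(w)
        if fk < best_f:                             # keep the best iterate seen
            best_f, best_w = fk, w.copy()
    return best_w

# Example (assuming X, y as in the earlier sketches):
# w_hat = subgradient_descent(LinearSVM(X, y, l=0.1), p=X.shape[1])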