Example #1
    def grad_f_params(self, x, y=1):
        """Derivative of the decision function w.r.t. the classifier parameters.

        Parameters
        ----------
        x : CArray
            Features of the dataset on which the training objective is computed.
        y : int
            Index of the class with respect to which the gradient must be computed.

        """
        xs, sv_idx = self.sv_margin()  # these points are already normalized

        if xs is None:
            self.logger.debug("Warning: sv_margin is empty "
                              "(all points are error vectors).")
            return None

        xk = x if self.preprocess is None else self.preprocess.transform(x)

        s = xs.shape[0]  # number of margin support vectors
        k = xk.shape[0]

        Ksk_ext = CArray.ones(shape=(s + 1, k))
        Ksk_ext[:s, :] = self.kernel.k(xs, xk)

        return convert_binary_labels(y) * Ksk_ext  # shape (s + 1, k)
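The (s + 1, k) block built above stacks the kernel values k(x_i, x) for the s margin support vectors (the derivatives w.r.t. the corresponding alphas) on top of a row of ones (the derivative w.r.t. b). A minimal NumPy sketch of that layout, assuming a dense (s, k) kernel matrix is already available:

import numpy as np

def grad_f_params_dense(K_sk):
    """K_sk: (s, k) kernel matrix between margin SVs and the k query points."""
    s, k = K_sk.shape
    grad = np.ones((s + 1, k))   # last row: df/db = 1
    grad[:s, :] = K_sk           # rows 0..s-1: df/dalpha_i = k(x_i, x)
    return grad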
    def dloss(self, y_true, score, pos_label=1):
        """Computes the derivative of the square loss function with respect to `score`.

        Parameters
        ----------
        y_true : CArray
            Ground truth (correct), targets. Vector-like array.
        score : CArray
            Outputs (predicted), targets.
            2-D array of shape (n_samples, n_classes) or 1-D flat array
            of shape (n_samples,). If 1-D array, the probabilities
            provided are assumed to be that of the positive class.
        pos_label : {0, 1}, optional
            The class with respect to which the loss function derivative
            is computed. Default 1.
            If `score` is a 1-D flat array, this parameter is ignored.

        Returns
        -------
        CArray
            Derivative of the loss function. Vector-like array.

        """
        if pos_label not in (0, 1):
            raise ValueError("only {0, 1} are accepted for `pos_label`")

        y_true = convert_binary_labels(y_true).ravel()  # Convert to {-1, 1}
        score = _check_binary_score(score, pos_label)

        return -2.0 * y_true * (1.0 - y_true * score)
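A quick, hedged NumPy check of the square-loss derivative returned above: with loss(s) = (1 - y*s)**2, the analytic derivative -2*y*(1 - y*s) agrees with a central finite difference.

import numpy as np

y = np.array([1.0, -1.0, 1.0])    # labels already converted to {-1, +1}
s = np.array([0.3, -0.8, 1.2])    # classifier scores
analytic = -2.0 * y * (1.0 - y * s)

eps = 1e-6
numeric = ((1.0 - y * (s + eps)) ** 2 - (1.0 - y * (s - eps)) ** 2) / (2 * eps)
assert np.allclose(analytic, numeric)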
Example #3
    def grad_f_params(self, x, y=1):
        """Derivative of the decision function w.r.t. alpha and b

        Parameters
        ----------
        x : CArray
            Samples on which the training objective is computed.
        y : int
            Index of the class with respect to which the gradient must be computed.

        """
        xs, _ = self._sv_margin()  # these points are already preprocessed

        if xs is None:
            self.logger.debug("Warning: sv_margin is empty "
                              "(all points are error vectors).")
            return None

        s = xs.shape[0]  # number of margin support vectors
        k = x.shape[0]

        Ksk_ext = CArray.ones(shape=(s + 1, k))

        sv = self.kernel.rv  # store and recover current sv set
        self.kernel.rv = xs
        Ksk_ext[:s, :] = self.kernel.forward(x).T  # x and xs are preprocessed
        self.kernel.rv = sv

        return convert_binary_labels(y) * Ksk_ext  # shape (s + 1, k)
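Example #3 temporarily swaps the kernel's reference vectors (kernel.rv) to the margin support vectors and restores the previous set afterwards. A hedged, generic sketch of the same save/restore pattern as a context manager (names hypothetical, not part of the library):

from contextlib import contextmanager

@contextmanager
def temporary_attr(obj, name, value):
    """Temporarily set obj.<name> to value, restoring the old value on exit."""
    old = getattr(obj, name)
    setattr(obj, name, value)
    try:
        yield obj
    finally:
        setattr(obj, name, old)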
Example #4
    def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None):
        """
        Derivative of the classifier's discriminant function f(xk)
        computed on a set of points xk w.r.t. a single poisoning point xc

        This is a classifier-specific implementation, so we delegate its
        implementation to inherited classes.

        """
        xc0 = xc.deepcopy()

        d = xc.size

        if hasattr(clf, 'C'):
            C = clf.C
        elif hasattr(clf, 'alpha'):
            C = 1.0 / clf.alpha
        else:
            raise ValueError("Error: The classifier does not have neither C "
                             "nor alpha")

        H = clf.hessian_tr_params(tr.X, tr.Y)

        # change vector dimensions to match the mathematical formulation...
        yc = convert_binary_labels(yc)
        xc = CArray(xc.ravel()).atleast_2d()  # xc is a row vector

        w = CArray(clf.w.ravel()).T  # column vector
        b = clf.b
        grad_loss_fk = CArray(loss_grad.ravel()).T  # column vector

        # validation points
        xk = self.val.X.atleast_2d()

        # handle normalizer, if present
        xc = xc if clf.preprocess is None else clf.preprocess.transform(xc)

        s_c = self._s(xc, w, b)
        sigm_c = self._sigm(yc, s_c)
        z_c = sigm_c * (1 - sigm_c)

        dbx_c = z_c * w  # column vector
        dwx_c = ((yc * (-1 + sigm_c)) *
                 CArray.eye(d, d)) + z_c * (w.dot(xc))  # matrix d*d

        G = C * (dwx_c.append(dbx_c, axis=1))

        fd_params = self.classifier.grad_f_params(xk)
        grad_loss_params = fd_params.dot(grad_loss_fk)

        gt = self._compute_grad_inv(G, H, grad_loss_params)
        # gt = self._compute_grad_solve(G, H, grad_loss_params)
        # gt = self._compute_grad_solve_iterative(G, H, grad_loss_params) #*

        # propagating gradient back to input space
        if clf.preprocess is not None:
            return clf.preprocess.gradient(xc0, w=gt)

        return gt
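The sigm_c and z_c terms above come from the logistic sigmoid of the signed score. A hedged NumPy sketch of the assumed helper definitions (the real `_s` and `_sigm` are defined elsewhere in the classifier):

import numpy as np

def _s(x, w, b):
    """Linear score s(x) = x.w + b."""
    return x.dot(w) + b

def _sigm(y, s):
    """Logistic sigmoid of the signed score, 1 / (1 + exp(-y*s))."""
    return 1.0 / (1.0 + np.exp(-y * s))

# z = sigm * (1 - sigm) is the derivative of the sigmoid w.r.t. its argument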
Example #5
    def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None):
        """
        Derivative of the classifier's discriminant function f(xk)
        computed on a set of points xk w.r.t. a single poisoning point xc

        This is a classifier-specific implementation, so we delegate its
        implementation to inherited classes.
        """

        # TODO: add a check here; convert_binary_labels should not be
        #  called when y is continuous (regression problems)
        yc = convert_binary_labels(yc)

        xc0 = xc.deepcopy()

        # take validation points
        xk = self._val.X.atleast_2d()
        x = tr.X.atleast_2d()

        H = clf.hessian_tr_params(x)

        grad_loss_fk = CArray(loss_grad.ravel()).T  # column vector

        # handle normalizer, if present
        xc = xc if clf.preprocess is None else clf.preprocess.transform(xc)
        xc = xc.ravel().atleast_2d()
        #xk = xk if clf.preprocess is None else clf.preprocess.transform(xk)

        # gt is the gradient in feature space
        k = xk.shape[0]  # num validation samples
        d = xk.shape[1]  # num features

        # w is a row vector and xc a row vector here, so w.T.dot(xc) is
        # their outer product (a d x d matrix)
        M = clf.w.T.dot(xc)
        M += (clf.w.dot(xc.T) + clf.b - yc) * CArray.eye(d)
        db_xc = clf.w.T
        G = M.append(db_xc, axis=1)

        # add diagonal noise to the matrix that we are going to invert
        H += 1e-9 * (CArray.eye(d + 1))

        # compute the derivatives of the classifier discriminant function
        fd_params = self.classifier.grad_f_params(xk)
        grad_loss_params = fd_params.dot(grad_loss_fk)

        # gt is the gradient in feature space
        gt = self._compute_grad_inv(G, H, grad_loss_params)
        # gt = self._compute_grad_solve(G, H, grad_loss_params)
        # gt = self._compute_grad_solve_iterative(G, H, grad_loss_params) #*

        # propagating gradient back to input space
        if clf.preprocess is not None:
            return clf.preprocess.gradient(xc0, w=gt)

        return gt
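The 1e-9 * I term above is a small Tikhonov-style regularizer that keeps H well conditioned before it is inverted. As a hedged NumPy note, solving the regularized linear system is usually preferable to forming the inverse explicitly:

import numpy as np

def solve_regularized(H, g, eps=1e-9):
    """Solve (H + eps * I) x = g without explicitly inverting H."""
    return np.linalg.solve(H + eps * np.eye(H.shape[0]), g)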
    def dloss(self, y_true, score, pos_label=1, bound=10):
        """Computes the derivative of the hinge loss function with respect to `score`.

        Parameters
        ----------
        y_true : CArray
            Ground truth (correct), targets. Vector-like array.
        score : CArray
            Outputs (predicted), targets.
            2-D array of shape (n_samples, n_classes) or 1-D flat array
            of shape (n_samples,). If 1-D array, the probabilities
            provided are assumed to be that of the positive class.
        pos_label : {0, 1}, optional
            The class with respect to which the loss function derivative
            is computed. Default 1.
            If `score` is a 1-D flat array, this parameter is ignored.
        bound : scalar or None, optional
            Set an upper bound for a linear approximation when -y*s is large
            to avoid numerical overflows.
            A value of 10 is generally acceptable: log(1 + exp(10)) = 10.000045.

        Returns
        -------
        CArray
            Derivative of the loss function. Vector-like array.

        """
        if pos_label not in (0, 1):
            raise ValueError("only {0, 1} are accepted for `pos_label`")

        y_true = convert_binary_labels(y_true).ravel()  # Convert to {-1, 1}
        score = _check_binary_score(score, pos_label)

        # d/df [ log(1 + exp(-y*f)) / log(2) ] =
        #     1 / log(2) * ( -y * exp(-y*f) / (1 + exp(-y*f)) )

        v = CArray(-y_true * score).astype(float)

        if bound is None:
            h = -y_true * v.exp() / (1.0 + v.exp())

        else:
            # linear approximation avoids numerical overflows
            # when -yf >> 1 : loss ~= -yf, and grad = -y
            h = -y_true.astype(float)
            h[v < bound] = h[v < bound] * v[v < bound].exp() / \
                           (1.0 + v[v < bound].exp())

        return h / CArray([2]).log()
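A hedged plain-NumPy transcription of the bounded derivative above: for v = -y*s >= bound the factor exp(v) / (1 + exp(v)) is effectively 1, so the derivative falls back to -y (scaled by 1 / log(2)).

import numpy as np

def dlogloss(y, s, bound=10.0):
    v = -y * s
    h = -y.astype(float)
    small = v < bound                    # safe region: apply the exact factor
    h[small] *= np.exp(v[small]) / (1.0 + np.exp(v[small]))
    return h / np.log(2.0)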
Example #7
    def hessian_tr_params(self, x, y):
        """Hessian of the training objective w.r.t. the classifier parameters.

        Parameters
        ----------
        x : CArray
            Features of the dataset on which the training objective is computed.
        y : CArray
            Dataset labels.

        """
        y = y.ravel()
        y = convert_binary_labels(y)
        y = CArray(y).astype(float).T  # column vector

        C = self.C

        x = x.atleast_2d()
        n = x.shape[0]

        # NB: compute the score before normalizing x, since the decision
        # function normalizes x internally
        s = self.decision_function(x, y=1).T
        sigm = self._sigm(y, s)
        z = sigm * (1 - sigm)

        # handle normalizer, if present
        x = x if self.preprocess is None else self.preprocess.transform(x)

        d = x.shape[1]  # number of features in the normalized space

        # second derivatives of the objective w.r.t. (w, w), (b, w) and (b, b)
        diag = z * CArray.eye(n_rows=n, n_cols=n)
        dww = C * (x.T.dot(diag).dot(x)) + CArray.eye(d, d)  # matrix d*d
        dbw = C * ((z * x).sum(axis=0)).T  # column vector
        dbb = C * (z.sum(axis=None))  # scalar

        H = CArray.zeros((d + 1, d + 1))
        H[:d, :d] = dww
        H[:-1, d] = dbw
        H[d, :-1] = dbw.T
        H[-1, -1] = dbb

        return H
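The assembly above produces the (d + 1) x (d + 1) Hessian in block form, with the w-w block in the top-left corner and the b-b scalar in the bottom-right. A hedged NumPy sketch of the same assembly:

import numpy as np

def assemble_hessian(dww, dbw, dbb):
    """dww: (d, d), dbw: (d,), dbb: scalar -> (d + 1, d + 1) Hessian."""
    d = dww.shape[0]
    H = np.zeros((d + 1, d + 1))
    H[:d, :d] = dww   # d2L / dw dw
    H[:d, d] = dbw    # d2L / dw db
    H[d, :d] = dbw    # d2L / db dw (symmetric)
    H[d, d] = dbb     # d2L / db db
    return H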
    def grad_f_params(self, x, y=1):
        """Derivative of the decision function w.r.t. the classifier parameters.

        Parameters
        ----------
        x : CArray
            Features of the dataset on which the training objective is computed.
        y : int
            Index of the class with respect to which the gradient must be computed.

        """
        if self.preprocess is not None:
            x = self.preprocess.transform(x)

        grad_f_w = self._grad_f_w(x)
        grad_f_b = self._grad_f_b(x)

        d = grad_f_w.append(grad_f_b, axis=0)

        return convert_binary_labels(y) * d
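For a linear decision function f(x) = w.x + b, df/dw = x and df/db = 1, which is exactly the stack returned above. A hedged NumPy illustration of the resulting (d + 1, k) shape:

import numpy as np

def grad_f_params_linear(X, y=1):
    """X: (k, d) samples, y: +1 or -1 -> (d + 1, k) gradient w.r.t. (w, b)."""
    k = X.shape[0]
    grad = np.vstack([X.T, np.ones((1, k))])  # df/dw = x, df/db = 1
    return y * grad                           # sign flips for the negative class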
Example #9
    def _fit(self, dataset):
        """Trains the One-Vs-All SVM classifier.

        Parameters
        ----------
        dataset : CDataset
            Binary (2-classes) training set. Must be a :class:`.CDataset`
            instance with patterns data and corresponding labels.

        Returns
        -------
        trained_cls : CClassifierSVM
            Instance of the SVM classifier trained using input dataset.

        """
        self.logger.info("Training SVM with parameters: {:}".format(
            self.get_params()))
        # Setting up classifier parameters
        classifier = SVC(
            C=self.C,
            class_weight=self.class_weight,
            kernel='linear' if self.is_kernel_linear() else 'precomputed')

        # Computing the kernel matrix
        if not self.is_kernel_linear():
            self._k = CArray(self.kernel.k(dataset.X))
        else:
            self._k = dataset.X

        # Training classifier using precomputed kernel
        classifier.fit(self._k.get_data(), dataset.Y.tondarray())

        # Intercept
        self._b = CArray(classifier.intercept_[0])[0]
        self.logger.debug("Classifier SVM bias: {:}".format(self._b))

        # Updating SVM parameters
        self._w = None  # Reset `_w` so it stays None if the next condition is False
        if self.is_kernel_linear():  # Linear SVM
            self._w = CArray(
                CArray(classifier.coef_, tosparse=dataset.issparse).ravel())
            self.logger.debug("Classifier SVM linear weights: \n{:}".format(
                self._w))

        if not self.is_kernel_linear() or self.store_dual_vars is True:
            # Dual Space SVM or forced dual variables store
            self._n_sv = CArray(classifier.n_support_)
            self._sv_idx = CArray(classifier.support_).ravel()
            # Compatibility fix for differences between sklearn versions
            self._alpha = convert_binary_labels(dataset.Y[self.sv_idx]) * \
                          abs(CArray(classifier.dual_coef_).todense().ravel())
            self._sv = CArray(dataset.X[self.sv_idx, :])
            self.logger.debug("Classifier SVM dual weights (alphas): "
                              "\n{:}".format(self._alpha))
        else:  # Resetting the dual parameters
            self._n_sv = None
            self._sv_idx = None
            self._alpha = None
            self._sv = None

        return classifier
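The precomputed-kernel branch above mirrors standard scikit-learn usage. A hedged stand-alone sketch (data and kernel choice purely illustrative):

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.svm import SVC

X = np.random.RandomState(0).randn(20, 5)
y = np.repeat([0, 1], 10)

K = rbf_kernel(X, X)                         # (n, n) Gram matrix
clf = SVC(C=1.0, kernel='precomputed').fit(K, y)
# clf.dual_coef_ holds y_i * alpha_i for the support vectors, which the code
# above turns back into signed alphas via convert_binary_labels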
Example #10
    def _fit(self, x, y):
        """Trains the One-Vs-All SVM classifier.

        Parameters
        ----------
        x : CArray
            Array to be used for training with shape (n_samples, n_features).
        y : CArray
            Array of shape (n_samples,) containing the class
            labels (2-classes only).

        Returns
        -------
        CClassifierSVM
            Trained classifier.

        """
        self.logger.info("Training SVM with parameters: {:}".format(
            self.get_params()))
        # Setting up classifier parameters
        classifier = SVC(
            C=self.C,
            class_weight=self.class_weight,
            kernel='linear' if self.is_kernel_linear() else 'precomputed')

        # Computing the kernel matrix
        if not self.is_kernel_linear():
            self._k = CArray(self.kernel.k(x))
        else:
            self._k = x

        # Training classifier using precomputed kernel
        classifier.fit(self._k.get_data(), y.tondarray())

        # Intercept
        self._b = CArray(classifier.intercept_[0])[0]
        self.logger.debug("Classifier SVM bias: {:}".format(self._b))

        # Updating SVM parameters
        self._w = None  # Reset `_w` so it stays None if the next condition is False
        if self.is_kernel_linear():  # Linear SVM
            self._w = CArray(
                CArray(classifier.coef_, tosparse=x.issparse).ravel())
            self.logger.debug("Classifier SVM linear weights: \n{:}".format(
                self._w))

        if not self.is_kernel_linear() or self.store_dual_vars is True:
            # Dual Space SVM or forced dual variables store
            self._n_sv = CArray(classifier.n_support_)
            self._sv_idx = CArray(classifier.support_).ravel()
            # Compatibility fix for differences between sklearn versions
            self._alpha = convert_binary_labels(y[self.sv_idx]) * \
                          abs(CArray(classifier.dual_coef_).todense().ravel())
            self._sv = CArray(x[self.sv_idx, :])
            self.logger.debug("Classifier SVM dual weights (alphas): "
                              "\n{:}".format(self._alpha))
        else:  # Resetting the dual parameters
            self._n_sv = None
            self._sv_idx = None
            self._alpha = None
            self._sv = None

        return classifier
Example #11
    def _fit(self, x, y):
        """Trains the One-Vs-All SVM classifier.

        Parameters
        ----------
        x : CArray
            Array to be used for training with shape (n_samples, n_features).
        y : CArray
            Array of shape (n_samples,) containing the class
            labels (2-classes only).

        Returns
        -------
        CClassifierSecSVM
            Trained classifier.

        """
        if self.n_classes != 2:
            raise ValueError(
                "Trying to learn an SVM on more/less than two classes.")

        y = convert_binary_labels(y)

        if self.class_weight == 'balanced':
            n_pos = y[y == 1].shape[0]
            n_neg = y[y == -1].shape[0]
            self.weight = CArray.zeros(2)
            self.weight[0] = 1.0 * n_pos / (n_pos + n_neg)
            self.weight[1] = 1.0 * n_neg / (n_pos + n_neg)

        self._w = CArray.zeros(x.shape[1])
        self._b = CArray(0.0)

        obj = self.objective(x, y)
        obj_new = obj

        for i in range(self.max_it):

            # pick a random sample subset
            idx = CArray.randsample(CArray.arange(x.shape[0], dtype=int),
                                    x.shape[0],
                                    random_state=i)

            # compute subgradients
            grad_w, grad_b = self.gradient_w_b(x[idx, :], y[idx])

            for p in range(0, 71, 10):

                step = (self.eta**p) * 2**(-0.01 * i) / (x.shape[0]**0.5)

                self._w -= step * grad_w
                self._b -= step * grad_b

                # Applying UPPER bound
                d_ub = self.w[self._idx_ub]
                d_ub[d_ub > self._ub] = self._ub
                self.w[self._idx_ub] = d_ub

                # Applying LOWER bound
                d_lb = self.w[self._idx_lb]
                d_lb[d_lb < self._lb] = self._lb
                self.w[self._idx_lb] = d_lb

                obj_new = self.objective(x, y)

                if obj_new < obj:
                    break

            if abs(obj_new - obj) < self.eps:
                self.logger.info("i {:}: {:}".format(i, obj_new))
                # Sparse weights if input is sparse (like in CClassifierSVM)
                self._w = self.w.tosparse() if x.issparse else self.w
                return

            obj = obj_new

            if i % 10 == 0:
                loss = self.hinge_loss(x, y).sum()
                self.logger.info("i {:}: {:.4f}, L {:.4f}".format(
                    i, obj, loss))
            # Sparse weights if input is sparse (like in CClassifierSVM)
            self._w = self.w.tosparse() if x.issparse else self.w
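The upper/lower bound steps above clip selected coordinates of w after each subgradient update, i.e. they project w onto a box. A hedged NumPy sketch of that projection (index and bound names illustrative):

import numpy as np

def project_weights(w, idx_ub, ub, idx_lb, lb):
    """Clip w[idx_ub] from above at ub and w[idx_lb] from below at lb."""
    w = w.copy()
    w[idx_ub] = np.minimum(w[idx_ub], ub)
    w[idx_lb] = np.maximum(w[idx_lb], lb)
    return w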