Example #1
def experiment_anomaly_detection(train, test, comb, num_train, anom_prob, labels):
    # train the one-class SVM baseline on plain feature vectors
    phi = calc_feature_vecs(comb.X)
    kern = get_kernel(phi[:,0:num_train], phi[:,0:num_train])
    ocsvm = OcSvmDualQP(kern, anom_prob)
    ocsvm.fit()
    kern = get_kernel(phi, phi)
    oc_as = ocsvm.apply(kern[num_train:,ocsvm.get_support_dual()])
    fpr, tpr, thres = metric.roc_curve(labels[num_train:], oc_as)
    base_auc = metric.auc(fpr, tpr)

    # train the structured (latent) anomaly detector
    sad = LatentOCSVM(train, anom_prob)
    sad.fit(max_iter=40)
    pred_vals, pred_lats = sad.apply(test)
    fpr, tpr, thres = metric.roc_curve(labels[num_train:], pred_vals)
    auc = metric.auc(fpr, tpr)
    return auc, base_auc
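The ROC/AUC bookkeeping used twice above is ordinary scikit-learn (`metric` is presumably `sklearn.metrics`). A self-contained sketch on synthetic scores, with all data and names illustrative:

import numpy as np
from sklearn import metrics

# synthetic ground truth (1 = nominal, 0 = anomalous) and anomaly scores;
# nominal points are scored higher, as an OC-SVM would score inliers
rng = np.random.RandomState(0)
labels = np.concatenate([np.ones(90), np.zeros(10)])
scores = rng.randn(100) + 2.0 * labels

fpr, tpr, thres = metrics.roc_curve(labels, scores, pos_label=1)
print('AUC = {0:.3f}'.format(metrics.auc(fpr, tpr)))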
Example #2
    def predict(self, Y):
        # build the test kernel between test data Y and the support vectors
        kernel = get_kernel(Y, self.X[:, self.svs], self.kernel, self.kparam)
        # for SVDD we additionally need the norms ||y||^2 of the test data
        norms = get_diag_kernel(Y, self.kernel)
        # squared distance to the center minus the squared radius:
        # ||y - c||^2 - R^2 = c'c - 2 k(y, sv)'alpha + y'y - R^2
        res = self.cTc - 2. * kernel.dot(self.get_support()).T + norms
        return res.reshape(Y.shape[1]) - self.radius2
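The distance computation above expands ||y - c||^2 - R^2 with the kernel trick: c'c is precomputed (self.cTc), the cross term comes from the test kernel against the support vectors, and the norms supply y'y. A toy NumPy check of that expansion for a linear kernel (all names here are illustrative):

import numpy as np

rng = np.random.RandomState(1)
X = rng.randn(3, 5)            # feats x samples, as in these examples
alphas = np.full(5, 0.2)       # dual weights, sum to one
c = X.dot(alphas)              # SVDD center
y = rng.randn(3)               # a test point

direct = np.sum((y - c) ** 2)
cTc = alphas.dot(X.T.dot(X)).dot(alphas)
expanded = cTc - 2.0 * X.T.dot(y).dot(alphas) + y.dot(y)
assert np.isclose(direct, expanded)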
Example #3
    def train(self, max_iter=50):
        """ Solve the LatentSVDD optimization problem with a
            sequential convex programming/DC-programming
            approach:
            Iteratively, find the most likely configuration of
            the latent variables and then, optimize for the
            model parameter using fixed latent states.
        """
        N = self.sobj.get_num_samples()
        DIMS = self.sobj.get_num_dims()

        # intermediate solutions
        # latent variables
        latent = [0] * N

        sol = 10.0 * normal(DIMS, 1)
        psi = matrix(0.0, (DIMS, N))  # (dim x exm)
        old_psi = matrix(0.0, (DIMS, N))  # (dim x exm)
        threshold = 0

        obj = -1
        iter = 0

        # terminate if objective function value doesn't change much
        while iter < max_iter and (
                iter < 2 or sum(sum(abs(np.array(psi - old_psi)))) >= 0.001):
            print('Starting iteration {0}.'.format(iter))
            print(sum(sum(abs(np.array(psi - old_psi)))))
            iter += 1
            old_psi = matrix(psi)

            # 1. linearize
            # for the current solution compute the
            # most likely latent variable configuration
            for i in range(N):
                # min_z ||sol - Psi(x,z)||^2 = ||sol||^2 + min_z -2<sol,Psi(x,z)> + ||Psi(x,z)||^2
                # Hence => ||sol||^2 - max_z  2<sol,Psi(x,z)> - ||Psi(x,z)||^2
                _, latent[i], psi[:, i] = self.sobj.argmax(sol, i, opt_type='quadratic')

            # 2. solve the intermediate convex optimization problem
            kernel = get_kernel(psi, psi)
            svdd = SvddDualQP(kernel, self.C)
            svdd.fit()
            threshold = svdd.get_radius()
            inds = svdd.svs
            alphas = svdd.get_support()
            sol = psi[:, inds] * alphas

        self.sol = sol
        self.latent = latent
        return sol, latent, threshold
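The loop is a generic alternate-and-refit (DC programming) scheme: impute the latent states under the current solution, then solve a convex problem on the completed feature map. A stripped-down skeleton of that control flow, with both steps passed in as callables (illustrative, not the library's API):

import numpy as np

def dc_fit(argmax_step, convex_step, sol, max_iter=50, prec=1e-3):
    psi, latent, threshold = None, None, 0.
    for it in range(max_iter):
        old_psi = psi
        # 1. linearize: most likely latent configuration under the current sol
        latent, psi = argmax_step(sol)        # psi has shape (dim, N)
        # 2. refit the convex model on the completed data
        sol, threshold = convex_step(psi)
        # terminate once the joint feature map stops changing
        if old_psi is not None and np.sum(np.abs(psi - old_psi)) < prec:
            break
    return sol, latent, threshold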
Example #4
    def fit(self, X, max_iter=-1, center=True, normalize=True):
        """
        :param X: Data matrix is assumed to be feats x samples.
        :param max_iter: *ignored*, just for compatibility.
        :return: Alphas and threshold for dual SVDDs.
        """
        self.X = X.copy()
        dims, self.samples = X.shape
        if self.samples < 1:
            print('Invalid training data.')
            return -1

        # number of training examples
        N = self.samples

        kernel = get_kernel(X, X, self.kernel, self.kparam)
        if center:
            kernel = center_kernel(kernel)
        if normalize:
            kernel = normalize_kernel(kernel)

        norms = np.diag(kernel).copy()

        if self.nu >= 1.0:
            print("Center-of-mass solution.")
            self.alphas = np.ones(self.samples) / float(self.samples)
            self.radius2 = 0.0
            self.svs = np.array(range(self.samples), dtype='i')
            self.pobj = 0.0  # TODO: calculate real primal objective
            self.cTc = self.alphas[self.svs].T.dot(
                kernel[self.svs, :][:, self.svs].dot(self.alphas[self.svs]))
            return self.alphas, self.radius2

        C = 1. / float(self.samples * self.nu)

        # generate a kernel matrix
        P = 2.0 * matrix(kernel)

        # q is the negative diagonal of the kernel matrix
        q = -matrix(norms)

        # sum_i alpha_i = A alpha = b = 1.0
        A = matrix(1.0, (1, N))
        b = matrix(1.0, (1, 1))

        # 0 <= alpha_i <= h = C
        G1 = spmatrix(1.0, range(N), range(N))
        G = sparse([G1, -G1])
        h1 = matrix(C, (N, 1))
        h2 = matrix(0.0, (N, 1))
        h = matrix([h1, h2])

        sol = qp(P, q, G, h, A, b)

        # store solution
        self.alphas = np.array(sol['x'], dtype=float)
        self.pobj = -sol['primal objective']

        # find support vectors
        self.svs = np.where(self.alphas > self.PRECISION)[0]
        # self.cTc = self.alphas[self.svs].T.dot(kernel[self.svs, :][:, self.svs].dot(self.alphas[self.svs]))
        self.cTc = self.alphas.T.dot(kernel.dot(self.alphas))

        # estimate the squared radius (threshold) from the support-vector distances
        self.radius2 = 0.
        thres = self.predict(X[:, self.svs])
        self.radius2 = np.min(thres)
        return self.alphas, thres
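The QP assembled above is the standard SVDD dual, min_alpha alpha'K alpha - diag(K)'alpha subject to sum(alpha) = 1 and 0 <= alpha_i <= C. The same problem solved end to end with cvxopt on toy data (everything here is illustrative; a tiny ridge keeps the kernel strictly positive definite):

import numpy as np
from cvxopt import matrix, spmatrix, sparse
from cvxopt.solvers import qp, options

options['show_progress'] = False

rng = np.random.RandomState(2)
X = rng.randn(2, 20)                         # feats x samples
K = X.T.dot(X) + 1e-6 * np.eye(20)           # linear kernel + ridge
N, nu = K.shape[0], 0.2
C = 1. / (N * nu)

P = 2.0 * matrix(K)
q = -matrix(np.diag(K).copy())
A, b = matrix(1.0, (1, N)), matrix(1.0)
G1 = spmatrix(1.0, range(N), range(N))
G = sparse([G1, -G1])                        # encodes alpha <= C and -alpha <= 0
h = matrix([matrix(C, (N, 1)), matrix(0.0, (N, 1))])

alphas = np.array(qp(P, q, G, h, A, b)['x']).ravel()
print('support vectors:', np.where(alphas > 1e-6)[0])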
Example #5
    def fit(self, max_iter=50, hotstart=None, prec=1e-3):
        """ Solve the optimization problem with a
            sequential convex programming/DC-programming
            approach:
            Iteratively, find the most likely configuration of
            the latent variables and then, optimize for the
            model parameter using fixed latent states.
        """
        N = self.sobj.get_num_samples()
        DIMS = self.sobj.get_num_dims()

        # intermediate solutions
        # latent variables
        latent = [0.0]*N

        sol = self.sobj.get_hotstart_sol()
        if hotstart is not None and hotstart.size == DIMS:
            print('New hotstart position defined.')
            sol = hotstart

        psi = np.zeros((DIMS, N))  # (dim x exm)
        old_psi = np.zeros((DIMS, N))  # (dim x exm)
        threshold = 0.

        obj = -1.
        iter = 0
        allobjs = []

        # terminate if objective function value doesn't change much
        while iter < max_iter and (iter < 2 or np.sum(abs(np.array(psi-old_psi))) >= prec):
            print('Starting iteration {0}.'.format(iter))
            print(np.sum(abs(np.array(psi-old_psi))))
            iter += 1
            old_psi = psi.copy()

            # 1. linearize
            # for the current solution compute the
            # most likely latent variable configuration
            for i in range(N):
                _, latent[i], psi[:,i] = self.sobj.argmax(sol, i)
                psi[:,i] /= np.linalg.norm(psi[:, i], ord=self.norm_ord)

            # 2. solve the intermediate convex optimization problem
            kernel = get_kernel(psi, psi)
            # kernel = center_kernel(kernel)
            # kernel = normalize_kernel(kernel)
            svm = OcSvmDualQP(kernel, self.nu)
            svm.fit()
            threshold = svm.get_threshold()

            self.svs_inds = svm.get_support_dual()
            sol = psi.dot(svm.get_alphas())

            # calculate objective
            self.threshold = threshold
            slacks = threshold - sol.T.dot(psi)
            slacks[slacks < 0.0] = 0.0
            obj = 0.5*sol.T.dot(sol) - threshold + 1./(float(N)*self.nu) * np.sum(slacks)
            print("Iter {0}: Values (Threshold-Slacks-Objective) = {1}-{2}-{3}".format(
                iter, threshold, np.sum(slacks), obj))
            allobjs.append(obj)

        self.slacks = slacks
        self.sol = sol
        self.latent = latent
        return sol, latent, threshold
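The objective printed each iteration is the one-class SVM primal, 0.5||w||^2 - rho + 1/(nu*N) * sum_i max(0, rho - w'psi_i), evaluated at the current solution. As a standalone helper (illustrative; sol, psi and threshold play the roles of w, Psi and rho above):

import numpy as np

def ocsvm_objective(sol, psi, threshold, nu):
    # slack of each example: how far it falls below the margin rho
    N = psi.shape[1]
    slacks = np.maximum(0.0, threshold - sol.dot(psi))
    return 0.5 * sol.dot(sol) - threshold + np.sum(slacks) / (float(N) * nu)

w = np.array([1.0, 0.5])
Psi = np.random.RandomState(3).randn(2, 10)   # (dim x exm), as above
print(ocsvm_objective(w, Psi, threshold=0.1, nu=0.5))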