Example #1
def test_pairwise_kernels(metric):
    # Test the pairwise_kernels helper function.

    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((2, 4))
    function = PAIRWISE_KERNEL_FUNCTIONS[metric]
    # Test with Y=None
    K1 = pairwise_kernels(X, metric=metric)
    K2 = function(X)
    assert_array_almost_equal(K1, K2)
    # Test with Y=Y
    K1 = pairwise_kernels(X, Y=Y, metric=metric)
    K2 = function(X, Y=Y)
    assert_array_almost_equal(K1, K2)
    # Test with tuples as X and Y
    X_tuples = tuple([tuple([v for v in row]) for row in X])
    Y_tuples = tuple([tuple([v for v in row]) for row in Y])
    K2 = pairwise_kernels(X_tuples, Y_tuples, metric=metric)
    assert_array_almost_equal(K1, K2)

    # Test with sparse X and Y
    X_sparse = csr_matrix(X)
    Y_sparse = csr_matrix(Y)
    if metric in ["chi2", "additive_chi2"]:
        # these don't support sparse matrices yet
        assert_raises(ValueError, pairwise_kernels,
                      X_sparse, Y=Y_sparse, metric=metric)
        return
    K1 = pairwise_kernels(X_sparse, Y=Y_sparse, metric=metric)
    assert_array_almost_equal(K1, K2)
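
Example #1's body takes `metric` as an argument; one plausible pytest driver for it (a sketch, assuming scikit-learn's public PAIRWISE_KERNEL_FUNCTIONS registry; the exact metric list in the original suite may differ) is:

import pytest
from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS

# Run the test above once for every registered kernel name
# ("linear", "rbf", "chi2", ...).
@pytest.mark.parametrize("metric", sorted(PAIRWISE_KERNEL_FUNCTIONS))
def test_pairwise_kernels(metric):
    ...  # body as in Example #1 above
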
Example #2
    def HilbertSchmidtNormIC(self, X, Y, metric='linear'):
        '''
            Calculate the Hilbert-Schmidt Independence Criterion described in
            "Measuring Statistical Dependence with Hilbert-Schmidt Norms", Arthur Gretton et al.

            Parameters
            ----------
            Assuming a joint distribution P(X,Y)
            X : list of X observations
            Y : list of Y observations

            Returns
            -------
            (HSIC, fake p-value scaling HSIC to [0,1])
        '''
        m = float(len(X))
        K = pairwise_kernels(X, X, metric=metric)
        L = pairwise_kernels(Y, Y, metric=metric)
        # centering matrix H = I - (1/m) * ones((m, m))
        H = np.eye(int(m)) - 1 / m

        res = (1 / (m - 1)**2) * np.trace(np.dot(np.dot(np.dot(K, H), L), H))

        # Another way, maybe..
        # CCm = pairwise_kernels(self.X, self.Y)
        # res = sum(np.linalg.eigvals(CCm))


        # Now use a Gamma approximation to get a p-value
        bone = np.ones((int(m), 1))
        Kc = np.dot(np.dot(H, K), H)  # centered kernel matrices (matrix products)
        Lc = np.dot(np.dot(H, L), H)

        # fit Gamma to testStat*m
        testStat = 1 / m * np.sum(Kc.T * Lc)  # TEST STATISTIC: m*HSICb (under H1)
        varHSIC = (1 / 6 * Kc * Lc)**2  # elementwise, following the reference implementation
        varHSIC = 1 / m / (m - 1) * (np.sum(varHSIC) - np.trace(varHSIC))
        varHSIC = 72 * (m - 4) * (m - 5) / m / (m - 1) / (m - 2) / (m - 3) * varHSIC  # variance under H0

        _K = K - np.diag(np.diag(K))
        _L = L - np.diag(np.diag(L))

        muX = 1 / m / (m - 1) * np.dot(np.dot(bone.T, _K), bone)
        muY = 1 / m / (m - 1) * np.dot(np.dot(bone.T, _L), bone)

        mHSIC = 1 / m * (1 + muX * muY - muX - muY)  # mean under H0

        al = mHSIC**2 / varHSIC
        bet = varHSIC * m / mHSIC  # threshold for hsicArr*m
        alpha = 0.05
        # This should be done, with varHSIC != 0
        # from scipy.special import gdtria
        # thresh = gdtria(1 - alpha, al, bet)
        # print('thresh:', thresh)
        # return (res, 1 - (1 / (1 + res)))
        return (res, res)
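
A quick sanity check of the criterion on toy data (a hedged sketch: `IC` stands for whatever class hosts the method above; HSIC should come out clearly larger for the dependent pair than for the independent one):

import numpy as np
rng = np.random.RandomState(0)
x = rng.randn(100, 1)
y_dep = x + 0.1 * rng.randn(100, 1)   # strongly dependent on x
y_ind = rng.randn(100, 1)             # independent of x
ic = IC()                             # hypothetical host class
print(ic.HilbertSchmidtNormIC(x, y_dep, metric='rbf')[0])  # larger
print(ic.HilbertSchmidtNormIC(x, y_ind, metric='rbf')[0])  # near zero
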
Example #3
def gram(mat, args):
    '''Computes the Gram matrix of mat according to the kernel specified in
    args'''

    if args.kernel in ['rbf', 'polynomial', 'poly', 'laplacian']:
        output = pairwise_kernels(mat, metric=args.kernel, n_jobs=-1,
                                  gamma=10. / mat.shape[1])
    else:
        # gamma for chi squared should be left at its default
        output = pairwise_kernels(mat, metric=args.kernel, n_jobs=-1)
    return output
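
`args` only needs a `.kernel` attribute here, so a SimpleNamespace can stand in for the real argparse result; a usage sketch:

import numpy as np
from types import SimpleNamespace

mat = np.random.RandomState(0).rand(6, 3)
G = gram(mat, SimpleNamespace(kernel='rbf'))       # uses gamma = 10 / n_features
G2 = gram(mat, SimpleNamespace(kernel='chi2'))     # default gamma; chi2 needs non-negative input, rand() satisfies that
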
Example #4
def test_kernel_versus_pairwise():
    # Check that GP kernels can also be used as pairwise kernels.
    for kernel in kernels:
        # Test auto-kernel
        if kernel != kernel_white:
            # For WhiteKernel: k(X) != k(X,X). This is assumed by
            # pairwise_kernels
            K1 = kernel(X)
            K2 = pairwise_kernels(X, metric=kernel)
            assert_array_almost_equal(K1, K2)

        # Test cross-kernel
        K1 = kernel(X, Y)
        K2 = pairwise_kernels(X, Y, metric=kernel)
        assert_array_almost_equal(K1, K2)
Example #5
	def transform(self, X):
	    """Apply feature map to X.

	    Computes an approximate feature map using the kernel
	    between some training points and X.

	    Parameters
	    ----------
	    X : array-like, shape=(n_samples, n_features)
	        Data to transform.

	    Returns
	    -------
	    X_transformed : array, shape=(n_samples, n_components)
	        Transformed data.
	    """
	    check_is_fitted(self, 'components_')
	    X = check_array(X, accept_sparse='csr')

	    kernel_params = self._get_kernel_params()
	    embedded = pairwise_kernels(X, self.components_,
	                                metric=self.kernel,
	                                filter_params=True,
	                                **kernel_params)
	    return np.dot(embedded, self.normalization_.T)
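
This transform has the shape of scikit-learn's Nystroem approximator; assuming that is the surrounding class, an equivalent end-to-end usage is:

import numpy as np
from sklearn.kernel_approximation import Nystroem

X = np.random.RandomState(0).rand(20, 5)
# Map into a 10-dimensional approximate RBF feature space.
X_transformed = Nystroem(kernel='rbf', n_components=10, random_state=0).fit_transform(X)
print(X_transformed.shape)  # (20, 10)
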
Example #6
 def links(self, data_matrix):
     data_size = data_matrix.shape[0]
     kernel_matrix = pairwise_kernels(data_matrix, metric=self.metric, **self.kwds)
     # compute instance density as average pairwise similarity
     density = np.sum(kernel_matrix, 0) / data_size
     # compute list of nearest neighbors
     kernel_matrix_sorted = np.argsort(-kernel_matrix)
     # make matrix of densities ordered by nearest neighbor
     density_matrix = density[kernel_matrix_sorted]
     # if a denser neighbor cannot be found then assign link to the instance itself
     link_ids = list(range(density_matrix.shape[0]))
     # for all instances determine link
     for i, row in enumerate(density_matrix):
         i_density = row[0]
         # for all neighbors from the closest to the furthest
         for jj, d in enumerate(row):
             # proceed until n_nearest_neighbors have been explored
             if self.n_nearest_neighbors is not None and jj > self.n_nearest_neighbors:
                 break
             j = kernel_matrix_sorted[i, jj]
             if jj > 0:
                 j_density = d
                 # if the density of the neighbor is higher than the density of the instance assign link
                 if j_density > i_density:
                     link_ids[i] = j
                     break
     return link_ids
Example #7
 def parents(self, data_matrix, target=None):
     """parents."""
     data_size = data_matrix.shape[0]
     kernel_matrix = pairwise_kernels(data_matrix, metric=self.metric, **self.kwds)
     # compute instance density as average pairwise similarity
     density = np.sum(kernel_matrix, 0) / data_size
     # compute list of nearest neighbors
     kernel_matrix_sorted = np.argsort(-kernel_matrix)
     # make matrix of densities ordered by nearest neighbor
     density_matrix = density[kernel_matrix_sorted]
     # if a denser neighbor cannot be found then assign parent to the
     # instance itself
     parent_ids = list(range(density_matrix.shape[0]))
     # for all instances determine parent link
     for i, row in enumerate(density_matrix):
         i_density = row[0]
         # for all neighbors from the closest to the furthest
         for jj, d in enumerate(row):
             j = kernel_matrix_sorted[i, jj]
             if jj > 0:
                 j_density = d
                 # if the density of the neighbor is higher than the
                 # density of the instance assign parent
                 if j_density > i_density:
                     parent_ids[i] = j
                     break
     return parent_ids
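
The loop above implements the quick-shift rule: every instance links to the closest neighbor that is denser than itself. A compact standalone sketch of the same idea (metric fixed to 'rbf' for illustration):

import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels

data = np.random.RandomState(0).rand(10, 2)
K = pairwise_kernels(data, metric='rbf')
density = np.sum(K, 0) / data.shape[0]
neighbors = np.argsort(-K)           # each row: neighbors, closest first
parents = list(range(len(data)))
for i in range(len(data)):
    for j in neighbors[i, 1:]:       # skip self (the closest "neighbor")
        if density[j] > density[i]:  # first denser neighbor wins
            parents[i] = j
            break
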
Example #8
    def transform(self, data_matrix):
        """Transforms features as the instance similarity to a set of instances as
        defined by the selector.

        Parameters
        ----------
        data_matrix : array, shape = (n_samples, n_features)
          Samples.

        Returns
        -------
        data_matrix : array, shape = (n_samples, n_features_new)
            Transformed array.
        """
        if self.selected_instances is None:
            raise Exception('Error: attempt to use transform on a model that has not been fit')
        if self.selected_instances.shape[0] == 0:
            raise Exception('Error: attempt to use transform with 0 selected instances')
        # TODO: the first instance is more important than others in a selector, so it should
        # receive a weight proportional to the rank e.g. 1/rank^p
        # the selector should return also a rank information for each feature, note: for the
        # composite selector it is important to distinguish the rank of multiple selectors
        data_matrix_out = pairwise_kernels(data_matrix,
                                           Y=self.selected_instances,
                                           metric=self.metric,
                                           **self.kwds)
        if self.scale:
            data_matrix_out = self.scaler.transform(data_matrix_out) * self.scaling_factor
        return data_matrix_out
Example #9
    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array, shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Returns
        -------
        K_diag : array, shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        prototypes_std = self.prototypes.std(0)
        n_prototypes = self.prototypes.shape[0]

        # kernel regression of noise levels
        K_pairwise = \
            pairwise_kernels(self.prototypes / prototypes_std,
                             X / prototypes_std,
                             metric="rbf", gamma=self.gamma)

        return (K_pairwise * self.sigma_2[:, None]).sum(axis=0) \
                / K_pairwise.sum(axis=0)
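
The method above is a Nadaraya-Watson (kernel-weighted) average of per-prototype noise levels. A standalone sketch of the same computation (names illustrative):

import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels

rng = np.random.RandomState(0)
prototypes = rng.rand(4, 2)                    # training points
sigma_2 = np.array([0.1, 0.2, 0.15, 0.05])     # per-prototype noise levels
X = rng.rand(3, 2)                             # query points
std = prototypes.std(0)
K = pairwise_kernels(prototypes / std, X / std, metric="rbf", gamma=1.0)
noise = (K * sigma_2[:, None]).sum(axis=0) / K.sum(axis=0)  # one value per query
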
Example #10
def KmeansForAgeEst2(db, where, users, n_clusters):
    X = []
    X_users = []
    centers = []
    est = []
    est_v = []
    for at in where:
        _users = [users[i] for i in at]
        X.append(pymongo_utill.toTimeFreq(db, _users))
        X_users.append(_users)
    for c, x in enumerate(X):
        km = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
        km.fit(x)
        centers.append(km.cluster_centers_)
        max_0 = 0
        max_1 = 0
        est_0_v = ""
        est_1_v = ""
        for i, u in enumerate(x):
            sim = pairwise_kernels(km.cluster_centers_, np.asarray(u).reshape(1, -1),
                                   metric="cosine")
            if max_0 < sim[0, 0]:
                est_0 = X_users[c][i]
                max_0 = sim[0, 0]
                est_0_v = u
            if max_1 < sim[1, 0]:
                est_1 = X_users[c][i]
                max_1 = sim[1, 0]
                est_1_v = u
        est.append((est_0, est_1))
        est_v.append((est_0_v, est_1_v))

    return centers
Example #11
    def decision_function(self, X):
        """Scores related to the ordering of the samples X.
        
        Note that higher scores correspond to higher rankings. For example,
        for three ordered samples (say ranks 1, 2, 3) we would expect the
        corresponding scores to decrease (say 9.5, 6.2, 3.5).
        
        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors.
            
        Returns
        -------
        scores : array-like, shape = [n_samples]
            The higher the score, the higher the rank. For example,
            if the x_1's rank is 1 and x_2's rank is 2, then
            x_1's score will be higher than x_2's score.        
            
        """
        
        if self._rank_vectors is None:
            raise Exception('Attempted to predict before fitting model')

        alpha = self._alpha
        gram_matrix = pairwise.pairwise_kernels(self._rank_vectors, X, metric=self.kernel)
        scores = np.sum(alpha[:, np.newaxis] * gram_matrix, 0)
        return scores
Example #12
 def fit(self, X, y, unlabeled_data=None):
   num_data = X.shape[0] + unlabeled_data.shape[0]
   num_labeled = X.shape[0]
   num_unlabeled = unlabeled_data.shape[0]
   labeled = np.zeros((num_data,), dtype=np.float32)
   labeled[0:num_labeled] = 1.0
   if issparse(X):
     self.X_ = vstack((util.cast_to_float32(X),
                       util.cast_to_float32(unlabeled_data)), format='csr')
   else:
     self.X_ = np.concatenate((util.cast_to_float32(X),
                               util.cast_to_float32(unlabeled_data)))
   self.gamma = (
       self.gamma if self.gamma is not None else 1.0 / X.shape[1])
   self.kernel_params = {'gamma':self.gamma, 'degree':self.degree, 'coef0':self.coef0}
   kernel_matrix = pairwise_kernels(self.X_, metric=self.kernel,
                                    filter_params=True, **self.kernel_params)
   A = np.dot(np.diag(labeled), kernel_matrix)
   if self.nu2 != 0:
     if self.kernel == 'rbf':
       laplacian_kernel_matrix = kernel_matrix
     else:
       laplacian_kernel_matrix = rbf_kernel(self.X_, gamma=self.gamma)
     laplacian_x_kernel = np.dot(graph_laplacian(
         laplacian_kernel_matrix, normed=self.normalize_laplacian), kernel_matrix)
     A += self.nu2 * laplacian_x_kernel
   y = np.concatenate((y, -np.ones((num_unlabeled,), dtype=np.float32)),
                      axis=0)
   super(LapRLSC, self).fit(A, y, class_for_unlabeled=-1)
Example #13
 def select(self, data_matrix, target=None):
     """select."""
     # extract difference matrix
     kernel_matrix = pairwise_kernels(data_matrix,
                                      metric=self.metric, **self.kwds)
     # set minimum value
     m = -1
     # set diagonal to 0 to remove self similarity
     np.fill_diagonal(kernel_matrix, 0)
     # iterate size - k times, i.e. until only k instances are left
     for t in range(data_matrix.shape[0] - self.n_instances):
         # find pairs with largest kernel
         (max_i, max_j) = np.unravel_index(
             np.argmax(kernel_matrix), kernel_matrix.shape)
         # choose one instance at random
         if random.random() > 0.5:
             idx = max_i
         else:
             idx = max_j
         # remove the chosen instance by setting all of its pairwise
         # similarities to the minimum value
         kernel_matrix[idx, :] = m
         kernel_matrix[:, idx] = m
     # extract surviving elements, i.e. elements that still have 0 on the diagonal
     selected_instances_ids = np.array(
         [i for i, x in enumerate(np.diag(kernel_matrix)) if x == 0])
     return selected_instances_ids
Example #14
def test_cosine_kernel():
    """ Test the cosine_kernels. """

    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((3, 4))
    Xcsr = csr_matrix(X)
    Ycsr = csr_matrix(Y)

    for X_, Y_ in ((X, None), (X, Y), (Xcsr, None), (Xcsr, Ycsr)):
        # Test that the cosine kernel is equal to a linear kernel when data
        # has been previously normalized by L2-norm.
        K1 = pairwise_kernels(X_, Y=Y_, metric="cosine")
        X_ = normalize(X_)
        if Y_ is not None:
            Y_ = normalize(Y_)
        K2 = pairwise_kernels(X_, Y=Y_, metric="linear")
        assert_array_almost_equal(K1, K2)
Example #15
 def score(self, Xh, yh):
     # not really a score, more a loss
     lambdak = self.alpha0
     K_pred = pairwise_kernels(Xh, self.Xt, gamma=np.exp(lambdak[0]),
                               metric='rbf')
     pred = K_pred.dot(self.dual_coef_)
     v = yh - pred
     return v.dot(v)
Example #16
 def h_sol_approx(x, lambdak, tol):
     # returns an approximate solution of the inner optimization
     K = pairwise_kernels(Xt, gamma=np.exp(lambdak[0]), metric='rbf')
     (out, info) = splinalg.cg(
         K + np.exp(lambdak[1]) * np.eye(x0.size), yt, x0=x)
     # scipy's cg reports failure via a nonzero info code, not a boolean
     if info != 0:
         raise ValueError
     return out
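
The CG solve above is exactly the kernel ridge dual system (K + lambda*I) x = y; a hedged cross-check against scikit-learn:

import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics.pairwise import pairwise_kernels

rng = np.random.RandomState(0)
Xt, yt = rng.rand(20, 3), rng.rand(20)
gamma, lam = 0.5, 0.1
K = pairwise_kernels(Xt, gamma=gamma, metric='rbf')
direct = np.linalg.solve(K + lam * np.eye(len(Xt)), yt)
ridge = KernelRidge(alpha=lam, kernel='rbf', gamma=gamma).fit(Xt, yt)
print(np.allclose(direct, ridge.dual_coef_))  # True
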
Example #17
def test_lower_bound_multi_rbf():
    K = pairwise_kernels(mult_dense, metric="rbf", gamma=0.1)
    Cmin = C_lower_bound(K, mult_target)
    Cmin2 = C_lower_bound(mult_dense, mult_target, kernel="rbf", gamma=0.1)
    Cmin3 = C_lower_bound(mult_dense, mult_target, kernel="rbf", gamma=0.1,
                          search_size=60, random_state=0)
    assert_almost_equal(Cmin, Cmin2, 4)
    assert_almost_equal(Cmin, Cmin3, 4)
Example #18
def test_fit_reg_squared_loss_nn_l2():
    K = pairwise_kernels(digit.data, metric="poly", degree=4)
    clf = CDRegressor(C=1, random_state=0, penalty="nnl2",
                      loss="squared", max_iter=100)
    clf.fit(K, digit.target)
    y_pred = (clf.predict(K) > 0.5).astype(int)
    acc = np.mean(digit.target == y_pred)
    assert_almost_equal(acc, 0.9444, 3)
Example #19
def test_pairwise_kernels_callable():
    # Test the pairwise_kernels helper function
    # with a callable function, with given keywords.
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((2, 4))

    metric = callable_rbf_kernel
    kwds = {'gamma': 0.1}
    K1 = pairwise_kernels(X, Y=Y, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=Y, **kwds)
    assert_array_almost_equal(K1, K2)

    # callable function, X=Y
    K1 = pairwise_kernels(X, Y=X, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=X, **kwds)
    assert_array_almost_equal(K1, K2)
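
The helper `callable_rbf_kernel` is not shown here; a plausible definition, consistent with pairwise_kernels invoking callables on pairs of 1-D rows, is:

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

def callable_rbf_kernel(x, y, **kwds):
    # pairwise_kernels hands callables 1-D rows, so promote to 2-D first
    return rbf_kernel(np.atleast_2d(x), np.atleast_2d(y), **kwds)
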
Example #20
    def fit(self, PHI, targets, Parameters):

        if (self.kind == 'rbf' or self.kind == 'sigmoid' or self.kind == "polynomial" or self.kind == "lin" or self.kind == "cosine"):

            # save targets, needed for kernel interpolation
            self.targets = targets

            # parameters
            self.Parameters = Parameters
            # Kernel matrix; Parameters is assumed to be a dict of kernel
            # parameters, passed through with invalid entries filtered out
            K = pairwise_kernels(PHI, PHI, metric=self.kind, filter_params=True, **self.Parameters)

            # To predict on a sample x from the n training samples xn, the
            # kernel weights must sum to one, i.e. sum_n k(x, xn) = 1
            Normalization = np.power(np.tile(np.sum(K, 1), (K.shape[1], 1)), -1)
            K = Normalization * K

            # Prediction using training data
            y_pred = np.dot(K, targets)

            # Prediction variance
            self.bata = np.var(targets - y_pred)
            # Prediction variance of each sample
            self.sigma = (self.bata) + np.diag(K) / (self.bata)

            self.trainingdata = PHI


        if (self.kind == 'basis'):
            self.targets = targets
            self.Parameters = Parameters

            dim = PHI.shape[1]
            S0 = np.identity(dim)
            Parameter = np.array(Parameters)

            if (Parameter.size == 1):
                # zero-mean broad prior, with one parameter and maximum
                # likelihood estimation of the prior
                Lambda = Parameter[0]
                self.Sn = np.linalg.inv(Lambda * S0 + np.dot(PHI.transpose(), PHI))
                self.Mn = np.dot(self.Sn, np.dot(PHI.transpose(), targets))
                # Prediction on training data
                y_pred = np.dot(PHI, self.Mn)
                self.bata = np.var(targets - y_pred)
                self.sigma = (self.bata) + np.diag(np.dot(PHI, np.dot(self.Sn, PHI.transpose())))

            if (Parameter.size == 2):
                # zero-mean broad prior, with two parameters
                alfa = Parameter[0]
                bata = Parameter[1]
                self.Sn = np.linalg.inv(alfa * S0 + bata * np.dot(PHI.transpose(), PHI))
                self.Mn = bata * np.dot(self.Sn, np.dot(PHI.transpose(), targets))
                # Prediction on training data
                y_pred = np.dot(PHI, self.Mn)
                # Calculate noise variance on training data using the MAP estimate
                self.bata = np.var(targets - y_pred)
                # Prediction variance on training data
                self.sigma = (self.bata) + np.diag(np.dot(PHI, np.dot(self.Sn, PHI.transpose())))
Example #21
 def _most_representative(self, structs):
     # compute kernel matrix with sequence_vectorizer
     data_matrix = self.sequence_vectorizer.transform(structs)
     kernel_matrix = pairwise_kernels(data_matrix, metric='rbf', gamma=1)
     # compute instance density as average pairwise similarity
     density = np.sum(kernel_matrix, 0) / data_matrix.shape[0]
     # pick the instance with the highest density
     max_id = np.argsort(-density)[0]
     return max_id
Example #22
 def _get_kernel(self, X, Y=None):
     if callable(self.kernel):
         params = self.kernel_params or {}
     else:
         params = {"gamma": self.gamma,
                   "degree": self.degree,
                   "coef0": self.coef0}
     return pairwise_kernels(X, Y, metric=self.kernel,
                             filter_params=True, **params)
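
filter_params=True is what lets this helper pass one params dict regardless of metric: pairwise_kernels drops entries that the chosen kernel does not accept. A standalone sketch:

import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels

X = np.random.RandomState(0).rand(4, 3)
params = {"gamma": 0.5, "degree": 3, "coef0": 1}
# 'rbf' only accepts gamma; degree and coef0 are silently filtered out.
K = pairwise_kernels(X, metric="rbf", filter_params=True, **params)
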
Example #23
def gram(mat, args):
	'''Computes the Gram matrix of mat according to a specified kernel function'''
	kwargs = {}

	if args.kernel in ['rbf', 'polynomial', 'poly', 'laplacian']:
		kwargs = dict(gamma=10. / mat.shape[1])
	# gamma for chi squared is left at its default
	output = pairwise_kernels(mat, metric=args.kernel, n_jobs=1, **kwargs)
	return output
Example #24
 def g_cross(x, lambdak):
     K_pred = pairwise_kernels(
         Xh, Xt, gamma=np.exp(lambdak[0]), metric='rbf')
     K_pred_prime = -np.exp(lambdak[0]) * euclidean_distances(
         Xh, Xt, squared=True) * K_pred
     pred = K_pred.dot(x)
     v = yh - pred
     tmp = K_pred_prime.dot(x)
     return np.array((- 2 * tmp.dot(v), 0.0))
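
K_pred_prime above is the derivative of the RBF kernel with respect to the log-bandwidth: for K = exp(-gamma * d^2) with gamma = exp(lambda), dK/dlambda = -gamma * d^2 * K. A quick finite-difference check (sketch):

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel, euclidean_distances

rng = np.random.RandomState(0)
A, B = rng.rand(3, 2), rng.rand(4, 2)
lam, eps = -1.0, 1e-6
k = lambda l: rbf_kernel(A, B, gamma=np.exp(l))
numeric = (k(lam + eps) - k(lam - eps)) / (2 * eps)
analytic = -np.exp(lam) * euclidean_distances(A, B, squared=True) * k(lam)
print(np.allclose(numeric, analytic, atol=1e-5))  # True
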
Example #25
def test_pairwise_kernels_filter_param():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((2, 4))
    K = rbf_kernel(X, Y, gamma=0.1)
    params = {"gamma": 0.1, "blabla": ":)"}
    K2 = pairwise_kernels(X, Y, metric="rbf", filter_params=True, **params)
    assert_array_almost_equal(K, K2)

    assert_raises(TypeError, pairwise_kernels, X, Y, "rbf", **params)
Example #26
    def build_graph(self,
                    data_matrix=None,
                    target=None,
                    k=3,
                    k_quick_shift=1,
                    k_outliers=5,
                    knn_horizon=5):
        """Build graph."""
        size = data_matrix.shape[0]
        # make kernel
        kernel_matrix = pairwise_kernels(data_matrix,
                                         metric=self.metric,
                                         **self.kwds)
        # compute instance density as average pairwise similarity
        density = np.sum(kernel_matrix, 0) / size
        # compute list of nearest neighbors
        distance_matrix = pairwise_distances(data_matrix)
        knn_ids = np.argsort(distance_matrix)
        # make matrix of densities ordered by nearest neighbor
        density_matrix = density[knn_ids]

        # make a graph with instances as nodes
        graph = nx.Graph()
        for v in range(size):
            graph.add_node(v, group=target[v], outlier=False)

        # build knn edges
        if k > 0:
            # find the closest selected instance and instantiate knn edges
            graph = self._add_knn_links(
                graph,
                target,
                kernel_matrix=kernel_matrix,
                knn_ids=knn_ids,
                nneighbors_th=k)
            self._annotate_outliers(
                graph,
                nneighbors_th=k_outliers,
                kernel_matrix=kernel_matrix,
                knn_ids=knn_ids)

        # build shift tree
        for th in range(1, k_quick_shift + 1):
            link_ids = self._kernel_shift_links(
                kernel_matrix=kernel_matrix,
                density_matrix=density_matrix,
                knn_ids=knn_ids,
                k_quick_shift=th,
                target=target,
                knn_horizon=knn_horizon)
            for i, link in enumerate(link_ids):
                if i != link:
                    graph.add_edge(i, link, edge_type='shift', rank=th)
        graph = self._compute_edge_len(graph, data_matrix, target)
        return graph
Example #27
 def decision_function(self, X):
     if self.mode == 'exact':
         K = pairwise_kernels(
             X, self.X_train_,
             metric=self.kernel,
             filter_params=True,
             gamma=self.gamma
         )
     else:
         K = self.kernel_sampler_.transform(X)
     return super(SparseKernelClassifier, self).decision_function(K)
Example #28
 def _get_kernel(self, X, Y=None):
     params = {"gamma": self.gamma,
               "degree": self.degree,
               "coef0": self.coef0}
     try:
         return pairwise_kernels(X, Y, metric=self.kernel,
                                 filter_params=True, **params)
     except AttributeError:
         raise ValueError("%s is not a valid kernel. Valid kernels are: "
                          "rbf, poly, sigmoid, linear and precomputed."
                          % self.kernel)
Example #29
    def fit(self, Xt, yt):
        self.Xt = Xt
        x0 = np.zeros(Xt.shape[0])

        # returns an approximate solution of the inner optimization
        K = pairwise_kernels(Xt, gamma=np.exp(self.alpha0[0]), metric='rbf')
        (out, info) = splinalg.cg(
            K + np.exp(self.alpha0[1]) * np.eye(x0.size), yt, x0=x0)
        # scipy's cg reports failure via a nonzero info code, not a boolean
        if info != 0:
            raise ValueError
        self.dual_coef_ = out
Example #30
 def _get_kernel(self, X, Y=None):
     if callable(self.kernel):
         params = self.kernel_params or {}
     else:
         params = {"gamma": self.gamma,
                   "degree": self.degree,
                   "coef0": self.coef0}
     if self.kernel == "robust_kernel":
         return trimmedrbf_kernel(X, Y, gamma=self.gamma,
                                  robust_gamma=self.robust_gamma)
     else:
         return pairwise_kernels(X, Y, metric=self.kernel,
                                 filter_params=True, **params)
Example #31
    def test(self,
             dataset: BaseADDataset,
             device: str = 'cpu',
             n_jobs_dataloader: int = 0):
        """Tests the SSAD model on the test data."""
        logger = logging.getLogger()

        #_, test_loader = dataset.loaders(batch_size=128, num_workers=n_jobs_dataloader)
        test_loader = self.my_dataset.test_loader
        # Get data from loader
        idx_label_score = []
        X = ()
        idxs = []
        labels = []
        for data in test_loader:
            inputs, label_batch, _, idx = data
            inputs, label_batch, idx = inputs.to(device), label_batch.to(
                device), idx.to(device)
            if self.hybrid:
                inputs = self.ae_net.encoder(
                    inputs
                )  # in hybrid approach, take code representation of AE as features
            X_batch = inputs.view(
                inputs.size(0), -1
            )  # X_batch.shape = (batch_size, n_channels * height * width)
            X += (X_batch.cpu().data.numpy(), )
            idxs += idx.cpu().data.numpy().astype(np.int64).tolist()
            labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()
        X = np.concatenate(X)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()

        # Build kernel
        kernel = pairwise_kernels(X,
                                  self.X_svs,
                                  metric=self.kernel,
                                  gamma=self.gamma)

        scores = (-1.0) * self.model.apply(kernel)

        self.results['test_time'] = time.time() - start_time
        scores = scores.flatten()
        self.rho = -self.model.threshold

        # Save triples of (idx, label, score) in a list
        idx_label_score += list(zip(idxs, labels, scores.tolist()))
        self.results['test_scores'] = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.results['test_auc'] = roc_auc_score(labels, scores)

        # If hybrid, also test model with linear kernel
        if self.hybrid:
            start_time = time.time()
            linear_kernel = pairwise_kernels(X,
                                             self.linear_X_svs,
                                             metric='linear')
            scores_linear = (-1.0) * self.linear_model.apply(linear_kernel)
            self.results['test_time_linear'] = time.time() - start_time
            scores_linear = scores_linear.flatten()
            self.results['test_auc_linear'] = roc_auc_score(
                labels, scores_linear)
            logger.info('Test AUC linear model: {:.2f}%'.format(
                100. * self.results['test_auc_linear']))
            logger.info('Test Time linear model: {:.3f}s'.format(
                self.results['test_time_linear']))

        # Log results
        logger.info('Test AUC: {:.2f}%'.format(100. *
                                               self.results['test_auc']))
        logger.info('Test Time: {:.3f}s'.format(self.results['test_time']))
        logger.info('Finished testing.')
Example #32
        # normalizing
        doc_term_matrix_tfidf_l2 = []
        for tf_vector in doc_term_matrix_tfidf:
            doc_term_matrix_tfidf_l2.append(l2_normalizer(tf_vector))
        hasil_tfidf = np.matrix(doc_term_matrix_tfidf_l2)

        st.subheader("l2 tfidf normalizer")
        frequency_TFIDF = pd.DataFrame(hasil_tfidf,
                                       index=id_requirement,
                                       columns=kolom_df)
        st.write(frequency_TFIDF)

        st.subheader("IR using cosine")
        X = np.array(hasil_tfidf[0:])
        Y = np.array(hasil_tfidf)
        cosine_similaritas = pairwise_kernels(X, Y, metric='linear')
        cosine_df = pd.DataFrame(cosine_similaritas,
                                 index=id_requirement,
                                 columns=id_requirement)
        st.write(cosine_df)

        # clusters
        klaster_value = st.sidebar.slider("How many clusters?", 0, 5,
                                          len(id_requirement))
        kmeans = KMeans(
            n_clusters=klaster_value
        )  # cluster the requirement vectors into the chosen number of groups
        kmeans_df = kmeans.fit(cosine_similaritas)
        st.subheader("K-Means Cluster")

        correct = 0
Example #33
def similarity_graph_from_term_document_matrix(sp_mat):
    dx = pairwise_kernels(sp_mat, metric='cosine')
    g = nx.from_numpy_matrix(dx)

    return g
Example #34
def kernel_mat_pair(f, x, y=None):
    return pairwise_kernels(x, y, f)
Example #35
# compare the eigenvalues of kernel matrices

# In[108]:

from sklearn.metrics.pairwise import pairwise_kernels
from numpy.linalg import eigvals

# In[109]:

kernel_list = ['linear', 'rbf', 'poly', 'sigmoid']

for kernel in kernel_list:
    k = 10

    kernel_matrix = pairwise_kernels(credit, metric=kernel)
    print "Kernel is " + str(kernel)

    print eigvals(kernel_matrix)

    print "\n"

# In[156]:

kernel_matrix.shape

# In[157]:

eigvals(kernel_matrix).shape

# In[144]:
Example #36
def prototype_selection(X, subsample=20, kernel='rbf'):
    return greedy_select_protos(
        pairwise_kernels(X, metric=kernel), np.array(range(X.shape[0])),
        subsample) if subsample > 1 else np.array(range(X.shape[0]))
Example #37
 def predict(self, X):
     K = pairwise_kernels(self.X, X, metric=self.kernel, gamma=self.gamma)
     return (K * self.y[:, None]).sum(axis=0) / K.sum(axis=0)
Example #38
def rbf_kernels(X, n_jobs):
    return pairwise_kernels(X, metric="rbf", n_jobs=n_jobs, gamma=0.1)
Example #39
def linear_kernel_test(testK, origK, n_jobs):
    return pairwise_kernels(testK, origK, metric="linear", n_jobs=n_jobs)
Example #40
    def estimate_Gap_statistics(self, nrefs):
        masknans = pl.ma.masked_not_equal(self._X[:, 0], 0).mask
        minvals = self._X[masknans, :].min(axis=0)
        maxvals = self._X[masknans, :].max(axis=0)
        meanvals = self._X[masknans, :].mean(axis=0)
        stdvals = self._X[masknans, :].std(axis=0)
        ref_Affinity = []
        Dref = []

        # Compute a random uniform reference distribution of features
        # precompute Distances and affinities.
        for i in range(nrefs):

            random_X = pl.ones_like(self._X)
            # random_X [:,0 ] =np.random.uniform (low = minvals[0] , high=maxvals[0], size=pl.int_( self._X.shape[0]/10 ) )
            random_X[:, 1] = np.random.uniform(
                low=pl.quantile(q=0.16, a=self._X[masknans, 1]),
                high=pl.quantile(q=0.84, a=self._X[masknans, 1]),  # draw between the 16th and 84th percentiles
                size=pl.int_(self._X.shape[0]),
            )
            random_X[:, 0] = np.random.normal(loc=meanvals[0],
                                              scale=stdvals[0],
                                              size=pl.int_(self._X.shape[0]))
            ref_D = self._metric.pairwise(random_X)
            ref_D = pl.ma.fix_invalid(ref_D, fill_value=1.0).data

            Dref.append(ref_D)

            ref_Affinity.append(pairwise_kernels(ref_D, metric="precomputed"))

        self.Gaps = pl.zeros(len(self.Kvals))
        self.sd = self.Gaps * 0.0
        self.W = self.Gaps * 0.0  # KL index
        p = self._nfeat
        for j, K in enumerate(self.Kvals):
            if self.verbose:
                print(f"Running with K={K} clusters")
            self.clusters = AgglomerativeClustering(
                n_clusters=K,
                affinity="precomputed",
                linkage="average",
                connectivity=self.connectivity,
            )
            self.clusters.fit_predict(self._Affinity)
            # estimate WCSS for the samples
            W = self.get_WCSS(K, self.clusters.labels_, self._distance_matr)
            self.W[j] = W
            # estimate WCSS for random samples
            ref_W = pl.zeros(nrefs)

            for i in range(nrefs):
                ref_clusters = AgglomerativeClustering(
                    n_clusters=K,
                    affinity="precomputed",
                    linkage="average",
                    connectivity=self.connectivity,
                )
                ref_clusters.fit_predict(ref_Affinity[i])
                ref_W[i] = self.get_WCSS(K, ref_clusters.labels_, Dref[i])

            self.sd[j] = np.std(np.log(ref_W)) * np.sqrt(1 + 1.0 / nrefs)
            self.Gaps[j] = np.mean(np.log(ref_W)) - np.log(W)

        ## see section 4 of Tibshirani et al. http://web.stanford.edu/~hastie/Papers/gap.pdf

        gaps_criterion = pl.array(
            [self.Kvals[:-1], self.Gaps[:-1] - self.Gaps[1:] + self.sd[1:]])
        mask = pl.array(gaps_criterion[1, :] >= 0)
        return pl.int_(gaps_criterion[0, mask][0])
Example #41
def cluster_tightness(data, metric='cosine'):
    centroid = np.mean(data, axis=0).reshape(1, -1)
    return np.mean(pairwise_kernels(data, centroid, metric=metric))
Example #42
    def __init__(
        self,
        features,
        nfeatures,
        nside=16,
        include_haversine=False,
        galactic_mask=None,
        affinity="euclidean",
        scaler=preprocessing.StandardScaler(),
        file_affinity="",
        verbose=False,
        save_affinity=False,
        feature_weights=None,
    ):
        """
        -features: list of features to cluster
        -nfeatures

        """
        self._nside = nside
        self._nfeat = nfeatures
        if galactic_mask is None:
            self.galactic_mask = np.bool_(pl.ones_like(features[0]))
        else:
            self.galactic_mask = galactic_mask
        # index with the stored mask; indexing with None would add an axis
        features[0] = features[0][self.galactic_mask]
        features[1] = features[1][self.galactic_mask]

        if self._nfeat > 1:
            assert features[0].shape[0] == features[1].shape[0]
            self._npix = features[0].shape[0]  # hp.nside2npix(nside)
        else:
            self._npix = features.shape[0]
        if feature_weights is None:
            feature_weights = pl.ones(self._nfeat)
        self.verbose = verbose
        self._X = pl.zeros((self._npix, self._nfeat))

        if self._nfeat == 1:
            features = [features]
        for i, x in zip(range(self._nfeat), features):
            self._X[:, i] = x
        # Standard rescaling of all the features
        if scaler is not None:
            self._X = scaler.fit_transform(self._X)

        for i in range(self._nfeat):
            self._X[:, i] *= feature_weights[i]

        self.estimate_affinity(affinity, file_affinity)

        self._has_angles = False
        if include_haversine:
            self._has_angles = True
            self.estimate_haversine()

        self._Affinity = pairwise_kernels(self._distance_matr,
                                          metric="precomputed")

        if save_affinity:
            pl.save(file_affinity, self._Affinity)
Example #43
 def gaussian(x, **kwargs):
     return pairwise_kernels(x, x, metric="rbf", **kwargs)
Example #44
#!/usr/bin/env python

import numpy as np
from sklearn.preprocessing import KernelCenterer
from sklearn.metrics.pairwise import pairwise_kernels


X = np.array([[ 1., -2.,  2.], [ -2.,  1.,  3.], [ 4.,  1., -2.]])
K = pairwise_kernels(X, metric='linear')


transformer = KernelCenterer().fit(K)
centered_K = transformer.transform(K)
print(centered_K)



H = np.eye(3) - (1.0/3)*np.ones((3,3))
centered_K2 = H.dot(K).dot(H)
print(centered_K2)
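
Both routes compute the same thing: KernelCenterer is exactly the double-centering K -> H K H with H = I - (1/n) * ones(n, n). A quick check appended to the script above (sketch):

# The transformer result and the explicit H K H agree elementwise.
print(np.allclose(centered_K, centered_K2))
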
Example #45
 def _calc_kernel(self, X, Y=None):
     return pairwise_kernels(X, Y, metric=self.kernel_type)
Example #46
 def transform(self, X, Y=None):
     n = X.shape[1]
     H = np.eye(n) - ((1 / n) * np.ones((n, n)))
     kernel_X_X = pairwise_kernels(X=X.T, Y=X.T, metric=self.kernel)
     X_transformed = (self.Theta.T).dot(H).dot(kernel_X_X).dot(H)
     return X_transformed
Example #47
mae_cv = np.zeros((n_folds, 1))

# --------------------------------------------------------------------------
for i_fold, (train_idx, test_idx) in enumerate(kf.split(x, y)):
    x_train, x_test = x[train_idx], x[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    print('CV iteration: %d' % (i_fold + 1))

    # --------------------------------------------------------------------------
    # Model
    gpr = GaussianProcessRegressor()

    X = np.atleast_2d(x)
    gram_kernel = pairwise_kernels(X, metric='precomputed', filter_params=False)
    # --------------------------------------------------------------------------
    # Model selection
    # Search space
    param_grid = {'kernel': [RBF(), WhiteKernel(), gram_kernel]}

    # Gridsearch
    internal_cv = KFold(n_splits=5)
    grid_cv = GridSearchCV(estimator=gpr,
                           param_grid=param_grid,
                           cv=internal_cv,
                           scoring='neg_mean_absolute_error',
                           verbose=1,
                           n_jobs=1)

    # --------------------------------------------------------------------------
Example #48
 def _density_func(self, data_matrix, target=None):
     kernel_matrix = pairwise_kernels(data_matrix, metric=self.metric, **self.kwds)
     # compute instance density as average pairwise similarity
     densities = np.mean(kernel_matrix, 0)
     return densities
Example #49
    def train(self,
              dataset: BaseADDataset,
              device: str = 'cpu',
              n_jobs_dataloader: int = 0):
        """Trains the SSAD model on the training data."""
        logger = logging.getLogger()

        # do not drop last batch for non-SGD optimization shallow_ssad
        #train_loader = DataLoader(dataset=dataset.train_set, batch_size=128, shuffle=True,num_workers=n_jobs_dataloader, drop_last=False)
        train_loader = self.my_dataset.test_loader
        # Get data from loader
        X = ()
        semi_targets = []
        for data in train_loader:
            inputs, _, semi_targets_batch, _ = data
            inputs, semi_targets_batch = inputs.to(
                device), semi_targets_batch.to(device)
            if self.hybrid:
                inputs = self.ae_net.encoder(
                    inputs
                )  # in hybrid approach, take code representation of AE as features
            X_batch = inputs.view(
                inputs.size(0), -1
            )  # X_batch.shape = (batch_size, n_channels * height * width)
            X += (X_batch.cpu().data.numpy(), )
            semi_targets += semi_targets_batch.cpu().data.numpy().astype(
                np.int64).tolist()
        X, semi_targets = np.concatenate(X), np.array(semi_targets)

        # Training
        logger.info('Starting training...')

        # Select model via hold-out test set of 1000 samples
        gammas = np.logspace(-7, 2, num=10, base=2)
        best_auc = 0.0

        # Sample hold-out set from test set
        #_, test_loader = dataset.loaders(batch_size=128, num_workers=n_jobs_dataloader)
        #---------------------------------------------
        test_loader = self.my_dataset.test_loader

        X_test = ()
        labels = []
        for data in test_loader:
            inputs, label_batch, _, _ = data
            inputs, label_batch = inputs.to(device), label_batch.to(device)
            if self.hybrid:
                inputs = self.ae_net.encoder(
                    inputs
                )  # in hybrid approach, take code representation of AE as features
            X_batch = inputs.view(
                inputs.size(0), -1
            )  # X_batch.shape = (batch_size, n_channels * height * width)
            X_test += (X_batch.cpu().data.numpy(), )
            labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()
        X_test, labels = np.concatenate(X_test), np.array(labels)
        n_test, n_normal, n_outlier = len(X_test), np.sum(labels == 0), np.sum(
            labels == 1)
        n_val = int(0.1 * n_test)
        n_val_normal, n_val_outlier = int(n_val * (n_normal / n_test)), int(
            n_val * (n_outlier / n_test))
        perm = np.random.permutation(n_test)
        X_val = np.concatenate(
            (X_test[perm][labels[perm] == 0][:n_val_normal],
             X_test[perm][labels[perm] == 1][:n_val_outlier]))
        labels = np.array([0] * n_val_normal + [1] * n_val_outlier)

        i = 1
        for gamma in gammas:

            # Build the training kernel
            kernel = pairwise_kernels(X, X, metric=self.kernel, gamma=gamma)

            # Model candidate
            model = ConvexSSAD(kernel,
                               semi_targets,
                               Cp=self.Cp,
                               Cu=self.Cu,
                               Cn=self.Cn)

            # Train
            start_time = time.time()
            model.fit()
            train_time = time.time() - start_time

            # Test on small hold-out set from test set
            kernel_val = pairwise_kernels(X_val,
                                          X[model.svs, :],
                                          metric=self.kernel,
                                          gamma=gamma)
            scores = (-1.0) * model.apply(kernel_val)
            scores = scores.flatten()

            # Compute AUC
            auc = roc_auc_score(labels, scores)

            logger.info(
                f'  | Model {i:02}/{len(gammas):02} | Gamma: {gamma:.8f} | Train Time: {train_time:.3f}s '
                f'| Val AUC: {100. * auc:.2f} |')

            if auc > best_auc:
                best_auc = auc
                self.model = model
                self.gamma = gamma
                self.results['train_time'] = train_time

            i += 1

        # Get support vectors for testing
        self.X_svs = X[self.model.svs, :]

        # If hybrid, also train a model with linear kernel
        if self.hybrid:
            linear_kernel = pairwise_kernels(X, X, metric='linear')
            self.linear_model = ConvexSSAD(linear_kernel,
                                           semi_targets,
                                           Cp=self.Cp,
                                           Cu=self.Cu,
                                           Cn=self.Cn)
            start_time = time.time()
            self.linear_model.fit()
            train_time = time.time() - start_time
            self.results['train_time_linear'] = train_time
            self.linear_X_svs = X[self.linear_model.svs, :]

        logger.info(
            f'Best Model: | Gamma: {self.gamma:.8f} | AUC: {100. * best_auc:.2f}'
        )
        logger.info('Training Time: {:.3f}s'.format(
            self.results['train_time']))
        logger.info('Finished training.')
Example #50
                                n_jobs=self.n_jobs,
                                degree=self.degree,
                                gamma=self.gamma,
                                coef0=self.coef0)


if __name__ == "__main__":
    X = np.random.normal(size=(1000, 100))
    Y = np.random.normal(size=(1000, 20))
    kcca = KCCA(n_components=10, kernel="rbf", n_jobs=1, epsilon=0.1).fit(X, Y)
    """
	matching on test data
	"""
    alpha = kcca.alpha
    beta = kcca.beta
    X_te = np.random.normal(size=(10, 100))
    Y_te = np.random.normal(size=(10, 20))
    Kx = kcca._pairwise_kernels(X_te, X)
    Ky = kcca._pairwise_kernels(Y_te, Y)
    F = np.dot(Kx, alpha)
    G = np.dot(Ky, beta)
    D = euclidean_distances(F, G)
    idx_pred = np.argmin(D, axis=0)
    print "matching result:", idx_pred, len(alpha), len(beta)
    """
	similarity between true object and predicted object on test data
	"""
    idx_true = range(10)
    C = pairwise_kernels(Y_te[idx_true], Y_te[idx_pred], metric="cosine")
    print "1-best mean similarity:", np.mean(C.diagonal())
Example #51
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            **fit_params):
        """
        Fit KMM.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            indexes of source labeled data in X, y.

        tgt_index : iterable
            indexes of target unlabeled data in X, y.
            
        tgt_index_labeled : iterable, optional (default=None)
            indexes of target labeled data in X, y.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        if tgt_index_labeled is None:
            Xs = X[src_index]
            ys = y[src_index]
        else:
            Xs = X[np.concatenate((src_index, tgt_index_labeled))]
            ys = y[np.concatenate((src_index, tgt_index_labeled))]
        Xt = X[tgt_index]

        n_s = len(Xs)
        n_t = len(Xt)

        # Get epsilon
        if self.epsilon is None:
            self.epsilon = (np.sqrt(n_s) - 1) / np.sqrt(n_s)

        # Compute Kernel Matrix
        K = pairwise.pairwise_kernels(Xs,
                                      Xs,
                                      metric=self.kernel,
                                      **self.kernel_params)
        K = (1 / 2) * (K + K.transpose())

        # Compute q
        kappa = pairwise.pairwise_kernels(Xs,
                                          Xt,
                                          metric=self.kernel,
                                          **self.kernel_params)
        kappa = (n_s / n_t) * np.dot(kappa, np.ones((n_t, 1)))

        constraints = LinearConstraint(np.ones((1, n_s)),
                                       lb=n_s * (1 - self.epsilon),
                                       ub=n_s * (1 + self.epsilon))

        def func(x):
            return (1 / 2) * x.T @ (K @ x) - kappa.T @ x

        weights = minimize(func,
                           x0=np.ones((n_s, 1)),
                           bounds=[(0, self.B)] * n_s,
                           constraints=constraints)['x']

        self.weights_ = np.array(weights).ravel()

        self.estimator_ = check_estimator(self.get_estimator, **self.kwargs)

        try:
            self.estimator_.fit(Xs,
                                ys,
                                sample_weight=self.weights_,
                                **fit_params)
        except Exception:
            bootstrap_index = np.random.choice(len(Xs),
                                               size=len(Xs),
                                               replace=True,
                                               p=self.weights_ /
                                               self.weights_.sum())
            self.estimator_.fit(Xs[bootstrap_index], ys[bootstrap_index],
                                **fit_params)
        return self
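
For reference, the optimization above is the standard Kernel Mean Matching quadratic program: minimize (1/2) b^T K b - kappa^T b over bounded weights b, so that the weighted source mean in feature space approaches the target mean. A minimal standalone version on toy data (a sketch; rbf kernel and data shapes are illustrative assumptions):

import numpy as np
from scipy.optimize import minimize, LinearConstraint
from sklearn.metrics.pairwise import pairwise_kernels

rng = np.random.RandomState(0)
Xs, Xt = rng.rand(30, 2), rng.rand(20, 2) + 0.3    # covariate-shifted target
n_s, n_t = len(Xs), len(Xt)
K = pairwise_kernels(Xs, Xs, metric='rbf')
K = 0.5 * (K + K.T)                                 # symmetrize
kappa = (n_s / n_t) * pairwise_kernels(Xs, Xt, metric='rbf').dot(np.ones(n_t))
eps = (np.sqrt(n_s) - 1) / np.sqrt(n_s)
constraint = LinearConstraint(np.ones((1, n_s)),
                              lb=n_s * (1 - eps), ub=n_s * (1 + eps))
res = minimize(lambda b: 0.5 * b @ (K @ b) - kappa @ b,
               x0=np.ones(n_s), bounds=[(0., 1000.)] * n_s,
               constraints=constraint)
weights = res.x                                     # importance weights for source samples
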