Ejemplo n.º 1
0
def data_normalize_inplace(x, power=0.5, compute_norm_subset=None):
    """Standardize `x` per dimension, then power- and L2-normalize it in place.

    Parameters
    ----------
    x : numpy array, modified in place
        Data matrix with one sample per row. It is handed to yael through
        `numpy_to_fvec_ref`, so it is presumably expected to be a
        C-contiguous float32 array -- TODO confirm against the yael bindings.
    power : float, optional
        1 skips the power step, 0.5 uses `yael.fvec_ssqrt`, any other value
        uses `yael.fvec_spow` with that exponent.
    compute_norm_subset : index (array, list, slice, ...), optional
        If given, the mean and standard deviation are estimated on
        `x[compute_norm_subset]` only, but are still applied to all of `x`.

    Returns
    -------
    mu, sigma : arrays
        Per-dimension mean and standard deviation that were applied.
        Zero entries of `sigma` are replaced by 1 (a warning is emitted).
    """
    # Identity test, not `!= None`: the subset is usually an index array, and
    # comparing an array with None is elementwise, which makes the `if`
    # raise "truth value of an array is ambiguous".
    if compute_norm_subset is not None:
        x_subset = x[compute_norm_subset]
    else:
        x_subset = x

    mu = x_subset.mean(axis=0)
    sigma = (x_subset - mu).std(axis=0)

    if np.min(sigma) == 0:
        warnings.warn("At least one dimension of the data has zero variance.")
        # Avoid dividing by zero for constant dimensions.
        sigma[sigma == 0] = 1.

    # Drop the (possibly copied) subset before touching x to limit peak memory.
    del x_subset
    x -= mu
    x *= 1. / sigma

    if power == 1:
        pass
    elif power == 0.5:
        yael.fvec_ssqrt(yael.numpy_to_fvec_ref(x), x.size)
    else:
        yael.fvec_spow(yael.numpy_to_fvec_ref(x), x.size, power)

    # Rescale every sample by (squared L2 norm) ** -0.5. The numpy rows are
    # passed as yael columns, hence the (shape[1], shape[0]) argument order.
    yael.fmat_normalize_columns_l2sqr_pow(
        yael.numpy_to_fvec_ref(x), x.shape[1], x.shape[0], -0.5)

    return mu, sigma
Ejemplo n.º 2
0
def scores_to_probas(scores, A, B):
    """Convert scores to probabilities with `libsvm_precomputed`.

    `scores` is copied to float32, and `A` and `B` are forwarded unchanged to
    `libsvm_precomputed.scores_to_probas` (presumably the sigmoid parameters
    returned by a probability-enabled training run -- TODO confirm).

    Returns a new float32 array with `scores.size` entries.
    """
    n_scores = scores.size
    out = np.empty(n_scores, dtype=np.float32)
    scores_f32 = scores.astype(np.float32)

    # Both buffers are passed by reference; `out` is filled by the C routine.
    scores_ref = yael.numpy_to_fvec_ref(scores_f32)
    out_ref = yael.numpy_to_fvec_ref(out)
    libsvm_precomputed.scores_to_probas(A, B, n_scores, scores_ref, out_ref)
    return out
Ejemplo n.º 3
0
    def initYaelGmm(self):
        """Create `self.yael_gmm` and fill it from the numpy parameters.

        The dimensions come from `n_features` / `n_components`. The means and
        weights are passed by reference (`numpy_to_fvec_ref`), whereas the
        covariances go through `numpy_to_fvec`.
        """
        gmm = yael.gmm_t()
        self.yael_gmm = gmm

        # Model dimensions.
        gmm.d = self.n_features
        gmm.k = self.n_components

        # Model parameters.
        gmm.mu = yael.numpy_to_fvec_ref(self.means_)
        gmm.sigma = yael.numpy_to_fvec(self.covars_)
        gmm.w = yael.numpy_to_fvec_ref(self.weights_)
Ejemplo n.º 4
0
    def eval_params(self, params, fold):
        """Train an SVM on one fold with `params` and score the held-out part.

        Parameters
        ----------
        params : dict
            Must contain 'c' and 'positive_weight'; the whole dict is also
            forwarded to `combine_kernels`.
        fold : index into `self.splits`
            Each split holds the train indices, the test indices and,
            optionally, a fold-specific label vector as a third element.

        Returns
        -------
        Stats
            Object whose `valid_accuracies` is a one-element array holding the
            (slightly penalized) performance on the test indices.

        Raises
        ------
        AssertionError
            If `self.criterion` is not one of 'ap', 'dcr' or 'sdcr'.
        """
        train_index, test_index = self.splits[fold][:2]

        # A split may carry its own labels (useful for multiclass
        # optimization); otherwise fall back to the shared label vector.
        if len(self.splits[fold]) == 3:
            cx = self.splits[fold][2]
        else:
            cx = self.cx

        c = params['c']
        pos_weight = params['positive_weight']

        Kxx = combine_kernels(self.Kxx, params)

        dual_coef, bias = libsvm_train(Kxx, cx, train_index,
                                       c=c,
                                       pos_weight=pos_weight)

        # Filled in place by mul_matvec_subset from the kernel entries selected
        # by test_index/train_index and the dual coefficients.
        scores = np.empty(test_index.size, dtype=np.float32)

        libsvm_precomputed.mul_matvec_subset(
            yael.numpy_to_fvec_ref(Kxx), Kxx.shape[1],
            yael.numpy_to_ivec_ref(train_index), train_index.size,
            yael.numpy_to_ivec_ref(test_index), test_index.size,
            yael.numpy_to_dvec_ref(dual_coef),
            yael.numpy_to_fvec_ref(scores))
        scores += bias

        if self.criterion == 'ap':
            perf = average_precision(cx[test_index], scores)
        elif self.criterion == 'dcr':
            perf = 1 - compute_dcr(cx[test_index], scores)
        elif self.criterion == 'sdcr':
            perf = 1 - surrogate_dcr(cx[test_index], scores)
        else:
            # Explicit raise instead of `assert False`: asserts are stripped
            # under `python -O`, which would leave `perf` unbound below.
            raise AssertionError(
                "unknown criterion: %r" % (self.criterion,))

        #### microscopic penalizations
        # to favor the lowest c among ties
        perf -= math.log(c) * 1e-6
        # to favor positive_weight == 1
        perf -= abs(math.log(pos_weight)) * 1e-6

        stats = Stats()
        stats.valid_accuracies = np.array([perf])

        return stats
Ejemplo n.º 5
0
    def descs_to_sstats(xx, gmm):
        """ Converts the descriptors to sufficient statistics.

        Inputs
        ------
        xx: array [nr_descs, nr_dimensions]
            Data matrix containing the descriptors. It is passed to yael by
            reference (`numpy_to_fvec_ref`).

        gmm: yael.gmm instance
            Mixture of Gaussian object.

        Output
        ------
        Q_sum: array [nr_clusters, ]
            Posterior probabilities averaged over the descriptors (float32).

        """
        K = gmm.k
        N = xx.shape[0]
        # Compute posterior probabilities using yael.
        Q_yael = fvec_new(N * K)
        try:
            gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
            Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)
        finally:
            # Release the yael buffer even when the conversion of `xx` or the
            # posterior computation raises; otherwise it would leak.
            yael.free(Q_yael)
        # Compute statistics. `np.sum` is spelled out so the column-wise
        # reduction does not depend on which `sum` the module has in scope.
        Q_sum = np.sum(Q, 0) / N  # 1xK
        return np.array(Q_sum, dtype=np.float32)
Ejemplo n.º 6
0
    def descs_to_sstats(xx, gmm):
        """ Converts the descriptors to sufficient statistics.

        Inputs
        ------
        xx: array [nr_descs, nr_dimensions]
            Data matrix containing the descriptors. A single 1-D descriptor
            is promoted to a one-row matrix.

        gmm: yael.gmm instance
            Mixture of Gaussian object.

        Output
        ------
        sstats: array [nr_clusters + 2 * nr_clusters * nr_dimensions, ]
            Concatenation of the averaged posterior probabilities `Q_sum`, the
            first moment `Q_xx` and second-order moment `Q_xx_2`, all divided
            by the number of descriptors (float32).

        """
        xx = np.atleast_2d(xx)
        N = xx.shape[0]
        K = gmm.k
        D = gmm.d
        # Compute posterior probabilities using yael.
        Q_yael = fvec_new(N * K)
        try:
            gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
            Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)  # NxK
        finally:
            # Release the yael buffer even when the conversion of `xx` or the
            # posterior computation raises; otherwise it would leak.
            yael.free(Q_yael)
        # Compute statistics.
        Q_t = Q.T                                                 # KxN
        sstats = np.zeros(K + 2 * K * D, dtype=np.float32)
        sstats[: K] = np.sum(Q, 0) / N                            # 1xK
        sstats[K: K + K * D] = dot(Q_t, xx).flatten() / N         # 1xKD
        sstats[K + K * D: K + 2 * K * D] = dot(
            Q_t, xx ** 2).flatten() / N                           # 1xKD
        return sstats
Ejemplo n.º 7
0
    def descs_to_sstats(xx, gmm):
        """ Converts the descriptors to sufficient statistics.

        Inputs
        ------
        xx: array [nr_descs, nr_dimensions]
            Data matrix containing the descriptors. It is passed to yael by
            reference (`numpy_to_fvec_ref`).

        gmm: yael.gmm instance
            Mixture of Gaussian object.

        Output
        ------
        Q_sum: array [nr_clusters, ]
            Posterior probabilities averaged over the descriptors (float32).

        """
        K = gmm.k
        N = xx.shape[0]
        # Compute posterior probabilities using yael.
        Q_yael = fvec_new(N * K)
        try:
            gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
            Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)
        finally:
            # Free the temporary yael buffer on every path, including the one
            # where `xx` cannot be converted or yael raises.
            yael.free(Q_yael)
        # Compute statistics. Use numpy's reduction explicitly rather than
        # whatever `sum` happens to be bound at module level.
        Q_sum = np.sum(Q, 0) / N                  # 1xK
        return np.array(Q_sum, dtype=np.float32)
Ejemplo n.º 8
0
def compute_gmm(data, nr_clusters, nr_iterations, nr_threads, seed, nr_redos):
    """Learn a GMM on `data` with yael's `gmm_learn` and return it.

    `data` is a 2-D array with one sample per row. It is made contiguous
    before being handed to yael by reference. The remaining arguments are
    forwarded to `gmm_learn` unchanged, together with `GMM_FLAGS_W`.
    """
    nr_samples, nr_dims = data.shape
    # Keep the contiguous array bound to a local for the whole call, since
    # yael only receives a reference to its memory.
    contiguous = np.ascontiguousarray(data)
    data_ref = numpy_to_fvec_ref(contiguous)
    return gmm_learn(
        nr_dims, nr_samples, nr_clusters, nr_iterations, data_ref,
        nr_threads, seed, nr_redos, GMM_FLAGS_W)
Ejemplo n.º 9
0
    def descs_to_sstats(xx, gmm):
        """ Converts the descriptors to sufficient statistics.

        Inputs
        ------
        xx: array [nr_descs, nr_dimensions]
            Data matrix containing the descriptors. A single 1-D descriptor
            is promoted to a one-row matrix.

        gmm: yael.gmm instance
            Mixture of Gaussian object.

        Output
        ------
        sstats: array [nr_clusters + 2 * nr_clusters * nr_dimensions, ]
            Concatenation of the averaged posterior probabilities `Q_sum`, the
            first moment `Q_xx` and second-order moment `Q_xx_2`, all divided
            by the number of descriptors (float32).

        """
        xx = np.atleast_2d(xx)
        N = xx.shape[0]
        K = gmm.k
        D = gmm.d
        # Compute posterior probabilities using yael.
        Q_yael = fvec_new(N * K)
        try:
            gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
            Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)  # NxK
        finally:
            # Always hand the temporary buffer back to yael, even if the
            # call above raises (e.g. `xx` cannot be passed by reference).
            yael.free(Q_yael)
        # Compute statistics.
        Q_t = Q.T  # KxN
        sstats = np.zeros(K + 2 * K * D, dtype=np.float32)
        sstats[:K] = np.sum(Q, 0) / N  # 1xK
        sstats[K:K + K * D] = dot(Q_t, xx).flatten() / N  # 1xKD
        sstats[K + K * D:K + 2 * K * D] = dot(Q_t, xx**2).flatten() / N  # 1xKD
        return sstats
Ejemplo n.º 10
0
def gmm_predict_proba(xx, gmm):
    """Computes posterior probabilities using yael.

    Parameters
    ----------
    xx : array with one sample per row
        Passed to yael by reference (`numpy_to_fvec_ref`).
    gmm : yael gmm object
        Only its `k` attribute is read here; the object itself is handed to
        `yael.gmm_compute_p`.

    Returns
    -------
    Q : array [N, K]
        One row of component posteriors per sample, where N is
        `xx.shape[0]` and K is `gmm.k`.
    """
    N = xx.shape[0]
    K = gmm.k

    Q_yael = yael.fvec_new(N * K)
    try:
        yael.gmm_compute_p(
            N, yael.numpy_to_fvec_ref(xx), gmm, Q_yael, yael.GMM_FLAGS_W)
        Q = yael.fvec_to_numpy(Q_yael, N * K).reshape(N, K)
    finally:
        # Free the temporary buffer on every path so that a failing
        # conversion or yael call does not leak it.
        yael.free(Q_yael)

    return Q
Ejemplo n.º 11
0
def libsvm_train(Kxx, cx, subset, c, pos_weight = 1.0, eps = 1e-3, verbose = 0, probability = 0):  
    """Train a two-class SVM on a precomputed kernel with libsvm_precomputed.

    Parameters
    ----------
    Kxx : square, C-contiguous 2-D array
        Precomputed kernel matrix; passed by reference through
        `numpy_to_fvec_ref` (presumably float32 -- TODO confirm).
    cx : C-contiguous array
        Labels; entries equal to 1 and -1 are counted as positives and
        negatives. Passed through `numpy_to_ivec_ref` (presumably int32 --
        TODO confirm).
    subset : C-contiguous array
        Indices of the training examples, each in [0, Kxx.shape[0]).
    c : value assigned to `param.C`.
    pos_weight : extra multiplier applied to the weight of label +1.
    eps : value assigned to `param.eps`.
    verbose : value passed to `svm_set_verbose`.
    probability : value assigned to `param.probability`; when truthy, two
        additional values are returned.

    Returns
    -------
    (dual_coeffs, bias_term) or, if `probability` is truthy,
    (dual_coeffs, bias_term, probA, probB). `dual_coeffs` is a float64 array
    with one entry per element of `subset`.
    """
    # check input
    assert Kxx.shape[0] == Kxx.shape[1] and Kxx.flags.c_contiguous, Kxx.shape
    assert subset.flags.c_contiguous and cx.flags.c_contiguous    
    assert np.all(subset < Kxx.shape[0]) and np.all(subset >= 0)
    
    # set libsvm params 

    param = libsvm_precomputed.svm_parameter()
    libsvm_precomputed.svm_param_set_default(param)

    # Two per-class weights, for labels -1 and +1. The yael arrays are also
    # bound to locals (presumably so they stay alive while `param` refers to
    # them -- TODO confirm).
    param.nr_weight = 2
    param.weight_label = weight_label = yael.ivec(2)
    weight_label[0] = -1
    weight_label[1] = 1
    param.weight = weights = yael.dvec(2)
    npos = (cx[subset] == 1).sum()
    nneg = (cx[subset] == -1).sum()
    # Each class is weighted by the relative frequency of the *other* class,
    # so the rarer class gets the larger weight; positives are additionally
    # scaled by pos_weight.
    weights[0] = 2 * npos / float(npos + nneg)
    weights[1] = 2 * nneg / float(npos + nneg) * pos_weight
    param.C = c
    param.nu = param.p = 0
    param.shrinking = 1
    param.probability = probability
    param.eps = eps
    libsvm_precomputed.svm_set_verbose(verbose)
    
    # prepare output
    nex = subset.size
    dual_coeffs = np.empty((nex,), dtype = np.float64)
    # Three slots: [0] bias term, [1] probA, [2] probB (see reads below).
    bias_out = yael.dvec(3)
    
    # actual call 
    ret = libsvm_precomputed.svm_train_precomputed(
                nex, 
                yael.numpy_to_ivec_ref(subset),
                yael.numpy_to_ivec_ref(cx),
                yael.numpy_to_fvec_ref(Kxx),
                Kxx.shape[1],
                param,
                yael.numpy_to_dvec_ref(dual_coeffs),
                bias_out)
    # A non-positive return value is treated as a training failure.
    assert ret > 0

    bias_term = bias_out[0]
    if probability:
        probA = bias_out[1]
        probB = bias_out[2]        
        return dual_coeffs, bias_term, probA, probB
    else: 
        return dual_coeffs, bias_term
Ejemplo n.º 12
0
    def fit(self, X):
        """Learn the mixture on `X` with yael and store it as numpy arrays.

        Parameters
        ----------
        X : 2-D array [n_samples, n_features]
            Training data, passed to yael by reference (`numpy_to_fvec_ref`).

        Side effects
        ------------
        Sets `n_features`, `means_` and `covars_` (both reshaped to
        [n_components, n_features]) and `weights_` ([n_components]), and
        resets `yael_gmm` so it is rebuilt from the new parameters.
        """
        n_samples, self.n_features = X.shape

        yael_X = yael.numpy_to_fvec_ref(X)

        yael_gmm = yael.gmm_learn(
            self.n_features, n_samples, self.n_components, self.n_iter, yael_X,
            self.n_threads, 0, self.n_init,
            yael.GMM_FLAGS_W | yael.GMM_FLAGS_SIGMA | yael.GMM_FLAGS_MU)

        try:
            shape = (self.n_components, self.n_features)
            n_values = self.n_components * self.n_features

            self.means_ = yael.fvec_to_numpy(
                yael_gmm.mu, n_values).reshape(shape)
            self.covars_ = yael.fvec_to_numpy(
                yael_gmm.sigma, n_values).reshape(shape)
            self.weights_ = yael.fvec_to_numpy(yael_gmm.w, self.n_components)
        finally:
            # Delete the yael model even if copying its parameters fails.
            yael.gmm_delete(yael_gmm)

        # The yael model built lazily by initYaelGmm refers to the previous
        # means_/weights_ arrays by reference; drop it so the next call to
        # computeResponsabilities rebuilds it from the freshly fitted ones.
        self.yael_gmm = None
Ejemplo n.º 13
0
    def computeResponsabilities(self, X):
        """Return the per-component posteriors of the samples in `X`.

        The yael model is built on first use via `initYaelGmm`. A 1-D `X` is
        treated as a single sample. The result is an array of shape
        [n_samples, n_components] that takes over the buffer allocated here
        (`fvec_to_numpy_acquire`).
        """
        if self.yael_gmm is None:
            self.initYaelGmm()

        n_samples = 1 if len(X.shape) == 1 else X.shape[0]
        n_values = self.n_components * n_samples

        x_ref = yael.numpy_to_fvec_ref(X)
        posteriors = yael.fvec_new_0(n_values)

        flags = yael.GMM_FLAGS_W | yael.GMM_FLAGS_SIGMA | yael.GMM_FLAGS_MU
        yael.gmm_compute_p_thread(
            n_samples, x_ref, self.yael_gmm, posteriors, flags,
            self.n_threads)

        flat = yael.fvec_to_numpy_acquire(posteriors, n_values)
        return flat.reshape((n_samples, self.n_components))
Ejemplo n.º 14
0
    def descs_to_spatial_sstats(xx, ll, gmm):
        """ Computes spatial statistics from descriptors and their position.

        Inputs
        ------
        xx: array [N, D], required
            N D-dimensional descriptors from a video (usually, after they are
            processed with PCA).

        ll: array [N, 3], required
            Descriptor locations; on each row, we have the triplet
            (x, y, t). The number of descriptors N is taken from this array.

        gmm: instance of yael object gmm
            Gaussian mixture object.

        Output
        ------
        ss: array [K + 2 * 3 * K, ]
            Sufficient statistics in the form of a float32 vector that
            concatenates (i) the posteriors averaged over the descriptors,
            (ii) the expected value of the locations ll under the posterior
            distribution Q and (iii) the second-order moment of the locations
            ll under the posterior distribution Q. All three are divided by N.

        """
        N = ll.shape[0]
        K = gmm.k

        # Compute posterior probabilities using yael.
        Q_yael = fvec_new(N * K)
        try:
            gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
            Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)
        finally:
            # Release the yael buffer even when the conversion of `xx` or the
            # posterior computation raises; otherwise it would leak.
            yael.free(Q_yael)
        # Compute statistics. `np.sum` is spelled out so the column-wise
        # reduction does not depend on which `sum` the module has in scope.
        Q_t = Q.T  # KxN
        Q_sum = np.sum(Q, 0) / N  # 1xK
        Q_ll = dot(Q_t, ll).flatten() / N  # 1x3K
        Q_ll_2 = dot(Q_t, ll**2).flatten() / N  # 1x3K
        return np.array(hstack((Q_sum, Q_ll, Q_ll_2)), dtype=np.float32)
Ejemplo n.º 15
0
    def descs_to_spatial_sstats(xx, ll, gmm):
        """ Computes spatial statistics from descriptors and their position.

        Inputs
        ------
        xx: array [N, D], required
            N D-dimensional descriptors from a video (usually, after they are
            processed with PCA).

        ll: array [N, 3], required
            Descriptor locations; on each row, we have the triplet
            (x, y, t). The number of descriptors N is taken from this array.

        gmm: instance of yael object gmm
            Gaussian mixture object.

        Output
        ------
        ss: array [K + 2 * 3 * K, ]
            Sufficient statistics in the form of a float32 vector that
            concatenates (i) the posteriors averaged over the descriptors,
            (ii) the expected value of the locations ll under the posterior
            distribution Q and (iii) the second-order moment of the locations
            ll under the posterior distribution Q. All three are divided by N.

        """
        N = ll.shape[0]
        K = gmm.k

        # Compute posterior probabilities using yael.
        Q_yael = fvec_new(N * K)
        try:
            gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
            Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)
        finally:
            # Free the temporary yael buffer on every path, including the one
            # where `xx` cannot be converted or yael raises.
            yael.free(Q_yael)
        # Compute statistics. Use numpy's reduction explicitly rather than
        # whatever `sum` happens to be bound at module level.
        Q_t = Q.T                                   # KxN
        Q_sum = np.sum(Q, 0) / N                    # 1xK
        Q_ll = dot(Q_t, ll).flatten() / N           # 1x3K
        Q_ll_2 = dot(Q_t, ll ** 2).flatten() / N    # 1x3K
        return np.array(hstack((Q_sum, Q_ll, Q_ll_2)), dtype=np.float32)
Ejemplo n.º 16
0
# Demo of round-tripping numpy arrays through yael vectors.
# NOTE(review): `numpy_a` is defined above the visible part of this script.

# --- float array, via numpy_to_fvec --------------------------------------
print('-> Yael A')
# Wrap the pointer returned by numpy_to_fvec in a FloatArray
# (presumably acquirepointer makes the wrapper own the memory -- TODO confirm).
yael_a = yael.FloatArray.acquirepointer(yael.numpy_to_fvec(numpy_a))
n = numpy_a.size
yael.fvec_print(yael_a, n)

print('-> Numpy A')
# Convert the yael vector back to numpy and print it.
print(yael.fvec_to_numpy(yael_a, n))

# --- int array, via numpy_to_ivec ----------------------------------------
print('int array')
numpy_a = np.array(list(range(5)), dtype='int32')
print(numpy_a)

print('-> Yael A2')
yael_a = yael.IntArray.acquirepointer(yael.numpy_to_ivec(numpy_a))
n = numpy_a.size
yael.ivec_print(yael_a, n)

print('-> Numpy A2')
print(yael.ivec_to_numpy(yael_a, n))

# --- float array, passed by reference ------------------------------------
print('float array, pass by reference')
numpy_a = np.array(list(range(5)), dtype='float32')
print(numpy_a)


# Opt-in demonstration of what goes wrong with the by-reference variant:
# the yael wrapper is built with numpy_to_fvec_ref, then the numpy array is
# deleted and the wrapper is used again. Only runs with --force-crash.
if '--force-crash' in sys.argv:
    yael_a = yael.FloatArray.acquirepointer(yael.numpy_to_fvec_ref(numpy_a))
    n = numpy_a.size
    yael.fvec_print(yael_a, n)
    del numpy_a
    print('Forced Crash Example!')
    # Presumably reads memory that numpy has already released -- expected to
    # crash or print garbage.
    yael.fvec_print(yael_a, n)
Ejemplo n.º 17
0
def compute_gmm(data, nr_clusters, nr_iterations, nr_threads, seed, nr_redos):
    """Learn a GMM on `data` with yael's `gmm_learn` and return it.

    `data` is a 2-D array with one sample per row and is handed to yael by
    reference. The remaining arguments are forwarded to `gmm_learn`
    unchanged, together with `GMM_FLAGS_W`.
    """
    nr_samples, nr_dims = data.shape
    return gmm_learn(
        nr_dims, nr_samples, nr_clusters, nr_iterations,
        numpy_to_fvec_ref(data), nr_threads, seed, nr_redos, GMM_FLAGS_W)