def descs_to_sstats(xx, gmm):
    """
    Converts the descriptors to sufficient statistics.

    Inputs
    ------
    xx: array [nr_descs, nr_dimensions]
        Data matrix containing the descriptors. A 1-D input is promoted to a
        single-row matrix with `np.atleast_2d`.
        NOTE(review): yael presumably expects float32, C-contiguous data --
        confirm against the caller.

    gmm: yael.gmm instance
        Mixture of Gaussian object; its `k` and `d` attributes are read.

    Output
    ------
    sstats: array [nr_clusters + 2 * nr_clusters * nr_dimensions, ]
        float32 concatenation of the averaged posterior probabilities
        `Q_sum`, the first moment `Q_xx` and second-order moment `Q_xx_2`.

    """
    xx = np.atleast_2d(xx)

    N = xx.shape[0]
    K = gmm.k
    D = gmm.d

    # Compute posterior probabilities using yael.
    Q_yael = fvec_new(N * K)
    try:
        gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
        Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)  # NxK
    finally:
        # Release the yael buffer even when one of the calls above raises;
        # otherwise it would never be freed.
        yael.free(Q_yael)

    # Compute statistics; every term is averaged over the N descriptors.
    first_moment_end = K + K * D
    sstats = np.zeros(K + 2 * K * D, dtype=np.float32)
    sstats[:K] = np.sum(Q, 0) / N  # 1xK
    sstats[K:first_moment_end] = dot(Q.T, xx).flatten() / N  # 1xKD
    sstats[first_moment_end:K + 2 * K * D] = (
        dot(Q.T, xx ** 2).flatten() / N)  # 1xKD

    return sstats
def descs_to_sstats(xx, gmm):
    """
    Converts the descriptors to sufficient statistics.

    Inputs
    ------
    xx: array [nr_descs, nr_dimensions]
        Data matrix containing the descriptors. A 1-D input is promoted to a
        single-row matrix with `np.atleast_2d`.
        NOTE(review): yael presumably expects float32, C-contiguous data --
        confirm against the caller.

    gmm: yael.gmm instance
        Mixture of Gaussian object; its `k` and `d` attributes are read.

    Output
    ------
    sstats: array [nr_clusters + 2 * nr_clusters * nr_dimensions, ]
        float32 concatenation of the averaged posterior probabilities
        `Q_sum`, the first moment `Q_xx` and second-order moment `Q_xx_2`.

    """
    xx = np.atleast_2d(xx)

    N = xx.shape[0]
    K = gmm.k
    D = gmm.d

    # Compute posterior probabilities using yael.
    Q_yael = fvec_new(N * K)
    try:
        gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
        Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)  # NxK
    finally:
        # Always hand the buffer back to yael, including on failure, so an
        # exception raised above cannot leak it.
        yael.free(Q_yael)

    # Compute statistics; every term is averaged over the N descriptors.
    sstats = np.zeros(K + 2 * K * D, dtype=np.float32)
    sstats[:K] = np.sum(Q, 0) / N  # 1xK
    sstats[K:K + K * D] = dot(Q.T, xx).flatten() / N  # 1xKD
    sstats[K + K * D:K + 2 * K * D] = dot(Q.T, xx ** 2).flatten() / N  # 1xKD

    return sstats
def descs_to_sstats(xx, gmm):
    """
    Converts the descriptors to sufficient statistics.

    Inputs
    ------
    xx: array [nr_descs, nr_dimensions]
        Data matrix containing the descriptors.
        NOTE(review): yael presumably expects float32, C-contiguous data --
        confirm against the caller.

    gmm: yael.gmm instance
        Mixture of Gaussian object; its `k` attribute is read.

    Output
    ------
    Q_sum: array [nr_clusters, ]
        Averaged posterior probabilities, as float32.

    """
    K = gmm.k
    N = xx.shape[0]

    # Compute posterior probabilities using yael.
    Q_yael = fvec_new(N * K)
    try:
        gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
        Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)
    finally:
        # Release the yael buffer even when one of the calls above raises;
        # otherwise it would never be freed.
        yael.free(Q_yael)

    # Compute statistics: per-cluster posterior averaged over descriptors.
    Q_sum = sum(Q, 0) / N  # 1xK

    return np.array(Q_sum, dtype=np.float32)
def gmm_predict_proba(xx, gmm):
    """Computes posterior probabilities using yael.

    Inputs
    ------
    xx: array [N, D]
        Descriptors, one per row; only `xx.shape[0]` is read here.
        NOTE(review): yael presumably expects float32, C-contiguous data --
        confirm against the caller.

    gmm: yael.gmm instance
        Mixture of Gaussian object; its `k` attribute is read.

    Output
    ------
    Q: array [N, K]
        Values written by `yael.gmm_compute_p`, reshaped to one row per
        descriptor and one column per mixture component.

    """
    N = xx.shape[0]
    K = gmm.k

    Q_yael = yael.fvec_new(N * K)
    try:
        yael.gmm_compute_p(
            N, yael.numpy_to_fvec_ref(xx), gmm, Q_yael, yael.GMM_FLAGS_W)
        return yael.fvec_to_numpy(Q_yael, N * K).reshape(N, K)
    finally:
        # Runs after the return value is built, and also when yael raises,
        # so the buffer is never leaked.
        yael.free(Q_yael)
def readXVecsFromOpenedFile(file, dim, count, extension):
    """Reads `count` vectors of dimension `dim` from an already opened file.

    Inputs
    ------
    file: opened file object
        Handed directly to yael's `b2fvecs_fread` / `fvecs_fread`.

    dim: int
        Number of components per vector.

    count: int
        Number of vectors to read.

    extension: str
        Either 'bvecs' or 'fvecs'; selects the yael reader.

    Output
    ------
    a: array
        Result of `yael.fvec_to_numpy` called with shape `(count, dim)`.

    Raises
    ------
    Exception
        If `extension` is neither 'bvecs' nor 'fvecs'.

    """
    # Validate first so that nothing is allocated for a bad extension.
    if extension not in ('bvecs', 'fvecs'):
        raise Exception('Bad file extension!')

    points = yael.fvec_new(dim * count)
    try:
        if extension == 'bvecs':
            yael.b2fvecs_fread(file, points, count)
        else:
            yael.fvecs_fread(file, points, count, dim)
        return yael.fvec_to_numpy(points, (count, dim))
    finally:
        # Free the yael buffer on success and on failure alike; without this
        # a failed read or conversion would leak it.
        yael.free(points)
def descs_to_spatial_sstats(xx, ll, gmm):
    """
    Computes spatial statistics from descriptors and their position.

    Inputs
    ------
    xx: array [N, D], required
        N D-dimensional descriptors from a video (usually, after they are
        processed with PCA).
        NOTE(review): yael presumably expects float32, C-contiguous data --
        confirm against the caller.

    ll: array [N, 3], required
        Descriptor locations in an image; on each row, we have the triplet
        (x, y, t).

    gmm: instance of yael object gmm
        Gaussian mixture object; its `k` attribute is read.

    Output
    ------
    ss: array [K + 2 * 3 * K, ]
        float32 sufficient statistics in the form of a vector that
        concatenates (i) the posteriors averaged over the N descriptors,
        (ii) an expected value of the locations ll under the posterior
        distribution Q and (iii) the second-order moment of the locations ll
        under the posterior distribution Q.

    Raises
    ------
    ValueError
        If `xx` has fewer rows than `ll`.

    """
    N = ll.shape[0]
    K = gmm.k

    # yael is told to read N descriptors from xx's buffer, so xx must hold at
    # least N rows; otherwise it would read past the end of the array.
    if xx.shape[0] < N:
        raise ValueError(
            'xx has %d rows but ll has %d; expected one descriptor per '
            'location' % (xx.shape[0], N))

    # Compute posterior probabilities using yael.
    Q_yael = fvec_new(N * K)
    try:
        gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
        Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)
    finally:
        # Release the yael buffer even when one of the calls above raises;
        # otherwise it would never be freed.
        yael.free(Q_yael)

    # Compute statistics.
    Q_sum = sum(Q, 0) / N  # 1xK
    Q_ll = dot(Q.T, ll).flatten() / N  # 1x3K
    Q_ll_2 = dot(Q.T, ll ** 2).flatten() / N  # 1x3K

    return np.array(hstack((Q_sum, Q_ll, Q_ll_2)), dtype=np.float32)
def descs_to_spatial_sstats(xx, ll, gmm):
    """
    Computes spatial statistics from descriptors and their position.

    Inputs
    ------
    xx: array [N, D], required
        N D-dimensional descriptors from a video (usually, after they are
        processed with PCA).
        NOTE(review): yael presumably expects float32, C-contiguous data --
        confirm against the caller.

    ll: array [N, 3], required
        Descriptor locations in an image; on each row, we have the triplet
        (x, y, t).

    gmm: instance of yael object gmm
        Gaussian mixture object; its `k` attribute is read.

    Output
    ------
    ss: array [K + 2 * 3 * K, ]
        float32 sufficient statistics in the form of a vector that
        concatenates (i) the posteriors averaged over the N descriptors,
        (ii) an expected value of the locations ll under the posterior
        distribution Q and (iii) the second-order moment of the locations ll
        under the posterior distribution Q.

    Raises
    ------
    ValueError
        If `xx` has fewer rows than `ll`.

    """
    N = ll.shape[0]
    K = gmm.k

    # yael reads N descriptors straight from xx's buffer; refuse inputs that
    # would make it read beyond the end of that buffer.
    if xx.shape[0] < N:
        raise ValueError(
            'xx has %d rows but ll has %d; expected one descriptor per '
            'location' % (xx.shape[0], N))

    # Compute posterior probabilities using yael.
    Q_yael = fvec_new(N * K)
    try:
        gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
        Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)
    finally:
        # Always hand the buffer back to yael, including on failure, so an
        # exception raised above cannot leak it.
        yael.free(Q_yael)

    # Compute statistics.
    Q_sum = sum(Q, 0) / N  # 1xK
    Q_ll = dot(Q.T, ll).flatten() / N  # 1x3K
    Q_ll_2 = dot(Q.T, ll ** 2).flatten() / N  # 1x3K

    return np.array(hstack((Q_sum, Q_ll, Q_ll_2)), dtype=np.float32)
def readXvecs(filename, dim, count, tonumpy=True):
    """Reads `count` vectors of dimension `dim` from a *vecs file.

    The reader is chosen from the text after the last '.' in `filename`.

    Inputs
    ------
    filename: str
        Path of the file; must end in 'bvecs', 'fvecs', 'i8vecs' or 'ivecs'.

    dim: int
        Number of components per vector.

    count: int
        Number of vectors to read.

    tonumpy: bool, optional
        For 'bvecs', 'fvecs' and 'ivecs': when true (default) the yael buffer
        is converted to a numpy array and freed; when false the raw yael
        buffer is returned and the caller is responsible for freeing it.
        Ignored for 'i8vecs', which always returns a numpy array.

    Output
    ------
    points: numpy array or yael buffer
        For 'i8vecs', a float32 array of shape (count, dim). For the other
        formats, see `tonumpy`.

    Raises
    ------
    Exception
        If the extension is not one of the supported ones.

    """
    extension = filename.strip().split('.')[-1]

    if extension in ('bvecs', 'fvecs'):
        points = yael.fvec_new(dim * count)
        if extension == 'bvecs':
            yael.b2fvecs_read(filename, dim, count, points)
        else:
            yael.fvecs_read(filename, dim, count, points)
        if not tonumpy:
            return points
        try:
            return yael.fvec_to_numpy(points, (count, dim))
        finally:
            # Free the buffer whether or not the conversion succeeds.
            yael.free(points)

    if extension == 'i8vecs':
        points = np.zeros((count, dim), dtype='float32')
        # Binary mode, and `with` so the handle is closed on every path.
        with open(filename, 'rb') as vecs_file:
            for i in range(count):
                # Skip the 4 bytes that precede each vector (presumably the
                # per-vector dimension header -- verify against the format).
                vecs_file.read(4)
                points[i, :] = np.fromfile(
                    vecs_file, np.int8, dim).astype('float32')
        return points

    if extension == 'ivecs':
        points = yael.ivec_new(dim * count)
        # Binary mode, and `with` so the handle is closed after the read.
        with open(filename, 'rb') as vecs_file:
            yael.ivecs_fread(vecs_file, points, count, dim)
        if not tonumpy:
            return points
        try:
            return yael.ivec_to_numpy(points, (count, dim))
        finally:
            # Free the buffer whether or not the conversion succeeds.
            yael.free(points)

    raise Exception('Bad file extension!')
def readXvecs(filename, dim, count, tonumpy=True):
    """Reads `count` vectors of dimension `dim` from a *vecs file.

    The reader is chosen from the text after the last '.' in `filename`.

    Inputs
    ------
    filename: str
        Path of the file; must end in 'bvecs', 'fvecs', 'i8vecs' or 'ivecs'.

    dim: int
        Number of components per vector.

    count: int
        Number of vectors to read.

    tonumpy: bool, optional
        For 'bvecs', 'fvecs' and 'ivecs': when true (default) the yael buffer
        is converted to a numpy array and freed; when false the raw yael
        buffer is returned and the caller is responsible for freeing it.
        Ignored for 'i8vecs', which always returns a numpy array.

    Output
    ------
    points: numpy array or yael buffer
        For 'i8vecs', a float32 array of shape (count, dim). For the other
        formats, see `tonumpy`.

    Raises
    ------
    Exception
        If the extension is not one of the supported ones.

    """
    extension = filename.strip().split('.')[-1]

    if extension in ('bvecs', 'fvecs'):
        points = yael.fvec_new(dim * count)
        if extension == 'bvecs':
            yael.b2fvecs_read(filename, dim, count, points)
        else:
            yael.fvecs_read(filename, dim, count, points)
        if not tonumpy:
            return points
        try:
            return yael.fvec_to_numpy(points, (count, dim))
        finally:
            # Release the buffer even if the conversion raises.
            yael.free(points)

    if extension == 'i8vecs':
        points = np.zeros((count, dim), dtype='float32')
        # Open as binary and close deterministically via `with`.
        with open(filename, 'rb') as vecs_file:
            for i in range(count):
                # Discard the 4 bytes in front of each vector (presumably
                # the per-vector dimension header -- verify against the
                # format).
                vecs_file.read(4)
                points[i, :] = np.fromfile(
                    vecs_file, np.int8, dim).astype('float32')
        return points

    if extension == 'ivecs':
        points = yael.ivec_new(dim * count)
        # Open as binary and close as soon as yael has finished reading.
        with open(filename, 'rb') as vecs_file:
            yael.ivecs_fread(vecs_file, points, count, dim)
        if not tonumpy:
            return points
        try:
            return yael.ivec_to_numpy(points, (count, dim))
        finally:
            # Release the buffer even if the conversion raises.
            yael.free(points)

    raise Exception('Bad file extension!')