import numpy as np
import scipy.linalg as la

# norm_logprob, logsumexp, SMALL and LOGSMALL are numerical helpers
# assumed to be defined elsewhere in this module.


def xe(z, targets, predict=False, error=False, addon=0):
    """Cross entropy over unnormalized scores _z_ (one row per sample).

    _targets_ holds the index of the correct class for every row.
    If _predict_, return the most likely class per row instead.
    If _error_, also return the gradient of the mean loss wrt _z_.
    _addon_ is simply added to the loss (e.g. a regularization term
    computed by the caller).
    """
    if predict:
        return np.argmax(z, axis=1)
    # log-softmax: subtract the log normalizer from every row
    _xe = z - np.atleast_2d(logsumexp(z, axis=1)).T
    n, _ = _xe.shape
    xe = -np.mean(_xe[np.arange(n), targets])
    if error:
        # gradient of the mean loss: softmax(z), minus 1 at the targets
        err = np.exp(_xe)
        err[np.arange(n), targets] -= 1
        # score + error
        return xe + addon, err / n
    else:
        return xe + addon
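# A minimal usage sketch for xe on hypothetical data (the names below are
# illustrative, not part of the original module). It also checks the shift
# invariance of the softmax: adding a constant to every score of a row must
# leave the loss unchanged, which is exactly what the logsumexp subtraction
# guarantees numerically.
def _demo_xe():
    rng = np.random.RandomState(0)
    z = rng.randn(4, 3)              # scores: 4 samples, 3 classes
    targets = np.array([0, 2, 1, 1])
    loss, grad = xe(z, targets, error=True)
    assert np.allclose(loss, xe(z + 100, targets))  # shift invariance
    preds = xe(z, targets, predict=True)
    return loss, grad, preds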
def mog(X, C, maxiters, M=None, Cov=None, pi=None, eps=1e-2):
    """Fit a MoG.

    _X_ is the dataset in _rows_. _C_ is the number of clusters.
    """
    N, d = X.shape
    ll_const = -d / 2. * np.log(2 * np.pi)
    if M is None:
        tmp = np.random.permutation(N)
        M = X[tmp[:C]].copy()
    if Cov is None:
        diag = np.mean(np.diag(np.cov(X, rowvar=0))) * (np.abs(np.random.randn()) + 1)
        Cov = (diag * np.eye(d)).reshape(1, d, d).repeat(C, axis=0)
    if pi is None:
        pi = np.ones(C) / C
    ll = np.zeros((C, N))
    last_ll = -np.inf
    loglike = []
    for i in xrange(maxiters):
        # E-step: per-cluster log densities
        for c in xrange(C):
            cov_c = Cov[c]
            chol = la.cholesky(cov_c)
            # not exactly the log determinant (a factor 2 is missing),
            # but ok here, because there is no 0.5 factor below
            logdet = np.sum(np.log(np.diag(chol)))
            mu_c = M[c]
            # mahalanobis distance
            mhlb = (la.solve_triangular(chol, (X - mu_c).T, trans=1).T**2).sum(axis=1)
            # note the missing 0.5 before logdet
            ll[c, :] = np.log(pi[c]) + ll_const - logdet - 0.5 * mhlb
        # posterior class distribution given the data
        posteriors = norm_logprob(ll, axis=0)
        # loglikelihood over all datapoints
        ll_sum = np.sum(logsumexp(ll, axis=0))
        loglike.append(ll_sum)
        if ll_sum - last_ll < eps:
            break
        last_ll = ll_sum
        # M-step: update means, covariances and mixing weights
        for c in xrange(C):
            N_c = posteriors[c].sum()
            M[c, :] = np.sum(posteriors[c][:, np.newaxis] * X, axis=0) / N_c
            tmp = X - M[c]
            _cov = np.dot(posteriors[c] * tmp.T, tmp) / N_c
            # guard against a singular covariance; this could probably be
            # done more cleverly in combination with the E-step above
            # (a cholesky factor is computed there already and could be cached)
            _, _det = np.linalg.slogdet(_cov)
            if _det > LOGSMALL:
                Cov[c, :, :] = _cov
            pi[c] = N_c / N
    return M, Cov, pi, loglike
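# A minimal sketch of running mog on synthetic data (hypothetical example,
# assuming the module helpers norm_logprob/logsumexp/LOGSMALL are available).
# Two well separated 2d blobs should yield means near +5 and -5 and mixing
# weights near 0.5 each; loglike grows until the eps criterion triggers.
def _demo_mog():
    rng = np.random.RandomState(1)
    X = np.vstack((rng.randn(100, 2) + 5, rng.randn(100, 2) - 5))
    M, Cov, pi, loglike = mog(X, C=2, maxiters=50)
    return M, Cov, pi, loglike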
def mfa(X, hdim, C, maxiters, W=None, M=None, psi=None, pi=None, eps=1e-2):
    """Fit a Mixture of FA.

    _X_ is the dataset in _rows_. _hdim_ is the latent dimension,
    the same for all _C_ classes.
    """
    # precompute some 'constants'
    N, d = X.shape
    Ih = np.eye(hdim)
    ll_const = -d / 2. * np.log(2 * np.pi)
    X_sq = X**2
    if W is None:
        W = np.random.randn(C, hdim, d)
    if M is None:
        tmp = np.random.permutation(N)
        M = X[tmp[:C]].copy()
    if psi is None:
        psi = 100 * np.var(X) * np.ones((C, d))
    if pi is None:
        pi = np.ones(C) / C
    # preallocate some helper memory
    E_z = np.zeros((C, N, hdim))
    Cov_z = np.zeros((C, hdim, hdim))
    # store the loglikelihood
    ll = np.zeros((C, N))
    last_ll = -np.inf
    loglike = []
    for i in xrange(maxiters):
        # E-step
        for c in xrange(C):
            # W_c is hdim x d
            W_c = W[c]
            mu_c = M[c]
            # psi_c has length d
            psi_c = psi[c]
            fac = W_c / psi_c
            # see Bishop, p. 93, eq. 2.117
            cov_z = la.inv(Ih + np.dot(fac, W_c.T))
            tmp = np.dot(X - mu_c, fac.T)
            # latent expectations
            E_z[c, :, :] = np.dot(tmp, cov_z)
            # latent _covariance_
            Cov_z[c, :, :] = cov_z
            # loglikelihood: invert the visible covariance
            # via the woodbury identity
            inv_cov_x = np.diag(1. / psi_c) - np.dot(fac.T, np.dot(cov_z, fac))
            _, _det = np.linalg.slogdet(inv_cov_x)
            tmp = np.dot(X - mu_c, inv_cov_x)
            # integrating out the latent z's -> again, Bishop, p. 93, eq. 2.115
            ll[c, :] = np.log(pi[c]) + ll_const + 0.5 * _det - 0.5 * np.sum(tmp * (X - mu_c), axis=1)
        # posterior class distribution given the data
        posteriors = norm_logprob(ll, axis=0)
        # loglikelihood over all datapoints
        ll_sum = np.sum(logsumexp(ll, axis=0))
        loglike.append(ll_sum)
        if ll_sum - last_ll < eps:
            break
        last_ll = ll_sum
        # M-step: solve jointly for loadings and means via an
        # augmented latent representation
        for c in xrange(C):
            z = np.append(E_z[c, :, :], np.ones((N, 1)), axis=1)
            wz = posteriors[c][:, np.newaxis] * z
            wzX = np.dot(wz.T, X)
            wzz = np.dot(wz.T, z)
            N_c = posteriors[c].sum()
            wzz[:hdim, :hdim] += N_c * Cov_z[c, :, :]
            sol = la.lstsq(wzz, wzX)[0]
            M[c, :] = sol[hdim, :]
            W[c, :, :] = sol[:hdim, :]
            psi[c, :] = (np.dot(posteriors[c], X_sq) - np.sum(sol * wzX, axis=0)) / N_c
            psi[c, :] = np.maximum(psi[c, :], SMALL)
            pi[c] = N_c / N
    return W, M, psi, pi, loglike
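# A minimal sketch of running mfa on synthetic data (hypothetical example,
# assuming the module helpers norm_logprob/logsumexp/SMALL are available).
# Two factor analyzers with a 2d latent space embedded in 5d, offset in
# opposite directions; mfa is fit with the matching latent dimension.
def _demo_mfa():
    rng = np.random.RandomState(2)
    N, d, hdim = 200, 5, 2
    W1, W2 = rng.randn(hdim, d), rng.randn(hdim, d)
    X = np.vstack((np.dot(rng.randn(N, hdim), W1) + 3 + 0.1 * rng.randn(N, d),
                   np.dot(rng.randn(N, hdim), W2) - 3 + 0.1 * rng.randn(N, d)))
    W, M, psi, pi, loglike = mfa(X, hdim, C=2, maxiters=100)
    return W, M, psi, pi, loglike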