def XDGMM(X, Xcov, n_components, n_iter=100, tol=1E-5, Nthreads=1, R=None,
          init_n_iter=10, w=None, model=None, fixed_means=None,
          aligned_covs=None, verbose=False):
    """Fit an extreme deconvolution (XD) model to the data.

    Parameters
    ----------
    X: array_like
        Input data. shape = (n_samples, n_features)
    Xcov: array_like
        Covariance of input data.
        shape = (n_samples, n_features, n_features)
    n_components: integer
        number of gaussian components to fit to the data
    n_iter: integer (optional)
        number of EM iterations to perform (default=100)
    tol: float (optional)
        stopping criterion for EM iterations (default=1E-5)
    Nthreads: integer (optional)
        forwarded unchanged to ``xd_model`` (default=1)
    R: array_like (TODO: not implemented)
        Transformation matrix from underlying to observed data.  If
        unspecified, then it is assumed to be the identity matrix.
        Any value other than None raises NotImplementedError.
    init_n_iter: integer (optional)
        number of iterations of the error-free GMM used to initialize
        the components (default=10)
    w: float or array_like
        if float - w * np.eye is added to V
        if vector - np.diag(w) is added to V
        if array - w is added to V
    model: object (optional)
        An existing model to continue fitting.  When given, no new
        ``xd_model`` is built and ``n_components``, ``n_iter``, ``tol``,
        ``w``, ``Nthreads`` and ``verbose`` are read from ``model``
        rather than from the arguments of this function.  The GMM
        initialization is skipped when ``model.V`` is not None.
    fixed_means, aligned_covs: (optional)
        Forwarded to ``xd_model`` and, when either one is not None, to
        ``constrained_GMM`` (which then replaces the plain GMM
        initialization).
    verbose: bool (optional)
        forwarded to ``xd_model``; progress is printed when
        ``model.verbose`` is true.

    Returns
    -------
    model:
        The fitted model.  ``mu``, ``alpha``, ``V`` and ``initial_logL``
        are set by the initialization step (when it runs); ``prev_logL``
        and ``logL`` are updated on every EM iteration.

    Raises
    ------
    NotImplementedError
        if ``R`` is not None.
    AssertionError
        if ``Xcov`` does not have shape
        (model.n_samples, model.n_features, model.n_features).

    Notes
    -----
    This implementation follows Bovy et al. arXiv 0905.2979
    """
    # Reject the unsupported option before doing any work.
    if R is not None:
        raise NotImplementedError("mixing matrix R is not yet implemented")

    # Coerce first so that any array_like (e.g. nested lists) is accepted;
    # X.shape is needed below to build the model.
    X = np.asarray(X)
    Xcov = np.asarray(Xcov)

    if model is None:
        model = xd_model(X.shape, n_components, n_iter, tol, w, Nthreads,
                         fixed_means, aligned_covs, verbose)

    # assume full covariances of data
    assert Xcov.shape == (model.n_samples,
                          model.n_features,
                          model.n_features)

    # initialize components via a few steps of GMM
    # this doesn't take into account errors, but is a fast first-guess
    if model.V is None:
        t0 = time()
        if fixed_means is None and aligned_covs is None:
            gmm = GMM(model.n_components, n_iter=init_n_iter,
                      covariance_type='full').fit(X)
        else:
            gmm = constrained_GMM(X, model.n_components, init_n_iter,
                                  fixed_means, aligned_covs)
        model.mu = gmm.means_
        model.alpha = gmm.weights_
        model.V = gmm.covars_
        model.initial_logL = model.logLikelihood(X, Xcov)
        if model.verbose:
            # Single-argument parenthesised prints produce the same text
            # under the Python 2 print statement and the Python 3 function.
            print('Initalization done in %.2g sec' % (time() - t0))
            # Two spaces: the original print statement emitted the literal's
            # trailing space followed by its own item separator.
            print('Initial Log Likelihood:  %s' % (model.initial_logL,))

    logL = -np.inf
    for i in range(model.n_iter):
        t0 = time()
        model = _EMstep(model, X, Xcov)
        logL_next = model.logLikelihood(X, Xcov)
        t1 = time()

        if model.verbose:
            print("%i: log(L) = %.5g" % (i + 1, logL_next))
            print("    (%.2g sec)" % (t1 - t0))

        # Record both values before the convergence test so they are
        # available on the returned model even when the loop stops here.
        model.prev_logL = logL
        model.logL = logL_next

        # Stop once the log-likelihood gain drops below the tolerance
        # (this also covers a decrease).
        if logL_next < logL + model.tol:
            break
        logL = logL_next

    return model