Example #1
    def setUp(self):
        """
        Set up each test with a new XDGMM object and some data.
        """
        self.xdgmm = XDGMM(n_components=3)
        self.files = []
        """
        Use scikit-learn GaussianMixture for sampling some data points
        """
        self.gmm = skl_GMM(n_components=3,
                           max_iter=10,
                           covariance_type='full',
                           random_state=None)
        # Hand-set the parameters of a 3-component, 2-D mixture.
        self.gmm.weights_ = np.array([0.3, 0.5, 0.2])
        self.gmm.means_ = np.array([[0, 1], [5, 4], [2, 4]])
        self.gmm.covariances_ = np.array([
            np.diag((2, 1)),
            np.array([[1, 0.2], [0.2, 1]]),
            np.diag((0.3, 0.5))
        ])

        # Set the precisions and their Cholesky factors as well, so the
        # GaussianMixture object behaves as if it had been fitted.
        self.gmm.precisions_ = np.linalg.inv(self.gmm.covariances_)
        self.gmm.precisions_cholesky_ = np.linalg.cholesky(
            self.gmm.precisions_)

        self.X = self.gmm.sample(1000)[0]
        # Diagonal measurement-error covariance matrices built from
        # uniform random per-feature errors.
        errs = 0.2 * np.random.random_sample((1000, 2))
        self.Xerr = np.zeros(self.X.shape + self.X.shape[-1:])
        diag = np.arange(self.X.shape[-1])
        self.Xerr[:, diag, diag] = np.vstack([errs[:, 0]**2, errs[:, 1]**2]).T
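The mock data above pairs every sample with its own error covariance, which is exactly the (X, Xerr) pair the methods in the later examples expect. A minimal sketch of a test that could follow this setUp, assuming the fit and score_samples signatures shown in Examples #2 and #4 (the asserted shapes are illustrative):

    def test_fit_and_score(self):
        self.xdgmm.fit(self.X, self.Xerr)
        logprob, resp = self.xdgmm.score_samples(self.X, self.Xerr)
        self.assertEqual(logprob.shape, (1000,))  # one log-likelihood per sample
        self.assertEqual(resp.shape, (1000, 3))   # one posterior row per sample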
Example #2
    def fit(self, X, Xerr):
        """Fit the XD model to data.

        Whichever method is specified in self.method will be used.

        Results are saved in self.mu, self.V, and self.weights, as
            well as in the self.GMM object.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on input data.
        """

        if isinstance(X, pd.DataFrame):
            if isinstance(X.columns, pd.Index):
                self.labels = np.array(X.columns)
            X = X.values

        if self.method == 'astroML':
            self.GMM.n_components = self.n_components
            self.GMM.n_iter = self.n_iter
            self.GMM.fit(X, Xerr)

            self.V = self.GMM.V
            self.mu = self.GMM.mu
            self.weights = self.GMM.alpha

        if self.method == 'Bovy':
            # Bovy's extreme_deconvolution is only imported if the
            # method is 'Bovy', because its installation is more
            # involved than astroML's and we don't want it to be
            # required.
            #
            # As with the astroML method, initialize with a few steps
            # of the scikit-learn GMM.
            from extreme_deconvolution import extreme_deconvolution \
                as bovyXD

            tmp_gmm = skl_GMM(self.n_components, max_iter=1,
                              covariance_type='full',
                              random_state=self.random_state)
            tmp_gmm._initialize_parameters(X, self.random_state)
            self.mu = tmp_gmm.means_
            self.weights = tmp_gmm.weights_
            self.V = tmp_gmm.covariances_

            # extreme_deconvolution updates weights, mu, and V in
            # place and returns the final log-likelihood.
            logl = bovyXD(X, Xerr, self.weights, self.mu, self.V,
                          splitnmerge=self.splitnmerge, tol=self.tol,
                          maxiter=self.n_iter, w=self.w)
            self.GMM.V = self.V
            self.GMM.mu = self.mu
            self.GMM.alpha = self.weights

        return self
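A minimal usage sketch for fit, assuming X and Xerr shaped as in Example #1; passing a DataFrame is optional and only affects self.labels (the column names here are hypothetical):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(X, columns=['color', 'magnitude'])  # hypothetical feature names
    model = XDGMM(n_components=3)  # default fitting method
    model.fit(df, Xerr)            # deconvolves the per-sample errors in Xerr
    print(model.labels)            # column names captured from the DataFrame
    print(model.weights)           # fitted mixture amplitudes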
Example #3
def plot_cond_model(xdgmm, cond_xdgmm, y):
    plt.clf()
    setup_text_plots(fontsize=16, usetex=True)
    fig = plt.figure(figsize=(12, 9))

    ax1 = fig.add_subplot(111)
    for i in range(xdgmm.n_components):
        draw_ellipse(xdgmm.mu[i],
                     xdgmm.V[i],
                     scales=[2],
                     ax=ax1,
                     ec='None',
                     fc='gray',
                     alpha=0.2)

    ax1.plot([-2, 15], [y, y], color='blue', linewidth=2)
    ax1.set_xlim(-1, 13)
    ax1.set_ylim(-6, 16)
    ax1.set_xlabel('$x$', fontsize=18)
    ax1.set_ylabel('$y$', fontsize=18)

    ax2 = ax1.twinx()
    x = np.array([np.linspace(-2, 14, 1000)]).T

    gmm = skl_GMM(n_components=cond_xdgmm.n_components,
                  covariance_type='full')
    gmm.means_ = cond_xdgmm.mu
    gmm.weights_ = cond_xdgmm.weights
    gmm.covariances_ = cond_xdgmm.V
    # score_samples needs the precision Cholesky factors, which
    # scikit-learn would normally compute during fit.
    gmm.precisions_cholesky_ = np.linalg.cholesky(
        np.linalg.inv(cond_xdgmm.V))

    # GaussianMixture.score_samples returns only the log-likelihoods;
    # responsibilities would come from predict_proba.
    logprob = gmm.score_samples(x)

    pdf = np.exp(logprob)
    ax2.plot(x,
             pdf,
             color='red',
             linewidth=2,
             label='Cond. dist. of $x$ given $y=' + str(y) + r'\pm 0.05$')
    ax2.legend()
    ax2.set_ylabel('Probability', fontsize=18)
    ax2.set_ylim(0, 0.52)
    ax1.set_xlim(-1, 13)
    plt.show()
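The cond_xdgmm argument is the model conditioned on a fixed y. Whether XDGMM exposes a condition method, and with what signature, is not shown in these snippets; the driver below is a hypothetical sketch in which np.nan marks the unconstrained dimension:

    y = 1.5
    # Hypothetical: condition the 2-D model on the second feature (y),
    # leaving the first feature (x) free.
    cond_xdgmm = xdgmm.condition(X_input=np.array([np.nan, y]))
    plot_cond_model(xdgmm, cond_xdgmm, y)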
Example #4
    def score_samples(self, X, Xerr):
        """Return the per-sample likelihood of the data under the model.

        Uses the scikit-learn GMM.score_samples method to compute the
            log probability of X under the model, and returns the
            posterior probabilities of each mixture component for each
            element of X.

        Scores each data point in X separately so that each
            corresponding Xerr array can be folded into the covariance
            matrices and included in the calculation (the scikit-learn
            GMM implementation does not account for measurement errors).

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on input data.
                
        Returns
        -------
        logprob : array_like, shape = (n_samples,)
            Log probabilities of each data point in X.
        
        responsibilities: array_like, shape = (n_samples, n_components)
            Posterior probabilities of each mixture component for each
            data point in X.
        """
        if self.V is None or self.mu is None or self.weights is None:
            raise ValueError("Model parameters not set.")

        if isinstance(X, pd.DataFrame):
            X = X.values
        
        tmp_GMM = skl_GMM(self.n_components, max_iter=self.n_iter,
                          covariance_type='full',
                          random_state=self.random_state)
        tmp_GMM.weights_ = self.weights
        tmp_GMM.means_ = self.mu
        
        X = X[:, np.newaxis, :]
        Xerr = Xerr[:, np.newaxis, :, :]
        # Fold each sample's error covariance into the model covariances.
        T = Xerr + self.V

        logprob = []
        responsibilities = []

        for i in range(X.shape[0]):
            # Score each point against a mixture whose covariances
            # include that point's own measurement errors.
            tmp_GMM.covariances_ = T[i]
            precisions = np.linalg.inv(T[i])
            tmp_GMM.precisions_ = precisions
            tmp_GMM.precisions_cholesky_ = np.linalg.cholesky(precisions)
            lp = tmp_GMM.score_samples(X[i].reshape(1, -1))
            logprob.append(lp)
            resp = tmp_GMM.predict_proba(X[i].reshape(1, -1))
            responsibilities.append(resp)
        
        logprob = np.array(logprob)[:, 0]
        responsibilities = np.array(responsibilities)[:, 0]

        return logprob, responsibilities
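A minimal usage sketch, assuming a model fitted as in Example #2 and data shaped as in Example #1:

    logprob, resp = model.score_samples(X, Xerr)
    print(logprob.shape)          # (1000,): one log-likelihood per sample
    print(resp.sum(axis=1)[:3])   # responsibilities sum to 1 for each sample
    labels = resp.argmax(axis=1)  # hard component assignments, if needed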