Exemple #1
0
    def pdf_comp(self, x, cid, log=False):
        """Evaluate the weighted pdf of a single mixture component.

        :Parameters:
            x : ndarray
                points where to estimate the pdf. One row for one
                multi-dimensional sample (eg to estimate the pdf at 100
                different points in 10 dimension, data's shape should be (100,
                10)).
            cid: int
                the component index.
            log : bool
                If true, returns the log pdf instead of the pdf.

        :Returns:
            y : ndarray
                the (log) pdf of component cid at points x, scaled by the
                component weight (added in the log domain).

        :Raises:
            GmParamError if self.mode is neither 'diag' nor 'full'."""
        # Select the variance entries that belong to the requested component:
        # one row in diagonal mode, a block of self.d rows in full mode.
        if self.mode == 'diag':
            comp_va = self.va[cid]
        else:
            if self.mode != 'full':
                raise GmParamError("""var mode %s not supported""" % self.mode)
            first_row = cid * self.d
            last_row = (cid + 1) * self.d
            comp_va = self.va[first_row:last_row]

        # NOTE(review): the log branch goes through D.gauss_den whereas the
        # linear branch goes through D.multiple_gauss_den -- confirm that both
        # helpers return arrays of the same shape.
        if not log:
            density = D.multiple_gauss_den(x, self.mu[cid], comp_va)
            return density * self.w[cid]

        log_density = D.gauss_den(x, self.mu[cid], comp_va, log=True)
        return log_density + N.log(self.w[cid])
Exemple #2
0
    def pdf_comp(self, x, cid, log=False):
        """Return the weighted (log) density of one component at points x.

        :Parameters:
            x : ndarray
                points where to estimate the pdf. One row for one
                multi-dimensional sample (eg to estimate the pdf at 100
                different points in 10 dimension, data's shape should be (100,
                10)).
            cid: int
                the component index.
            log : bool
                If true, returns the log pdf instead of the pdf.

        :Returns:
            y : ndarray
                the pdf of component cid at points x, multiplied by its weight
                (or the log pdf plus the log weight when log is true).

        :Raises:
            GmParamError if self.mode is neither "diag" nor "full"."""
        # Pick the slice of self.va describing component cid.
        if self.mode == "diag":
            component_va = self.va[cid]
        else:
            if self.mode != "full":
                raise GmParamError("""var mode %s not supported""" % self.mode)
            # In full mode each component owns self.d consecutive rows.
            start = cid * self.d
            stop = (cid + 1) * self.d
            component_va = self.va[start:stop]

        # NOTE(review): D.gauss_den is used in the log domain but
        # D.multiple_gauss_den in the linear domain -- verify the two agree on
        # the shape of what they return.
        if not log:
            linear = D.multiple_gauss_den(x, self.mu[cid], component_va)
            return linear * self.w[cid]

        logged = D.gauss_den(x, self.mu[cid], component_va, log=True)
        return logged + N.log(self.w[cid])
Exemple #3
0
    def pdf(self, x, log=False):
        """Evaluate the mixture pdf at the given points.

        :Parameters:
            x : ndarray
                points where to estimate the pdf. One row for one
                multi-dimensional sample (eg to estimate the pdf at 100
                different points in 10 dimension, data's shape should be (100,
                10)).
            log : bool
                If true, returns the log pdf instead of the pdf.

        :Returns:
            y : ndarray
                the (log) pdf at points x."""
        if not log:
            # Weight every component density, then add components up (axis 1).
            weighted = D.multiple_gauss_den(x, self.mu, self.va) * self.w
            return N.sum(weighted, 1)

        # Log domain: add the log weights and reduce with logsumexp.
        log_weighted = D.multiple_gauss_den(x, self.mu, self.va, log=True)
        log_weighted = log_weighted + N.log(self.w)
        return D.logsumexp(log_weighted)
    def likelihood(self, data):
        """Return the current log likelihood of the model given the data.

        The weighted component densities are summed per sample (axis 1), and
        the logs of those sums are added up over all samples (axis 0)."""
        assert self.isinit
        model = self.gm
        # Component densities scaled in place by the mixture weights.
        weighted = densities.multiple_gauss_den(data, model.mu, model.va)
        weighted *= model.w

        # NOTE(review): the log is taken of linear-domain densities; this may
        # underflow for samples far from every component -- confirm whether a
        # log-domain evaluation is wanted here.
        per_sample = N.sum(weighted, axis=1)
        return N.sum(N.log(per_sample), axis=0)
    def likelihood(self, data):
        """Compute the log likelihood of data under the current model.

        Each sample contributes the log of the sum over components (axis 1)
        of its weighted gaussian densities; contributions are then summed
        over samples (axis 0)."""
        assert self.isinit
        mixture = self.gm
        # Gaussian pdf of every component, multiplied in place by its weight.
        scaled = densities.multiple_gauss_den(data, mixture.mu, mixture.va)
        scaled *= mixture.w

        # NOTE(review): working in the linear domain may underflow before the
        # log is applied -- confirm this is acceptable for the expected data.
        sample_pdf = N.sum(scaled, axis=1)
        log_pdf = N.log(sample_pdf)
        return N.sum(log_pdf, axis=0)
Exemple #6
0
    def pdf(self, x, log=False):
        """Compute the pdf of the whole mixture at the given points.

        :Parameters:
            x : ndarray
                points where to estimate the pdf. One row for one
                multi-dimensional sample (eg to estimate the pdf at 100
                different points in 10 dimension, data's shape should be (100,
                10)).
            log : bool
                If true, returns the log pdf instead of the pdf.

        :Returns:
            y : ndarray
                the (log) pdf at points x."""
        if not log:
            # Linear domain: scale each component by its weight and sum the
            # components (axis 1).
            per_component = D.multiple_gauss_den(x, self.mu, self.va)
            return N.sum(per_component * self.w, 1)

        # Log domain: log weights are added, and the components are combined
        # through logsumexp.
        log_per_component = D.multiple_gauss_den(x, self.mu, self.va, log=True)
        return D.logsumexp(log_per_component + N.log(self.w))
    def compute_responsabilities(self, data):
        """Compute responsabilities.

        Return normalized and non-normalized responsabilities for the model,
        in that order.

        Note
        ----
        Computes the latent variable distribution (a posteriori probability)
        knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
        i) = P[state = i | observation = data(t); w, mu, va]

        This is basically the E step of EM for finite mixtures."""
        model = self.gm
        # Gaussian pdf of every component, scaled in place by its weight.
        unnormalized = densities.multiple_gauss_den(data, model.mu, model.va)
        unnormalized *= model.w

        # Divide every row by its total so that each row sums to one.
        row_totals = N.sum(unnormalized, axis=1)
        normalized = unnormalized / row_totals[:, N.newaxis]

        return normalized, unnormalized
    def compute_responsabilities(self, data):
        """Compute responsabilities.

        Return a pair: the normalized responsabilities, then the
        non-normalized ones (weighted component densities).

        Note
        ----
        Computes the latent variable distribution (a posteriori probability)
        knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
        i) = P[state = i | observation = data(t); w, mu, va]

        This is basically the E step of EM for finite mixtures."""
        mixture = self.gm
        # Weighted gaussian densities (the weight is applied in place).
        weighted = densities.multiple_gauss_den(data, mixture.mu, mixture.va)
        weighted *= mixture.w

        # Normalize across axis 1 to obtain a pdf for every row.
        norm_column = N.sum(weighted, axis=1)[:, N.newaxis]
        posterior = weighted / norm_column

        return posterior, weighted
    def compute_log_responsabilities(self, data):
        """Compute log responsabilities.

        Return normalized and non-normalized responsabilities for the model
        (in the log domain), in that order.

        Note
        ----
        Computes the latent variable distribution (a posteriori probability)
        knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
        i) = P[state = i | observation = data(t); w, mu, va]

        This is basically the E step of EM for finite mixtures."""
        model = self.gm
        # Log gaussian pdf of every component.
        log_joint = densities.multiple_gauss_den(data, model.mu, model.va,
                                                 log=True)
        # Weighting is an addition of the log weights in the log domain.
        log_joint += N.log(model.w)

        # Subtract the per-row logsumexp to normalize into a (log) pdf.
        log_evidence = densities.logsumexp(log_joint)
        log_posterior = log_joint - log_evidence[:, N.newaxis]

        return log_posterior, log_joint
Exemple #10
0
    def compute_log_responsabilities(self, data):
        """Compute log responsabilities.

        Return a pair, both in the log domain: the normalized
        responsabilities, then the non-normalized ones.

        Note
        ----
        Computes the latent variable distribution (a posteriori probability)
        knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
        i) = P[state = i | observation = data(t); w, mu, va]

        This is basically the E step of EM for finite mixtures."""
        mixture = self.gm
        # Log densities of every component at every sample.
        weighted_log = densities.multiple_gauss_den(
            data, mixture.mu, mixture.va, log=True)
        # Adding the log weight is the log-domain form of weighting.
        weighted_log += N.log(mixture.w)

        # Normalize: remove the logsumexp of each row from that row.
        normalizer = densities.logsumexp(weighted_log)[:, N.newaxis]
        normalized_log = weighted_log - normalizer

        return normalized_log, weighted_log
Exemple #11
0
    def plot1d(self, level=misc.DEF_LEVEL, fill=False, gpdf=False):
        """Plots the pdf of each component of the 1d mixture.

        :Parameters:
            level : float
                level of confidence (to use with fill argument). It is passed
                to the normal ppf as (1 + level) / 2.
            fill : bool
                if True, the area of the pdf corresponding to the given
                confidence interval is filled.
            gpdf : bool
                if True, the global pdf is plotted.

        :Returns:
            h : dict
                Returns a dictionary h of plot handles so that their properties
                can be modified (eg color, label, etc...):
                - h['pdf'] is a list of lines, one line per component pdf
                - h['gpdf'] is the line for the global pdf
                - h['conf'] is a list of filling area

        :Raises:
            ValueError if the mixture is not unidimensional.
            GmParamError if pylab cannot be imported.
        """
        if not self.__is1d:
            raise ValueError("This function does not make sense for "
                             "mixtures which are not unidimensional")

        from scipy.stats import norm

        # Only the import is guarded, so an ImportError raised later by the
        # plotting code itself is not misreported as "matplotlib not found".
        try:
            import pylab as P
        except ImportError:
            raise GmParamError("matplotlib not found, cannot plot...")

        std = N.sqrt(self.va[:, 0])
        # Half-width of the confidence interval of each component.
        pval = std * norm(0, 1).ppf((1 + level) / 2)

        # Compute reasonable min/max for the normal pdf: [-mc * std, mc * std]
        # gives the range we are taking in account for each gaussian
        mc = 3
        mi = N.amin(self.mu[:, 0] - mc * std)
        ma = N.amax(self.mu[:, 0] + mc * std)

        npoints = 500
        x = N.linspace(mi, ma, npoints)
        # Prepare the dict of plot handles to return
        hp = {"pdf": [], "conf": [], "gpdf": []}

        # Compute the densities
        y = D.multiple_gauss_den(x[:, N.newaxis], self.mu, self.va, log=True) + N.log(self.w)
        yt = self.pdf(x[:, N.newaxis])

        for c in range(self.k):
            h = P.plot(x, N.exp(y[:, c]), "r", label="_nolegend_")
            hp["pdf"].extend(h)
            if not fill:
                continue

            # x is 1-d (it comes from linspace), so the abscissae inside the
            # confidence interval are selected with one boolean mask; indexing
            # it with two axes would raise IndexError.
            center = self.mu[c, 0]
            inside = (x > center - pval[c]) & (x < center + pval[c])
            xc = x[inside]
            if xc.size == 0:
                # Interval narrower than the grid step: nothing to fill.
                continue

            # pdf_comp is given one sample per row, like the calls above.
            # Its result is flattened because the shape returned by the D
            # helpers is not visible from here.
            yf = N.ravel(self.pdf_comp(xc[:, N.newaxis], c))
            # Close the polygon on the x axis at both ends of the interval.
            xc = N.concatenate(([xc[0]], xc, [xc[-1]]))
            yf = N.concatenate(([0], yf, [0]))
            h = P.fill(xc, yf, facecolor="b", alpha=0.1, label="_nolegend_")
            hp["conf"].extend(h)

        if gpdf:
            h = P.plot(x, yt, "r:", label="_nolegend_")
            hp["gpdf"] = h
        return hp
Exemple #12
0
    def plot1d(self, level=misc.DEF_LEVEL, fill=False, gpdf=False):
        """Plots the pdf of each component of the 1d mixture.

        :Parameters:
            level : float
                level of confidence (to use with fill argument). It is passed
                to the normal ppf as (1 + level) / 2.
            fill : bool
                if True, the area of the pdf corresponding to the given
                confidence interval is filled.
            gpdf : bool
                if True, the global pdf is plotted.

        :Returns:
            h : dict
                Returns a dictionary h of plot handles so that their properties
                can be modified (eg color, label, etc...):
                - h['pdf'] is a list of lines, one line per component pdf
                - h['gpdf'] is the line for the global pdf
                - h['conf'] is a list of filling area

        :Raises:
            ValueError if the mixture is not unidimensional.
            GmParamError if pylab cannot be imported.
        """
        if not self.__is1d:
            raise ValueError("This function does not make sense for "
                             "mixtures which are not unidimensional")

        from scipy.stats import norm

        # Guard the import alone: ImportErrors raised further down by the
        # plotting code should not be reported as a missing matplotlib.
        try:
            import pylab as P
        except ImportError:
            raise GmParamError("matplotlib not found, cannot plot...")

        std = N.sqrt(self.va[:, 0])
        # Half-width of the confidence interval of each component.
        pval = std * norm(0, 1).ppf((1 + level) / 2)

        # Compute reasonable min/max for the normal pdf: [-mc * std, mc * std]
        # gives the range we are taking in account for each gaussian
        mc = 3
        mi = N.amin(self.mu[:, 0] - mc * std)
        ma = N.amax(self.mu[:, 0] + mc * std)

        npoints = 500
        x = N.linspace(mi, ma, npoints)
        # Prepare the dict of plot handles to return
        hp = {'pdf': [], 'conf': [], 'gpdf': []}

        # Compute the densities
        y = D.multiple_gauss_den(x[:, N.newaxis], self.mu, self.va,
                                 log=True) + N.log(self.w)
        yt = self.pdf(x[:, N.newaxis])

        for c in range(self.k):
            h = P.plot(x, N.exp(y[:, c]), 'r', label='_nolegend_')
            hp['pdf'].extend(h)
            if not fill:
                continue

            # x is a 1-d grid, so it cannot be indexed with two axes; keep
            # the points strictly inside the confidence interval with a
            # single boolean mask instead.
            center = self.mu[c, 0]
            lower = center - pval[c]
            upper = center + pval[c]
            xc = x[(x > lower) & (x < upper)]
            if xc.size == 0:
                # No grid point falls inside the interval: nothing to fill.
                continue

            # One sample per row for pdf_comp, as in the calls above. The
            # result is flattened since the shape returned by the D helpers
            # cannot be checked from here.
            yf = N.ravel(self.pdf_comp(xc[:, N.newaxis], c))
            # Drop the curve to zero at both ends to close the filled area.
            xc = N.concatenate(([xc[0]], xc, [xc[-1]]))
            yf = N.concatenate(([0], yf, [0]))
            h = P.fill(xc,
                       yf,
                       facecolor='b',
                       alpha=0.1,
                       label='_nolegend_')
            hp['conf'].extend(h)

        if gpdf:
            h = P.plot(x, yt, 'r:', label='_nolegend_')
            hp['gpdf'] = h
        return hp