def pdf_comp(self, x, cid, log=False):
    """Evaluate the weighted pdf of one component of the model.

    :Parameters:
        x : ndarray
            points where to estimate the pdf. One row for one
            multi-dimensional sample (eg to estimate the pdf at 100
            different points in 10 dimension, data's shape should be
            (100, 10)).
        cid : int
            the component index.
        log : bool
            If true, returns the log pdf instead of the pdf.

    :Returns:
        y : ndarray
            the pdf (or log pdf) of component cid at points x, with the
            component weight w[cid] applied.

    :Raises:
        GmParamError if self.mode is neither 'diag' nor 'full'.
    """
    # Pick the variance parameters that belong to the requested component.
    if self.mode == 'full':
        # In full mode the component owns self.d consecutive rows of va.
        first_row = cid * self.d
        last_row = (cid + 1) * self.d
        comp_va = self.va[first_row:last_row]
    elif self.mode == 'diag':
        comp_va = self.va[cid]
    else:
        raise GmParamError("""var mode %s not supported""" % self.mode)

    if not log:
        density = D.multiple_gauss_den(x, self.mu[cid], comp_va)
        return density * self.w[cid]

    # Log domain: the weight is added as a log instead of multiplied.
    log_density = D.gauss_den(x, self.mu[cid], comp_va, log=True)
    return log_density + N.log(self.w[cid])
def pdf_comp(self, x, cid, log=False):
    """Compute the pdf of a single component of the model at given points.

    :Parameters:
        x : ndarray
            points where to estimate the pdf. One row for one
            multi-dimensional sample (eg to estimate the pdf at 100
            different points in 10 dimension, data's shape should be
            (100, 10)).
        cid : int
            the component index.
        log : bool
            If true, returns the log pdf instead of the pdf.

    :Returns:
        y : ndarray
            the weighted pdf (or log pdf) of the component at points x.

    :Raises:
        GmParamError if the variance mode is not 'diag' or 'full'.
    """
    if self.mode == "diag":
        va_cid = self.va[cid]
    elif self.mode == "full":
        # Rows [cid * d, (cid + 1) * d) of va hold this component's block.
        block = slice(cid * self.d, (cid + 1) * self.d)
        va_cid = self.va[block]
    else:
        raise GmParamError("""var mode %s not supported""" % self.mode)

    if log:
        # Weighting in the log domain is an addition of log(w[cid]).
        unweighted = D.gauss_den(x, self.mu[cid], va_cid, log=True)
        return unweighted + N.log(self.w[cid])

    unweighted = D.multiple_gauss_den(x, self.mu[cid], va_cid)
    return unweighted * self.w[cid]
def pdf(self, x, log=False):
    """Evaluate the pdf of the whole mixture at given points.

    :Parameters:
        x : ndarray
            points where to estimate the pdf. One row for one
            multi-dimensional sample (eg to estimate the pdf at 100
            different points in 10 dimension, data's shape should be
            (100, 10)).
        log : bool
            If true, returns the log pdf instead of the pdf.

    :Returns:
        y : ndarray
            the pdf (or log pdf) at points x.
    """
    if not log:
        # Weight every component density, then add the components up.
        weighted = D.multiple_gauss_den(x, self.mu, self.va) * self.w
        return N.sum(weighted, 1)

    # Log domain: add the log weights and reduce with log-sum-exp.
    log_weighted = D.multiple_gauss_den(x, self.mu, self.va, log=True) \
        + N.log(self.w)
    return D.logsumexp(log_weighted)
def likelihood(self, data):
    """Return the current log likelihood of the model given the data.

    The mixture density of every sample is accumulated in the log domain
    (log component densities plus log weights, reduced with
    densities.logsumexp), the same way compute_log_responsabilities does.
    Taking the log of a linear-domain sum instead would give log(0) for
    samples whose component densities all underflow to zero.

    :Parameters:
        data : ndarray
            the samples, passed as is to densities.multiple_gauss_den.

    :Returns:
        the sum over the samples of the log mixture density.
    """
    assert self.isinit

    # Log of (weight * gaussian pdf), one column per component.
    log_tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va,
                                           log=True)
    log_tgd = log_tgd + N.log(self.gm.w)

    # Per-sample log mixture density, then summed over the samples.
    return N.sum(densities.logsumexp(log_tgd), axis=0)
def likelihood(self, data):
    """Return the current log likelihood of the model given the data.

    The computation stays in the log domain: the log densities of the
    components are shifted by the log weights and combined per sample with
    densities.logsumexp (as in compute_log_responsabilities). This avoids
    evaluating log(0) when the densities of a sample underflow to zero in
    the linear domain.

    :Parameters:
        data : ndarray
            the samples, forwarded unchanged to
            densities.multiple_gauss_den.

    :Returns:
        the log likelihood, ie the per-sample log mixture densities summed
        over all samples.
    """
    assert self.isinit

    # Log gaussian pdf of every sample under every component.
    log_dens = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va,
                                            log=True)
    # Multiplying by the weight is an addition in the log domain.
    log_dens = log_dens + N.log(self.gm.w)

    per_sample = densities.logsumexp(log_dens)
    return N.sum(per_sample, axis=0)
def pdf(self, x, log=False):
    """Compute the pdf of the model at given points.

    :Parameters:
        x : ndarray
            points where to estimate the pdf. One row for one
            multi-dimensional sample (eg to estimate the pdf at 100
            different points in 10 dimension, data's shape should be
            (100, 10)).
        log : bool
            If true, returns the log pdf instead of the pdf.

    :Returns:
        y : ndarray
            the pdf (or log pdf) at points x.
    """
    if log:
        # Component log densities shifted by the log weights, then reduced
        # with log-sum-exp.
        per_comp = D.multiple_gauss_den(x, self.mu, self.va, log=True)
        return D.logsumexp(per_comp + N.log(self.w))

    per_comp = D.multiple_gauss_den(x, self.mu, self.va)
    # Weighted sum over the components (axis 1).
    return N.sum(per_comp * self.w, 1)
def compute_responsabilities(self, data):
    """Compute responsabilities.

    Return normalized and non-normalized responsabilities for the model.

    Note
    ----
    Computes the latent variable distribution (a posteriori probability)
    knowing the explicit data for the Gaussian model (w, mu, var):
    gamma(t, i) = P[state = i | observation = data(t); w, mu, va]

    This is basically the E step of EM for finite mixtures.
    """
    # Gaussian pdf of the data under every component, scaled in place by
    # the component weights.
    weighted = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
    weighted *= self.gm.w

    # Divide every row by its total so that it sums to one.
    row_totals = N.sum(weighted, axis=1)
    normalized = weighted / row_totals[:, N.newaxis]

    return normalized, weighted
def compute_log_responsabilities(self, data):
    """Compute log responsabilities.

    Return normalized and non-normalized responsabilities for the model
    (in the log domain).

    Note
    ----
    Computes the latent variable distribution (a posteriori probability)
    knowing the explicit data for the Gaussian model (w, mu, var):
    gamma(t, i) = P[state = i | observation = data(t); w, mu, va]

    This is basically the E step of EM for finite mixtures.
    """
    # Log gaussian pdf of the data under every component.
    log_joint = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va,
                                             log=True)
    # Applying the weight is an addition of log(w) in the log domain.
    log_joint += N.log(self.gm.w)

    # Subtract the per-row log-sum-exp to get a normalized (log) pdf.
    log_norm = densities.logsumexp(log_joint)
    log_resp = log_joint - log_norm[:, N.newaxis]

    return log_resp, log_joint
def compute_log_responsabilities(self, data):
    """Compute log responsabilities.

    Return normalized and non-normalized responsabilities for the model
    (in the log domain).

    Note
    ----
    Computes the latent variable distribution (a posteriori probability)
    knowing the explicit data for the Gaussian model (w, mu, var):
    gamma(t, i) = P[state = i | observation = data(t); w, mu, va]

    This is basically the E step of EM for finite mixtures.
    """
    gm = self.gm

    # Non-normalized term: log gaussian pdf plus log weight (added in
    # place to the array returned by multiple_gauss_den).
    unnormalized = densities.multiple_gauss_den(data, gm.mu, gm.va, log=True)
    unnormalized += N.log(gm.w)

    # Normalized term: remove the per-row log-sum-exp.
    normalizer = densities.logsumexp(unnormalized)[:, N.newaxis]
    return unnormalized - normalizer, unnormalized
def plot1d(self, level=misc.DEF_LEVEL, fill=False, gpdf=False):
    """Plots the pdf of each component of the 1d mixture.

    :Parameters:
        level : float
            level of confidence (to use with fill argument); presumably a
            probability in (0, 1), since (1 + level) / 2 is passed to the
            normal quantile function.
        fill : bool
            if True, the area of the pdf corresponding to the given
            confidence intervales is filled.
        gpdf : bool
            if True, the global pdf is plot.

    :Returns:
        h : dict
            Returns a dictionary h of plot handles so that their properties
            can be modified (eg color, label, etc...):
                - h['pdf'] is a list of lines, one line per component pdf
                - h['gpdf'] is what pylab.plot returned for the global pdf
                  (an empty list when gpdf is False)
                - h['conf'] is a list of filling area

    :Raises:
        ValueError if the mixture is not unidimensional.
        GmParamError if pylab (matplotlib) cannot be imported.
    """
    if not self.__is1d:
        raise ValueError("This function does not make sense for "
                         "mixtures which are not unidimensional")

    from scipy.stats import norm

    std = N.sqrt(self.va[:, 0])
    # Half-width of the confidence interval of every component. The float
    # divisor keeps a true division whatever the type of level.
    pval = std * norm(0, 1).ppf((1 + level) / 2.0)

    # Compute reasonable min/max for the normal pdf: [-mc * std, mc * std]
    # gives the range we are taking in account for each gaussian
    mc = 3
    mi = N.amin(self.mu[:, 0] - mc * std)
    ma = N.amax(self.mu[:, 0] + mc * std)
    npoints = 500
    x = N.linspace(mi, ma, npoints)

    # Prepare the dic of plot handles to return
    hp = {"pdf": [], "conf": [], "gpdf": []}

    # Log of the weighted density of every component on the grid, one
    # column per component.
    y = D.multiple_gauss_den(x[:, N.newaxis], self.mu, self.va, log=True) \
        + N.log(self.w)

    # Only the import is guarded, so that an error raised while plotting is
    # not reported as a missing matplotlib.
    try:
        import pylab as P
    except ImportError:
        raise GmParamError("matplotlib not found, cannot plot...")

    for c in range(self.k):
        h = P.plot(x, N.exp(y[:, c]), "r", label="_nolegend_")
        hp["pdf"].extend(h)

        if fill:
            # Grid points strictly inside the confidence interval. x is
            # 1-d, hence a boolean mask and not a 2-d index.
            center = self.mu[c, 0]
            inside = (x > center - pval[c]) & (x < center + pval[c])
            xc = x[inside]
            if xc.size == 0:
                # Nothing to fill for this component.
                continue

            # Points go in as a column, like for the other density calls
            # of this function; ravel so the result is 1-d either way.
            yf = N.ravel(self.pdf_comp(xc[:, N.newaxis], c))

            # Close the polygon down to the x axis at both ends.
            xc = N.concatenate(([xc[0]], xc, [xc[-1]]))
            yf = N.concatenate(([0], yf, [0]))
            h = P.fill(xc, yf, facecolor="b", alpha=0.1, label="_nolegend_")
            hp["conf"].extend(h)

    if gpdf:
        # The global pdf is only evaluated when it is actually plotted.
        yt = self.pdf(x[:, N.newaxis])
        hp["gpdf"] = P.plot(x, yt, "r:", label="_nolegend_")

    return hp
def plot1d(self, level=misc.DEF_LEVEL, fill=False, gpdf=False):
    """Plots the pdf of each component of the 1d mixture.

    :Parameters:
        level : float
            level of confidence (to use with fill argument); presumably a
            probability in (0, 1), since (1 + level) / 2 is passed to the
            normal quantile function.
        fill : bool
            if True, the area of the pdf corresponding to the given
            confidence intervales is filled.
        gpdf : bool
            if True, the global pdf is plot.

    :Returns:
        h : dict
            Returns a dictionary h of plot handles so that their properties
            can be modified (eg color, label, etc...):
                - h['pdf'] is a list of lines, one line per component pdf
                - h['gpdf'] is what pylab.plot returned for the global pdf
                  (an empty list when gpdf is False)
                - h['conf'] is a list of filling area

    :Raises:
        ValueError if the mixture is not unidimensional.
        GmParamError if pylab (matplotlib) cannot be imported.
    """
    if not self.__is1d:
        raise ValueError("This function does not make sense for "
                         "mixtures which are not unidimensional")

    from scipy.stats import norm

    # Standard deviation of every component, and the half-width of its
    # confidence interval (float divisor: true division for any level).
    std = N.sqrt(self.va[:, 0])
    pval = std * norm(0, 1).ppf((1 + level) / 2.0)

    # Compute reasonable min/max for the normal pdf: [-mc * std, mc * std]
    # gives the range we are taking in account for each gaussian
    mc = 3
    mi = N.amin(self.mu[:, 0] - mc * std)
    ma = N.amax(self.mu[:, 0] + mc * std)
    x = N.linspace(mi, ma, 500)
    # Column view of the grid: the density functions are given one point
    # per row.
    xcol = x[:, N.newaxis]

    # Prepare the dic of plot handles to return
    hp = {'pdf': [], 'conf': [], 'gpdf': []}

    # Compute the (log) weighted densities, one column per component.
    y = D.multiple_gauss_den(xcol, self.mu, self.va, log=True) \
        + N.log(self.w)

    # Guard the import alone: a failure in the plotting code below must not
    # be turned into a "matplotlib not found" error.
    try:
        import pylab as P
    except ImportError:
        raise GmParamError("matplotlib not found, cannot plot...")

    for c in range(self.k):
        h = P.plot(x, N.exp(y[:, c]), 'r', label='_nolegend_')
        hp['pdf'].extend(h)

        if not fill:
            continue

        # Compute x coordinates of filled area: the grid points strictly
        # between the two bounds. x is 1-d, so it is filtered with a
        # boolean mask.
        id1 = -pval[c] + self.mu[c, 0]
        id2 = pval[c] + self.mu[c, 0]
        xc = x[(x > id1) & (x < id2)]
        if xc.size == 0:
            # No grid point inside the interval: nothing to fill.
            continue

        # Compute the graph for filling; ravel so that the densities are
        # 1-d and can be padded with zeros below.
        yf = N.ravel(self.pdf_comp(xc[:, N.newaxis], c))
        xc = N.concatenate(([xc[0]], xc, [xc[-1]]))
        yf = N.concatenate(([0], yf, [0]))
        h = P.fill(xc, yf, facecolor='b', alpha=0.1, label='_nolegend_')
        hp['conf'].extend(h)

    if gpdf:
        # Evaluate the global pdf only when it is requested.
        h = P.plot(x, self.pdf(xcol), 'r:', label='_nolegend_')
        hp['gpdf'] = h

    return hp