def __init__( self, X, prior, alpha, K, assignments="rand", covariance_type="full" ): self.alpha = alpha N, D = X.shape # Initial component assignments if assignments == "rand": assignments = np.random.randint(0, K, N) # Make sure we have consequetive values for k in xrange(assignments.max()): while len(np.nonzero(assignments == k)[0]) == 0: assignments[np.where(assignments > k)] -= 1 if assignments.max() == k: break elif assignments == "each-in-own": assignments = np.arange(N) else: # assignments is a vector pass if covariance_type == "full": self.components = GaussianComponents(X, prior, assignments, K_max=K) elif covariance_type == "diag": self.components = GaussianComponentsDiag(X, prior, assignments, K_max=K) elif covariance_type == "fixed": self.components = GaussianComponentsFixedVar(X, prior, assignments, K_max=K) else: assert False, "Invalid covariance type."
def setup_components(self, K, assignments="rand", X=None):
    """
    Set up the `components` attribute.

    See the parameters of `FBGMM` for parameters not described below. This
    function is also useful for resetting the `components`, e.g. if you
    want to change the maximum number of possible components.

    Parameters
    ----------
    X : NxD matrix or None
        The data matrix. If None, then it is assumed that the `components`
        attribute has already been initialized and that this function is
        called to reset the `components`; in this case the data is taken
        from the previous initialization.
    """
    if X is None:
        assert hasattr(self, "components")
        X = self.components.X
    N, D = X.shape

    # Initial component assignments
    if isinstance(assignments, str) and assignments == "rand":
        assignments = np.random.randint(0, K, N)
    elif isinstance(assignments, str) and assignments == "each-in-own":
        assignments = np.arange(N)
    else:
        # `assignments` is already a vector of component indices
        pass

    # Make sure the assignments use consecutive component labels
    for k in range(assignments.max()):
        while len(np.nonzero(assignments == k)[0]) == 0:
            assignments[np.where(assignments > k)] -= 1
        if assignments.max() == k:
            break

    if self.covariance_type == "full":
        self.components = GaussianComponents(X, self.prior, assignments, K_max=K)
    elif self.covariance_type == "diag":
        self.components = GaussianComponentsDiag(X, self.prior, assignments, K_max=K)
    elif self.covariance_type == "fixed":
        self.components = GaussianComponentsFixedVar(X, self.prior, assignments, K_max=K)
    else:
        assert False, "Invalid covariance type."
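A minimal usage sketch of the reset path described in the docstring, assuming an already-initialized model instance `fbgmm` (hypothetical name):

# Hypothetical usage: reset the components with a larger cap, reusing the
# data from the previous initialization (the X=None path above).
fbgmm.setup_components(K=20, assignments="rand")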
def __init__( self, X, prior, alpha, assignments="rand", K=1, K_max=None): self.alpha = alpha N, D = X.shape if assignments == "rand": assignments = np.random.randint(0, K, N) for k in range(assignments.max()): while len(np.nonzero(assignments == k)[0]) == 0: assignments[np.where(assignments > k)] -= 1 if assignments.max() == k: break elif assignments == "one-by-one": assignments = -1*np.ones(N, dtype="int") assignments[0] = 0 # first data vector belongs to first component elif assignments == "each-in-own": assignments = np.arange(N) self.components = GaussianComponentsDiag(X, prior, assignments, K_max)
class IGMM(object):
    """Infinite GMM: collapsed Gibbs sampling for a Dirichlet process mixture model.

    See artifacts/Mardale, Doust, Couge, Ekpo_PGM_Deliverable_2.pdf for the
    mathematical derivations.
    """

    def __init__(self, X, prior, alpha, assignments="rand", K=1, K_max=None):
        self.alpha = alpha
        N, D = X.shape

        # Initial component assignments
        if assignments == "rand":
            assignments = np.random.randint(0, K, N)
            # Make sure the assignments use consecutive component labels
            for k in range(assignments.max()):
                while len(np.nonzero(assignments == k)[0]) == 0:
                    assignments[np.where(assignments > k)] -= 1
                if assignments.max() == k:
                    break
        elif assignments == "one-by-one":
            assignments = -1 * np.ones(N, dtype=int)
            assignments[0] = 0  # first data vector belongs to first component
        elif assignments == "each-in-own":
            assignments = np.arange(N)

        self.components = GaussianComponentsDiag(X, prior, assignments, K_max)

    def draw(self, p_k):
        """Draw an index according to the discrete distribution `p_k`."""
        k_uni = random.random()
        for i in range(len(p_k)):
            k_uni = k_uni - p_k[i]
            if k_uni < 0:
                return i
        return len(p_k) - 1

    def gibbs_sample(self, n_iter):
        """Run `n_iter` iterations of collapsed Gibbs sampling over the assignments."""
        record_dict = {}
        record_dict["components"] = []

        for i_iter in range(n_iter):
            for i in range(self.components.N):

                # Cache the current assignment, then remove data vector `i`
                k_old = self.components.assignments[i]
                K_old = self.components.K
                stats_old = self.components.cache_component_stats(k_old)
                self.components.del_item(i)

                # P(z_i = k | z_{-i}): existing components are weighted by
                # their counts, a new component by alpha
                log_prob_z = np.zeros(self.components.K + 1, float)
                log_prob_z[:self.components.K] = np.log(
                    self.components.counts[:self.components.K]
                    / (self.components.N + self.alpha - 1)
                    ) + self.components.log_post_pred(i)
                log_prob_z[-1] = math.log(
                    self.alpha / (self.components.N + self.alpha - 1)
                    ) + self.components.cached_log_prior[i]

                # Normalize with the log-sum-exp trick and sample a new assignment
                prob_z = np.exp(log_prob_z - logsumexp(log_prob_z))
                k = self.draw(prob_z)

                if k == k_old and self.components.K == K_old:
                    # Unchanged assignment: restore the cached statistics
                    self.components.restore_component_from_stats(k_old, *stats_old)
                    self.components.assignments[i] = k_old
                else:
                    self.components.add_item(i, k)

            record_dict["components"].append(self.components.K - 1)

        return record_dict
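A minimal end-to-end sketch of running the sampler, assuming a prior class `NIW(m_0, k_0, v_0, S_0)` is available in this codebase; the prior construction and hyperparameter values here are illustrative, not prescriptive.

import numpy as np

np.random.seed(1)

# Two well-separated 2-D clusters as toy data
X = np.vstack([np.random.randn(50, 2) + 5.0, np.random.randn(50, 2) - 5.0])
D = X.shape[1]

# Assumed prior class and hyperparameters; adjust to the actual prior API
prior = NIW(m_0=np.zeros(D), k_0=0.05, v_0=D + 3, S_0=0.5 * np.ones(D))

igmm = IGMM(X, prior, alpha=1.0, assignments="rand", K=1, K_max=X.shape[0])
record = igmm.gibbs_sample(n_iter=20)
print(record["components"])  # per-iteration component record kept by the sampler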