def init_approx(self, sampler, init_likelihood=True):
    if not isinstance(sampler, ep_clustering.GibbsSampler):
        raise TypeError("sampler must be a GibbsSampler object")
    self.K = sampler.K

    if init_likelihood:
        if self.separate_likeparams:
            # One likelihood copy (and parameter set) per cluster
            self.likelihood = [sampler.likelihood.deepcopy()
                               for k in range(self.K)]
            sampler.state.likelihood_parameter = [
                likelihood.parameter for likelihood in self.likelihood]
        else:
            # Single likelihood shared across clusters
            self.likelihood = sampler.likelihood

    theta_prior = self.get_likelihood().theta_prior
    if not isinstance(theta_prior, self.exp_family):
        raise TypeError("likelihood prior does not match EP exp_family")

    # Posterior approximation per cluster, site approximation per observation
    parameters = Map(
        post_approx=[theta_prior.copy() for k in range(self.K)],
        site_approx=[self.exp_family(num_dim=sampler.num_dim)
                     for ii in range(sampler.num_obs)],
    )
    self.parameters.update(parameters)

    sampler.sample_theta()
    sampler.update_approx_alg()
    self._sampler = sampler
    return
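# For intuition, the post_approx / site_approx split above is the standard EP
# bookkeeping: the approximate posterior for a cluster is the prior combined
# with one site term per observation, which for an exponential family is just
# addition in natural-parameter space. A minimal standalone sketch of that
# identity for a 1-D Gaussian family, using plain numpy rather than the
# package's exp_family class (names here are illustrative assumptions, not the
# package API):
import numpy as np

prior_site = np.array([1.0, 0.0])   # N(0, 1) prior as (precision, precision*mean)
obs_sites = [np.array([0.5, 1.0]) for _ in range(5)]   # five identical toy sites

def posterior_from_sites(prior, sites):
    # prior * product of sites  <=>  sum of natural parameters
    return prior + np.sum(sites, axis=0)

precision, shift = posterior_from_sites(prior_site, obs_sites)
print("posterior mean %.3f, variance %.3f" % (shift / precision, 1.0 / precision))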
def sample_parameters(self, prior={}): """ Sample parameters Args: prior (dict): (optional) mean_mean (ndarray): mean for mean mean_sd (ndarray): standard deviation for mean cov_psi (ndarray): scale matrix parameter for inverse Wishart cov_nu (double): df parameter for inverse Wishart df_alpha (double): shape for Gamma df_beta (double): rate for Gamma """ if not isinstance(prior, dict): raise TypeError("Prior must be dict not '{0}'".format(type(prior))) mean_mean = prior.get("mean_mean", np.zeros(self.num_dim)) mean_sd = prior.get("mean_sd", np.ones(self.num_dim)) cov_psi = prior.get("cov_psi", np.eye(self.num_dim)) cov_nu = prior.get("cov_nu", self.num_dim + 2) df_alpha = prior.get("df_alpha", 8.0) df_beta = prior.get("df_beta", 4.0) mean = np.random.normal(size=self.num_dim) * mean_sd + mean_mean cov = invwishart.rvs(df=cov_nu, scale=cov_psi) df = gamma.rvs(a=df_alpha, scale=1.0 / df_beta) parameters = Map(mean=mean, cov=cov, df=df) return parameters
def sample_parameters(self, prior={}): """ Sample parameters Args: prior (dict): (optional) mean_mean (double or ndarray): mean for mean mean_sd (double or ndarray): standard deviation for mean variance_alpha (double or ndarray): shape parameter for inverse Gamma variance_beta (double or ndarray): rate parameter for inverse Gamma """ if not isinstance(prior, dict): raise TypeError("Prior must be dict not '{0}'".format(type(prior))) mean_mean = prior.get("mean_mean", 0.0) mean_sd = prior.get("mean_sd", 2.0) variance_alpha = prior.get("sd_alpha", 5.0) variance_beta = prior.get("sd_beta", 5.0) mean = np.random.normal(size=self.num_dim) * mean_sd + mean_mean variance = 1.0 / np.random.gamma( shape=variance_alpha, scale=1.0 / variance_beta, size=self.num_dim, ) parameters = Map(mean=mean, variance=variance) return parameters
def _parse_param(self, **kwargs):
    # Defines self.param
    default = {
        'sigma2_x': 1.0,
        'A': None,
        'sigma2_y': None,
        'sigma2_theta': 1.0,
        'lambduh': None,
        'missing_obs': 0.0,
        'x_0': None,
    }
    # Override defaults with any recognized keyword arguments
    for key, value in kwargs.items():
        if key in default.keys():
            default[key] = value
    param = Map(default)

    # Handle variable arg defaults
    if param.A is None:
        param.A = 0.99 * np.ones(self.num_obs)
    if param.lambduh is None:
        param.lambduh = np.ones(self.num_obs)
    if param.sigma2_y is None:
        param.sigma2_y = np.ones(self.num_obs)
    if param.x_0 is None:
        # Draw the initial state from the AR(1) stationary distribution,
        # whose variance is sigma2_x / (1 - A^2)
        var_0 = param.sigma2_x * (1.0 / (1.0 - param.A**2))
        param.x_0 = np.random.normal(0, 1, self.num_obs) * np.sqrt(var_0)

    self.param = param
    return
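# The x_0 default above draws the initial latent state from the AR(1)
# stationary distribution: if x_t = A x_{t-1} + e_t with e_t ~ N(0, sigma2_x)
# and |A| < 1, the stationary variance is sigma2_x / (1 - A^2). A quick
# standalone numerical check of that identity (T is an assumed value for
# illustration; the empirical estimate is rough because the series is
# strongly autocorrelated at A = 0.99):
import numpy as np

A, sigma2_x, T = 0.99, 1.0, 200_000
x = np.zeros(T)
for t in range(1, T):
    x[t] = A * x[t - 1] + np.random.normal(0.0, np.sqrt(sigma2_x))

print(np.var(x[T // 10:]))        # empirical variance after burn-in
print(sigma2_x / (1.0 - A**2))    # closed-form stationary variance ~ 50.25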
def __init__(self, separate_likeparams=False, debug=False, **kwargs):
    self.parameters = Map()
    self.debug = debug
    self.separate_likeparams = separate_likeparams
    return
class MixtureDataGenerator(object):
    """ Mixture Model Data Generator

    Args:
        num_obs (int): number of observations
        num_dim (int): number of dimensions
        K (int): number of clusters
        component_type (string): name (see create_mixture_component)
        component_options (dict): optional kwargs for create_mixture_component
        **kwargs (dict):
            `Cluster Proportion Probabilities`
                cluster_proportions (ndarray): cluster proportion probabilities
                or
                proportion_prior (ndarray): parameter for Dirichlet prior
            `Cluster Component Parameters`
                cluster_parameters (list of dict): parameters for component
                or
                component_prior (dict): args for `generate_cluster_parameters`

    Examples:
        my_data = MixtureDataGenerator(num_obs=100, num_dim=2, K=3)
        my_data_2 = MixtureDataGenerator(num_obs=100, num_dim=2, K=3,
            component_type="gaussian")
        my_data_3 = MixtureDataGenerator(num_obs=100, num_dim=2, K=3,
            component_prior={'mean_sd': 10})
        my_data_4 = MixtureDataGenerator(num_obs=100, num_dim=1, K=2,
            cluster_parameters=[
                {'mean': np.array([10]), 'variance': np.array([1])},
                {'mean': np.array([-10]), 'variance': np.array([1])},
            ])

    Methods:
        generate_cluster_proportions(proportion_prior): cluster_proportions
        generate_cluster_parameters(component_prior): component_parameters
        generate_data(): returns data
    """

    def __init__(self, num_obs, num_dim, K, component_type='diag_gaussian',
                 component_options={}, **kwargs):
        self.num_obs = num_obs
        self.num_dim = num_dim
        self.K = K
        self.param = Map(component_type=component_type,
                         component_options=component_options, **kwargs)
        self.component = create_mixture_component(component_type, num_dim,
                                                  **component_options)
        return

    def generate_cluster_proportions(self, proportion_prior=None):
        if proportion_prior is not None:
            self.param.proportion_prior = proportion_prior
        if 'proportion_prior' not in self.param:
            self.param.proportion_prior = 100 * np.ones(self.K)
        # Flatten to a 1-D probability vector of length K
        cluster_proportions = np.random.dirichlet(
            alpha=self.param.proportion_prior, size=1).flatten()
        return cluster_proportions

    def generate_cluster_parameters(self, component_prior=None):
        if component_prior is not None:
            self.param.component_prior = component_prior
        cluster_parameters = [
            self.component.sample_parameters(
                self.param.get('component_prior', {})
            )
            for k in range(self.K)
        ]
        return cluster_parameters

    def generate_data(self):
        """ Generate Data

        Returns:
            data (MixtureData)
        """
        # Get Proportions
        if 'cluster_proportions' not in self.param:
            self.param.cluster_proportions = \
                self.generate_cluster_proportions()

        # Get Component Parameters
        if 'cluster_parameters' not in self.param:
            self.param.cluster_parameters = self.generate_cluster_parameters()
        else:
            self.param.cluster_parameters = [
                Map(cluster_parameter)
                for cluster_parameter in self.param.cluster_parameters
            ]

        # Generate Data: draw cluster labels, then one observation per label
        z = np.array([
            _categorical_sample(probs=self.param.cluster_proportions)
            for i in range(self.num_obs)
        ], dtype=int)
        matrix = np.zeros((self.num_obs, self.num_dim))
        for ii, z_ii in enumerate(z):
            matrix[ii, :] = self.component.sample_observation(
                self.param.cluster_parameters[z_ii])

        # Format Output
        data = MixtureData(
            matrix=matrix,
            z=z,
            num_obs=self.num_obs,
            num_dim=self.num_dim,
            K=self.K,
            parameters=self.param,
        )
        return data
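# The full generative process implemented by MixtureDataGenerator with the
# default diag_gaussian component can be written compactly in plain numpy.
# The sketch below mirrors the defaults above (Dirichlet(100) proportions,
# Normal(0, 2^2) means, InvGamma(5, 5) variances); it is illustrative only,
# not the package API:
import numpy as np

num_obs, num_dim, K = 100, 2, 3
rng = np.random.default_rng(0)

proportions = rng.dirichlet(100 * np.ones(K))            # cluster proportions
means = rng.normal(0.0, 2.0, size=(K, num_dim))          # per-cluster means
variances = 1.0 / rng.gamma(5.0, 1.0 / 5.0, size=(K, num_dim))

z = rng.choice(K, size=num_obs, p=proportions)           # cluster labels
matrix = rng.normal(means[z], np.sqrt(variances[z]))     # observations
print(matrix.shape, np.bincount(z, minlength=K))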