コード例 #1
0
 def __init__(self,
              num_obs,
              num_dim,
              K,
              component_type='diag_gaussian',
              component_options=None,
              **kwargs):
     """ Store the problem sizes and build the mixture component

     Args:
         num_obs: stored as `self.num_obs`
         num_dim: stored as `self.num_dim` and passed to
             `create_mixture_component`
         K: stored as `self.K`
         component_type: name passed to `create_mixture_component`
         component_options (dict): optional keyword arguments forwarded to
             `create_mixture_component`; `None` means no extra options
         **kwargs: extra entries stored alongside `component_type` and
             `component_options` in `self.param`
     """
     # Use a fresh dict per call: a `{}` default in the signature is a
     # single object shared by every call that omits the argument.
     if component_options is None:
         component_options = {}
     self.num_obs = num_obs
     self.num_dim = num_dim
     self.K = K
     self.param = Map(component_type=component_type,
                      component_options=component_options,
                      **kwargs)
     self.component = create_mixture_component(component_type, num_dim,
                                               **component_options)
コード例 #2
0
    def init_approx(self, sampler, init_likelihood=True):
        """ Initialize the approximation parameters from a sampler

        Sets `self.K`, optionally `self.likelihood`, fills
        `self.parameters` with `post_approx` and `site_approx`, then calls
        `sampler.sample_theta()` and `sampler.update_approx_alg()` and keeps
        a reference to the sampler in `self._sampler`.

        Args:
            sampler (ep_clustering.GibbsSampler): sampler providing `K`,
                `num_dim`, `num_obs`, `likelihood` and `state`
            init_likelihood (bool): if True, set `self.likelihood` from
                `sampler.likelihood`

        Raises:
            TypeError: if `sampler` is not a `GibbsSampler`, or if the
                likelihood's `theta_prior` is not an instance of
                `self.exp_family`
        """
        if not isinstance(sampler, ep_clustering.GibbsSampler):
            # The check is on the sampler, so the message names the sampler.
            raise TypeError("sampler must be a GibbsSampler object")
        self.K = sampler.K
        if init_likelihood:
            if self.separate_likeparams:
                # One `deepcopy()` of the sampler's likelihood per cluster;
                # the sampler state is pointed at the copies' parameters.
                self.likelihood = [sampler.likelihood.deepcopy()
                                   for _ in range(self.K)]
                sampler.state.likelihood_parameter = [
                    likelihood.parameter
                    for likelihood in self.likelihood]
            else:
                self.likelihood = sampler.likelihood

        theta_prior = self.get_likelihood().theta_prior
        if not isinstance(theta_prior, self.exp_family):
            raise TypeError("likelihood prior does not match EP exp_family")
        parameters = Map(
            # One copy of the prior per cluster.
            post_approx=[theta_prior.copy() for _ in range(self.K)],
            # One `exp_family` instance per observation.
            site_approx=[self.exp_family(num_dim=sampler.num_dim)
                         for _ in range(sampler.num_obs)],
        )
        self.parameters.update(parameters)
        sampler.sample_theta()
        sampler.update_approx_alg()
        self._sampler = sampler
コード例 #3
0
    def sample_parameters(self, prior={}):
        """ Draw one set of component parameters (mean, cov, df)

        Args:
            prior (dict): (optional) hyperparameters; missing keys fall back
                to the defaults below
                mean_mean (ndarray): mean for mean (default zeros)
                mean_sd (ndarray): standard deviation for mean (default ones)
                cov_psi (ndarray): scale matrix parameter for inverse Wishart
                    (default identity)
                cov_nu (double): df parameter for inverse Wishart
                    (default num_dim + 2)
                df_alpha (double): shape for Gamma (default 8.0)
                df_beta (double): rate for Gamma (default 4.0)

        Returns:
            parameters (Map): with entries `mean`, `cov` and `df`

        Raises:
            TypeError: if `prior` is not a dict
        """
        if not isinstance(prior, dict):
            raise TypeError("Prior must be dict not '{0}'".format(type(prior)))

        dim = self.num_dim
        loc = prior.get("mean_mean", np.zeros(dim))
        spread = prior.get("mean_sd", np.ones(dim))
        psi = prior.get("cov_psi", np.eye(dim))
        nu = prior.get("cov_nu", dim + 2)
        shape = prior.get("df_alpha", 8.0)
        rate = prior.get("df_beta", 4.0)

        # Draw order (mean, cov, df) is kept so seeded runs are unchanged.
        standard_draw = np.random.normal(size=dim)
        sampled_mean = standard_draw * spread + loc
        sampled_cov = invwishart.rvs(df=nu, scale=psi)
        # The Gamma rate is converted to the scale that `gamma.rvs` takes.
        sampled_df = gamma.rvs(a=shape, scale=1.0 / rate)
        return Map(mean=sampled_mean, cov=sampled_cov, df=sampled_df)
コード例 #4
0
    def sample_parameters(self, prior={}):
        """ Sample parameters

        Args:
            prior (dict): (optional)
                mean_mean (double or ndarray): mean for mean (default 0.0)
                mean_sd (double or ndarray): standard deviation for mean
                    (default 2.0)
                variance_alpha (double or ndarray):
                    shape parameter for inverse Gamma (default 5.0)
                variance_beta (double or ndarray):
                    rate parameter for inverse Gamma (default 5.0)
                sd_alpha, sd_beta: legacy names for `variance_alpha` and
                    `variance_beta`; used only when the `variance_*` key
                    is absent

        Returns:
            parameters (Map): with entries `mean` and `variance`, each drawn
                with `size=self.num_dim`

        Raises:
            TypeError: if `prior` is not a dict
        """
        if not isinstance(prior, dict):
            raise TypeError("Prior must be dict not '{0}'".format(type(prior)))
        mean_mean = prior.get("mean_mean", 0.0)
        mean_sd = prior.get("mean_sd", 2.0)
        # The documented keys are `variance_*`, but this method used to read
        # only `sd_*`, so documented hyperparameters were silently ignored.
        # Honour the documented names and keep the old ones as a fallback.
        variance_alpha = prior.get("variance_alpha",
                                   prior.get("sd_alpha", 5.0))
        variance_beta = prior.get("variance_beta",
                                  prior.get("sd_beta", 5.0))

        mean = np.random.normal(size=self.num_dim) * mean_sd + mean_mean
        # Inverse Gamma draw: reciprocal of a Gamma(shape, scale=1/rate) draw.
        variance = 1.0 / np.random.gamma(
            shape=variance_alpha,
            scale=1.0 / variance_beta,
            size=self.num_dim,
        )
        parameters = Map(mean=mean, variance=variance)
        return parameters
コード例 #5
0
    def _parse_param(self, **kwargs):
        """ Build `self.param` from defaults overridden by recognized kwargs

        Keyword arguments whose name is not one of the default keys are
        ignored. Entries still `None` after the override step are filled
        with arrays of length `self.num_obs`.
        """
        settings = {
            'sigma2_x': 1.0,
            'A': None,
            'sigma2_y': None,
            'sigma2_theta': 1.0,
            'lambduh': None,
            'missing_obs': 0.0,
            'x_0': None,
        }
        overrides = {name: value for name, value in kwargs.items()
                     if name in settings}
        settings.update(overrides)

        parsed = Map(settings)
        num_obs = self.num_obs

        # Fill in the array-valued defaults that depend on num_obs
        if parsed.A is None:
            parsed.A = 0.99 * np.ones(num_obs)
        if parsed.lambduh is None:
            parsed.lambduh = np.ones(num_obs)
        if parsed.sigma2_y is None:
            parsed.sigma2_y = np.ones(num_obs)
        if parsed.x_0 is None:
            # x_0 is a zero-mean normal draw scaled by sqrt(var_0), where
            # var_0 = sigma2_x / (1 - A**2); it must be set after A.
            var_0 = parsed.sigma2_x * (1.0 / (1.0 - parsed.A**2))
            parsed.x_0 = np.random.normal(0, 1, num_obs) * np.sqrt(var_0)

        self.param = parsed
コード例 #6
0
    def generate_data(self):
        """ Sample cluster assignments and observations

        Cluster proportions and cluster parameters are generated and stored
        in `self.param` when they are not already present there.

        Returns:
            data (MixtureData)
        """
        # Proportions: generate only when none were supplied
        if 'cluster_proportions' not in self.param:
            self.param.cluster_proportions = \
                self.generate_cluster_proportions()

        # Component parameters: wrap supplied entries in Map, else generate
        if 'cluster_parameters' in self.param:
            self.param.cluster_parameters = [
                Map(entry) for entry in self.param.cluster_parameters
            ]
        else:
            self.param.cluster_parameters = self.generate_cluster_parameters()

        # One cluster label per observation
        labels = [
            _categorical_sample(probs=self.param.cluster_proportions)
            for _ in range(self.num_obs)
        ]
        z = np.array(labels, dtype=int)

        # One sampled observation per row, from the row's cluster
        matrix = np.zeros((self.num_obs, self.num_dim))
        for row, label in enumerate(z):
            cluster_parameter = self.param.cluster_parameters[label]
            matrix[row, :] = self.component.sample_observation(
                cluster_parameter)

        return MixtureData(
            matrix=matrix,
            z=z,
            num_obs=self.num_obs,
            num_dim=self.num_dim,
            K=self.K,
            parameters=self.param,
        )
コード例 #7
0
 def __init__(self, separate_likeparams=False, debug=False, **kwargs):
     """ Record the configuration flags and start with empty parameters

     Args:
         separate_likeparams (bool): stored as `self.separate_likeparams`
         debug (bool): stored as `self.debug`
         **kwargs: accepted but not used by this method
     """
     self.separate_likeparams = separate_likeparams
     self.debug = debug
     self.parameters = Map()
コード例 #8
0
class MixtureDataGenerator(object):
    """ Mixture Model Data Generator

    Args:
        num_obs (int): number of observations
        num_dim (int): number of dimensions
        K (int): number clusters
        component_type (string): name (see create_mixture_component)
        component_options (dict): optional kwargs args for
            create_mixture_component (default: no extra options)
        **kwargs (dict):
            `Cluster Proportion Probabilities`
                cluster_proportions (ndarray): cluster proportion probabilities
                    or
                proportion_prior (ndarray): parameter for Dirichlet prior
            `Cluster Component Parameters`
                cluster_parameters (list of dict): parameters for component
                    or
                component_prior (dict): args for `generate_cluster_parameters`

    Examples:
        my_data = MixtureDataGenerator(num_obs=100, num_dim=2, K=3)
        my_data_2 = MixtureDataGenerator(num_obs=100, num_dim=2, K=3,
            component_type = "gaussian")

        my_data_3 = MixtureDataGenerator(num_obs=100, num_dim=2, K=3,
            component_prior = {'mean_sd': 10})

        my_data_4 = MixtureDataGenerator(num_obs=100, num_dim=1, K=2,
            cluster_parameters = [
                {'mean': np.array([10]), 'variance': np.array([1])},
                {'mean': np.array([-10]), 'variance': np.array([1])},
            ])

    Methods:
        generate_cluster_proportions(proportion_prior): cluster_proportions
        generate_cluster_parameters(component_prior): cluster_parameters
        generate_data(): returns data
    """
    def __init__(self,
                 num_obs,
                 num_dim,
                 K,
                 component_type='diag_gaussian',
                 component_options=None,
                 **kwargs):
        # Use a fresh dict per instance: a `{}` default in the signature is
        # a single object shared by every call that omits the argument.
        if component_options is None:
            component_options = {}
        self.num_obs = num_obs
        self.num_dim = num_dim
        self.K = K
        self.param = Map(component_type=component_type,
                         component_options=component_options,
                         **kwargs)
        self.component = create_mixture_component(component_type, num_dim,
                                                  **component_options)

    def generate_cluster_proportions(self, proportion_prior=None):
        """ Draw cluster proportions from a Dirichlet distribution

        Args:
            proportion_prior (ndarray): (optional) Dirichlet parameter; when
                given it replaces `self.param.proportion_prior`. If no prior
                is available, `100 * np.ones(K)` is stored and used.

        Returns:
            cluster_proportions (ndarray): result of `np.random.dirichlet`
                called with `size=1`
        """
        if proportion_prior is not None:
            self.param.proportion_prior = proportion_prior
        if 'proportion_prior' not in self.param:
            self.param.proportion_prior = 100 * np.ones(self.K)

        cluster_proportions = np.random.dirichlet(
            alpha=self.param.proportion_prior, size=1)

        return cluster_proportions

    def generate_cluster_parameters(self, component_prior=None):
        """ Sample one parameter set per cluster from the component

        Args:
            component_prior (dict): (optional) when given it replaces
                `self.param.component_prior`; the stored prior (or `{}` if
                none is stored) is passed to `component.sample_parameters`

        Returns:
            cluster_parameters (list): K results of
                `self.component.sample_parameters`
        """
        if component_prior is not None:
            self.param.component_prior = component_prior

        cluster_parameters = [
            self.component.sample_parameters(
                self.param.get('component_prior', {}))
            for _ in range(self.K)
        ]

        return cluster_parameters

    def generate_data(self):
        """ Generate Data

        Cluster proportions and cluster parameters are generated and stored
        in `self.param` when they are not already present there.

        Returns:
            data (MixtureData)
        """
        # Get Proportions
        if 'cluster_proportions' not in self.param:
            self.param.cluster_proportions = self.generate_cluster_proportions(
            )

        # Get Component Parameters
        if 'cluster_parameters' not in self.param:
            self.param.cluster_parameters = self.generate_cluster_parameters()
        else:
            # User-supplied entries are wrapped in Map
            self.param.cluster_parameters = [
                Map(cluster_parameter)
                for cluster_parameter in self.param.cluster_parameters
            ]

        # Generate Data: one cluster label per observation
        z = np.array([
            _categorical_sample(probs=self.param.cluster_proportions)
            for _ in range(self.num_obs)
        ], dtype=int)

        # One sampled observation per row, from the row's cluster
        matrix = np.zeros((self.num_obs, self.num_dim))
        for ii, z_ii in enumerate(z):
            matrix[ii, :] = self.component.sample_observation(
                self.param.cluster_parameters[z_ii])

        # Format Output
        data = MixtureData(
            matrix=matrix,
            z=z,
            num_obs=self.num_obs,
            num_dim=self.num_dim,
            K=self.K,
            parameters=self.param,
        )
        return data