Example no. 1
0
def test_InvWishartRandomWalkProposal():
    """Check that inverse-Wishart random-walk proposals are centered on their mean.

    Draws a few random matrices K from an inverse-Wishart, generates many
    random-walk proposals around each, and asserts that the mean squared
    error between K and the empirical proposal mean stays small.
    """
    # NOTE: the original pre-loop `dim`/`iw` assignments were dead code —
    # both names are immediately rebound inside the loop — so they are removed.
    for dim in [4]:
        df = stats.poisson.rvs(2)  # random extra degrees of freedom
        iw = invwishart(np.eye(dim) * 5, dim + 1)
        for K in [iw.rv() for _ in range(3)]:
            pdist = pmc.InvWishartRandomWalkProposal(dim + 1 + df, dim)
            props = np.array([pdist.gen_proposal(mean=K).sample
                              for _ in range(3000)])
            mse = ((K - props.mean(0)) ** 2).mean()
            # Loose tolerance: the proposals are stochastic.
            assert np.abs(mse) < 5
Example no. 2
0
def test_InvWishartRandomWalkProposal():
    """Sanity-check the inverse-Wishart random-walk proposal distribution."""
    dim = 4
    iw = invwishart(np.eye(dim) * 5, dim + 1)
    for dim in [4]:
        df = stats.poisson.rvs(2)
        iw = invwishart(np.eye(dim) * 5, dim + 1)
        for K in (iw.rv() for _ in range(3)):
            pdist = pmc.InvWishartRandomWalkProposal(dim + 1 + df, dim)
            samples = []
            for _ in range(3000):
                samples.append(pdist.gen_proposal(mean=K).sample)
            props = np.array(samples)
            # Mean squared deviation of the empirical proposal mean from K.
            deviation = K - props.mean(0)
            mse = (deviation ** 2).mean()
            assert np.abs(mse) < 5
def approximate_mixture_data():
    """Fit a two-component t-mixture with PMC, comparing proposal strategies.

    Generates synthetic observations from two well-separated multivariate-t
    components, then runs PMC sampling with a locally-informed proposal and
    with the standard proposal, printing diagnostics (component assignments,
    entropy vs. the true weights, and evaluation counts) for each.

    Returns:
        dict mapping proposal name ("infl", "standard") to a tuple of
        (samples, log posterior values).
    """
    num_loc_proposals = 2
    num_imp_samp = 1000
    n_comp = 2
    p_comp = np.array([0.7, 0.3])  # true mixture weights
    dim = 1
    num_obs = 100
    obs = None

    means = []

    for i in range(n_comp):
        means.append([20 * i] * dim)  # components spaced 20 units apart
        # BUG FIX: np.int was removed in NumPy 1.24; use the builtin int.
        n_i = int(np.round(num_obs * p_comp[i]))
        draws = dist.mvt(means[-1], np.eye(dim), 30).rvs(n_i)
        if obs is None:
            obs = draws
        else:
            obs = np.vstack([obs, draws])

    # Counters for how often each posterior / likelihood variant is evaluated.
    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0,
             "standard_lpost": 0, "standard_llhood": 0}
    print(means)

    def count_closure(name):
        # Return a no-argument callback that increments count[name].
        def rval():
            count[name] = count[name] + 1
        return rval

    # Ten independent initial mixture states for the PMC population.
    initial_samples = []
    for _ in range(10):
        initial_samples.append(
            DirCatTMM(obs, [1] * n_comp,
                      dist.mvt(np.mean(means, 0), np.eye(dim) * 5, dim),
                      dist.invwishart(np.eye(dim) * 5, dim + 1),
                      stats.gamma(1, scale=1)))

    (infl_samp, infl_lpost) = pmc.sample(
        num_imp_samp, initial_samples,
        DirCatTMMProposal(num_local_proposals=num_loc_proposals,
                          lpost_count=count_closure("local_lpost"),
                          llhood_count=count_closure("local_llhood")),
        population_size=4)

    # Standard proposal gets proportionally more importance samples so that
    # the total number of proposals matches the local variant.
    (stand_samp, stand_lpost) = pmc.sample(
        num_imp_samp * num_loc_proposals, initial_samples,
        DirCatTMMProposal(lpost_count=count_closure("standard_lpost"),
                          llhood_count=count_closure("standard_llhood")),
        population_size=4)

    print("===============\n", p_comp, means,
          "\n\n--LOCAL--\n",
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].comp_indic.sum(0)) + 1,
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].comp_indic.sum(0)) + 1,
          count["standard_llhood"], count["standard_lpost"], "\n\n")
    return {"infl": (infl_samp, infl_lpost),
            "standard": (stand_samp, stand_lpost)}
Example no. 4
0
def test_DirCatTMMProposal():
    """End-to-end test of DirCatTMMProposal on a synthetic two-component mixture.

    Generates observations from two well-separated multivariate-t components
    and runs PMC with both the locally-informed and the standard proposal,
    printing assignment and evaluation-count diagnostics.
    """
    num_loc_proposals = 2
    num_imp_samp = 1000
    n_comp = 2
    p_comp = np.array([0.7, 0.3])  # true mixture weights
    dim = 1
    num_obs = 100
    obs = None

    means = []

    for i in range(n_comp):
        means.append([20 * i] * dim)  # components spaced 20 units apart
        # BUG FIX: np.int was removed in NumPy 1.24; use the builtin int.
        n_i = int(np.round(num_obs * p_comp[i]))
        draws = dist.mvt(means[-1], np.eye(dim), 30).rvs(n_i)
        if obs is None:
            obs = draws
        else:
            obs = np.vstack([obs, draws])

    # Counters for how often each posterior / likelihood variant is evaluated.
    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0,
             "standard_lpost": 0, "standard_llhood": 0}
    print(means)

    def count_closure(name):
        # Return a no-argument callback that increments count[name].
        def rval():
            count[name] = count[name] + 1
        return rval

    # Ten independent initial mixture states for the PMC population.
    initial_samples = []
    for _ in range(10):
        initial_samples.append(
            dis.DirCatTMM(obs, [1] * n_comp,
                          dist.mvt(np.mean(means, 0), np.eye(dim) * 5, dim),
                          dist.invwishart(np.eye(dim) * 5, dim + 1),
                          stats.gamma(1, scale=1)))

    (infl_samp, infl_lpost) = pmc.sample(
        num_imp_samp, initial_samples,
        dis.DirCatTMMProposal(num_local_proposals=num_loc_proposals,
                              lpost_count=count_closure("local_lpost"),
                              llhood_count=count_closure("local_llhood")),
        population_size=4)

    # Standard proposal gets proportionally more importance samples so that
    # the total number of proposals matches the local variant.
    (stand_samp, stand_lpost) = pmc.sample(
        num_imp_samp * num_loc_proposals, initial_samples,
        dis.DirCatTMMProposal(lpost_count=count_closure("standard_lpost"),
                              llhood_count=count_closure("standard_llhood")),
        population_size=4)

    print("===============\n", p_comp, means,
          "\n\n--LOCAL--\n",
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].comp_indic.sum(0)) + 1,
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].comp_indic.sum(0)) + 1,
          count["standard_llhood"], count["standard_lpost"], "\n\n")
Example no. 5
0
def test_DirCatTMM():
    """Exercise DirCatTMM's component-indicator posterior, per-component
    likelihood, and prior on synthetic multivariate-t data."""
    num_obs = 1000
    for dim in range(2, 4):
        mu = np.array([11 * (k + 1) for k in range(dim)])
        K = np.eye(dim) * 5
        df = dim + 1
        obs_dist = dist.mvt(mu, K, df)
        obs = obs_dist.rvs(num_obs)
        dctmm = dis.DirCatTMM(obs, [1] * dim, obs_dist,
                              dist.invwishart(np.eye(dim) * 5, dim + 1),
                              stats.gamma(1, scale=1, loc=dim + 1))
        orig_cat_param = dctmm.cat_param
        dctmm.cat_param = np.zeros(dim)
        for comp in range(dim):
            dctmm.cat_param[comp] = 1
            # --- DirCatTMM.lpost_comp_indic: with all mass on `comp`, only
            # that component's indicator may have finite log-posterior.
            for other in range(dim):
                c_indic = np.zeros(dim)
                c_indic[other] = 1
                for row in range(obs.shape[0]):
                    if comp == other:
                        assert dctmm.lpost_comp_indic(c_indic, row) > -np.inf
                    else:
                        assert dctmm.lpost_comp_indic(c_indic, row) == -np.inf
                c_indic[other] = 0
            # --- DirCatTMM.llhood_comp_param: the generating parameters
            # must be at least as likely as each perturbation.
            highest = dctmm.llhood_comp_param((mu, K, df), comp)
            for perturbed in ((-mu, K, df), (mu, K * 5, df),
                              (mu, K / 2, df), (mu, K, df + 10)):
                assert highest >= dctmm.llhood_comp_param(perturbed, comp)
            dctmm.cat_param[comp] = 0

        # --- DirCatTMM.lprior: the prior mode should beat perturbed states.
        dctmm.cat_param = np.array(dctmm.dir_param / dctmm.dir_param.sum())
        dctmm.comp_indic = dist.categorical(dctmm.cat_param).rvs(num_obs,
                                                                 indic=True)
        dctmm.update_comp_dists([(mu, K, df)] * dim)
        highest = dctmm.lprior()

        c_param = dctmm.dir_param + np.arange(dim)
        dctmm.cat_param = np.array(c_param / c_param.sum())
        ch_cat_param = dctmm.lprior()
        assert highest > ch_cat_param
        dctmm.update_comp_dists([(-mu, K, df)] * dim)
        assert ch_cat_param > dctmm.lprior()
Example no. 6
0
    def gen_proposal(self, ancestor=None, mean=None):
        """Build a proposal sample from an inverse-Wishart centered on
        ``mean`` (falling back to the ancestor's sample when omitted)."""
        # Require a usable dim x dim matrix from one of the two arguments.
        assert (
            (mean is not None and np.prod(mean.shape) == self.dim ** 2)
            or (ancestor is not None
                and ancestor.sample is not None
                and np.prod(ancestor.sample.shape) == self.dim ** 2)
        )

        if mean is None and ancestor is not None:
            mean = ancestor.sample

        # Scale so the proposal's expectation equals `mean`
        # (inverse-Wishart mean = scale / (df - dim - 1)).
        pdist = invwishart(mean * (self.df - self.dim - 1), self.df)
        if not hasattr(pdist, "rvs"):
            # Alias rv() under the scipy-style name rvs() for downstream code.
            pdist.__dict__["rvs"] = pdist.rv

        return gen_sample_prototype(ancestor, self,
                                    prop_dist=pdist, lpost_func=self.lpost)
Example no. 7
0
    def gen_proposal(self, ancestor=None, mean=None):
        """Generate an inverse-Wishart random-walk proposal around ``mean``,
        defaulting to the ancestor's sample when ``mean`` is not given."""
        dim_sq = self.dim ** 2
        assert ((mean is not None and np.prod(mean.shape) == dim_sq) or
                (ancestor is not None and
                 ancestor.sample is not None and
                 np.prod(ancestor.sample.shape) == dim_sq))

        if mean is None and ancestor is not None:
            mean = ancestor.sample

        # Pick the scale matrix so the proposal is centered on `mean`
        # (inverse-Wishart mean = scale / (df - dim - 1)).
        scale_matr = mean * (self.df - self.dim - 1)
        pdist = invwishart(scale_matr, self.df)
        if not hasattr(pdist, "rvs"):
            pdist.__dict__["rvs"] = pdist.rv  # expose scipy-style rvs()

        return gen_sample_prototype(ancestor,
                                    self,
                                    prop_dist=pdist,
                                    lpost_func=self.lpost)
def approximate_iris_mixture_data():
    """Run PMC mixture estimation on the Iris data, comparing the
    locally-informed proposal against the standard one.

    Returns a dict mapping "infl"/"standard" to (samples, log posteriors).
    """
    from sklearn.datasets import load_iris
    num_imp_samp = 100
    num_loc_proposals = 3
    n_comp = 3
    p_comp = np.array([1/n_comp] * n_comp)
    dim = 4
    iris = load_iris()
    obs = iris["data"]
    labels = iris["target"]
    # Per-class sample means: the classes are stored in blocks of 50 rows.
    means = np.array([obs[i*50:(i+1)*50].mean(0) for i in range(3)])

    # Evaluation counters, all starting at zero.
    count = dict.fromkeys(["local_lpost", "local_llhood",
                           "naive_lpost", "naive_llhood",
                           "standard_lpost", "standard_llhood"], 0)

    def count_closure(name):
        # Callback that bumps the named counter on each call.
        def rval():
            count[name] = count[name] + 1
        return rval

    initial_samples = [
        DirCatTMM(obs, [1]*n_comp,
                  dist.mvt(obs.mean(0), np.diag(obs.var(0)), 20),
                  dist.invwishart(np.eye(dim), 50),
                  stats.gamma(500, scale=0.1))
        for _ in range(10)
    ]

    local_prop = DirCatTMMProposal(num_local_proposals=num_loc_proposals,
                                   lpost_count=count_closure("local_lpost"),
                                   llhood_count=count_closure("local_llhood"))
    (infl_samp, infl_lpost) = pmc.sample(num_imp_samp, initial_samples,
                                         local_prop, population_size=4)

    standard_prop = DirCatTMMProposal(
        lpost_count=count_closure("standard_lpost"),
        llhood_count=count_closure("standard_llhood"))
    (stand_samp, stand_lpost) = pmc.sample(num_imp_samp * num_loc_proposals,
                                           initial_samples, standard_prop,
                                           population_size=4)

    print("===============\n", p_comp, means,
          "\n\n--LOCAL--\n",
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].cat_param.flatten()),
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].cat_param.flatten()),
          count["standard_llhood"], count["standard_lpost"], "\n\n")
    return {"infl": (infl_samp, infl_lpost),
            "standard": (stand_samp, stand_lpost)}
 def gen_proposal(self, ancestor = None, mean = None):
     """Propose a new inverse-Wishart sample centered on ``mean``.

     Falls back to ``ancestor.sample`` (or a zero matrix) when ``mean``
     is not given.  Returns a PmcSample with sample, lpost, lprop and
     lweight populated.
     """
     # Require a usable dim x dim mean matrix from one of the arguments.
     assert((mean is not None and
             np.prod(mean.shape) == self.dim**2) or
            (ancestor is not None and
            ancestor.sample is not None and
            np.prod(ancestor.sample.shape) == self.dim**2))
     rval = PmcSample(ancestor, prop_obj = self)        
     if mean is None and ancestor is not None:
         if ancestor.sample is not None:
             mean = ancestor.sample
         else:
             # No ancestor sample: center on zero, with the shape taken
             # from a draw of the base proposal distribution.
             mean = np.zeros(self.pdist.rvs().shape)
     # Scale so the proposal's expectation equals `mean`
     # (inverse-Wishart mean = scale / (df - dim - 1)).
     scale_matr = mean * (self.df - self.dim - 1)
     pdist = invwishart(scale_matr, self.df)
     rval.sample = pdist.rv()
     rval.lpost = self.lpost(rval.sample)
     rval.lprop = pdist.logpdf(rval.sample)
     # Importance weight = log posterior - log proposal density; left
     # undefined when the posterior could not be evaluated.
     if rval.lpost is not None:
         rval.lweight = rval.lpost - rval.lprop
     else:
         rval.lweight = None
         
     return rval
          stand_samp[-1].comp_indic.sum(0), stats.entropy(p_comp, stand_samp[-1].cat_param.flatten()), count["standard_llhood"], count["standard_lpost"],"\n\n")   
    return {"infl":(infl_samp, infl_lpost), "standard":(stand_samp, stand_lpost)}
          

if __name__ == "__main__":
    import scipy.io as io

    # Oil-flow benchmark data: stack the training, test and validation
    # splits (labels and features); features are scaled by 100 —
    # presumably for numerical conditioning (TODO confirm).
    of3 = io.loadmat("data/oilFlow3Class.mat")
    of3_lab = np.vstack((of3["DataTrnLbls"],
                         of3["DataTstLbls"],
                         of3["DataVdnLbls"]))
    of3 = np.vstack((of3["DataTrn"],
                     of3["DataTst"],
                     of3["DataVdn"])) * 100

    # Ten independent initial mixture states for the PMC population.
    initial = [DirCatTMM(of3,
                         [1] * 3,
                         dist.mvnorm([0] * 12,
                         np.eye(12)),
                         dist.invwishart(np.eye(12) * 5, 12),
                         stats.gamma(1, scale=1)) for _ in range(10)]

    # Evaluation counters.
    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0}

    def count_closure(name):
        # Return a no-argument callback that increments count[name].
        def rval():
            count[name] = count[name] + 1
        return rval

    # BUG FIX: llhood_count was previously wired to count_closure("naive_lpost")
    # as well, so "naive_llhood" was never incremented and the lpost counter
    # was doubled; route it to its own counter.
    samps = pmc.sample(50,
                       initial,
                       DirCatTMMProposal(lpost_count=count_closure("naive_lpost"),
                                         llhood_count=count_closure("naive_llhood")),
                       population_size=5,
                       quiet=False)
    
Example no. 11
0
def approximate_iris_mixture_data():
    """Compare the local and standard DirCatTMM proposals on the Iris data.

    Returns a dict mapping "infl"/"standard" to (samples, log posteriors).
    """
    from sklearn.datasets import load_iris
    num_imp_samp = 100
    num_loc_proposals = 3
    n_comp = 3
    p_comp = np.array([1 / n_comp] * n_comp)
    dim = 4
    iris = load_iris()
    obs = iris["data"]
    labels = iris["target"]
    # Per-class sample means: the classes occupy blocks of 50 rows.
    means = np.array([obs[i * 50:(i + 1) * 50].mean(0) for i in range(3)])

    # Evaluation counters for each proposal variant.
    count = {}
    for variant in ("local", "naive", "standard"):
        count[variant + "_lpost"] = 0
        count[variant + "_llhood"] = 0

    def count_closure(name):
        # Build a no-argument callback that increments count[name].
        def rval():
            count[name] = count[name] + 1
        return rval

    initial_samples = []
    for _ in range(10):
        mean_prior = dist.mvt(obs.mean(0), np.diag(obs.var(0)), 20)
        cov_prior = dist.invwishart(np.eye(dim), 50)
        df_prior = stats.gamma(500, scale=0.1)
        initial_samples.append(
            DirCatTMM(obs, [1] * n_comp, mean_prior, cov_prior, df_prior))

    (infl_samp, infl_lpost) = pmc.sample(
        num_imp_samp,
        initial_samples,
        DirCatTMMProposal(num_local_proposals=num_loc_proposals,
                          lpost_count=count_closure("local_lpost"),
                          llhood_count=count_closure("local_llhood")),
        population_size=4)

    (stand_samp, stand_lpost) = pmc.sample(
        num_imp_samp * num_loc_proposals,
        initial_samples,
        DirCatTMMProposal(lpost_count=count_closure("standard_lpost"),
                          llhood_count=count_closure("standard_llhood")),
        population_size=4)

    summary = ("===============\n", p_comp, means, "\n\n--LOCAL--\n",
               infl_samp[-1].comp_indic.sum(0),
               stats.entropy(p_comp, infl_samp[-1].cat_param.flatten()),
               count["local_llhood"], count["local_lpost"],
               "\n\n--STANDARD--\n",
               stand_samp[-1].comp_indic.sum(0),
               stats.entropy(p_comp, stand_samp[-1].cat_param.flatten()),
               count["standard_llhood"], count["standard_lpost"], "\n\n")
    print(*summary)
    return {"infl": (infl_samp, infl_lpost),
            "standard": (stand_samp, stand_lpost)}
Example no. 12
0
    # Oil-flow benchmark: stack the training, test and validation label splits.
    of3 = io.loadmat("data/oilFlow3Class.mat")
    of3_lab = np.vstack((
        of3["DataTrnLbls"],
        of3["DataTstLbls"],
        of3["DataVdnLbls"],
    ))
    # Stack the matching feature splits; scaled by 100 — presumably for
    # numerical conditioning of the covariance priors (TODO confirm).
    of3 = np.vstack((
        of3["DataTrn"],
        of3["DataTst"],
        of3["DataVdn"],
    )) * 100

    # Ten independent initial mixture-model states for the PMC population.
    initial = [
        DirCatTMM(of3, [1] * 3, dist.mvnorm([0] * 12, np.eye(12)),
                  dist.invwishart(np.eye(12) * 5, 12), stats.gamma(1, scale=1))
        for _ in range(10)
    ]

    # Counters for posterior / likelihood evaluations.
    count = {
        "local_lpost": 0,
        "local_llhood": 0,
        "naive_lpost": 0,
        "naive_llhood": 0
    }

    def count_closure(name):
        # Return a no-argument callback that increments count[name].
        def rval():
            count[name] = count[name] + 1

        return rval