Ejemplo n.º 1
0
def sample(num_samples, initial_guesses, proposal_method, population_size = 20, stop_flag = flags.NeverStopFlag(), quiet = True):
    """Draw samples by repeatedly proposing a population and resampling it.

    Each iteration picks ``population_size`` ancestors uniformly from the
    most recent ``min(len(rval), 2 * population_size)`` samples, asks
    ``proposal_method`` for proposals, lets the proposal method adapt via
    ``observe`` and appends the result of
    ``importance_resampling(population_size, pop)`` to the sample list.

    Parameters
    ==========
    num_samples: number of new samples to collect (initial guesses excluded)
    initial_guesses: iterable of starting points, each wrapped in ``PmcSample``
    proposal_method: object providing ``gen_proposal(ancestor)`` and
        ``observe(population)``; ``gen_proposal`` may return one proposal
        or an iterable of proposals
    population_size: number of ancestors drawn per iteration
    stop_flag: object whose ``stop()`` ends the loop early when true.
        NOTE: the default instance is created once, when the function is
        defined, and is shared between calls.
    quiet: if false, the running sample count is printed to stderr

    Returns
    =======
    Tuple ``(samples, lposts)`` of numpy arrays built from the ``sample``
    and ``lpost`` attributes of every collected (non-initial) sample.
    """
    num_initial = len(initial_guesses)
    rval = [PmcSample(sample=s) for s in initial_guesses]

    while len(rval) - num_initial < num_samples and not stop_flag.stop():
        # Only the most recent samples are eligible as ancestors.
        anc_cand = np.min((len(rval), 2 * population_size))
        ancest_dist = categorical([1./anc_cand] * anc_cand)

        # choose ancestors uniformly at random from the recent samples
        pop = []
        for _ in range(population_size):
            # rvs() is used as a zero-based index elsewhere in this file
            # (pop[dist.rvs()]); shift by one so a draw of 0 selects the
            # newest sample. Plain rval[-0] would be rval[0], the oldest.
            back_offset = int(ancest_dist.rvs()) + 1
            tmp = proposal_method.gen_proposal(rval[-back_offset])
            if not hasattr(tmp, "__iter__"):
                tmp = [tmp]
            pop.extend(tmp)

        proposal_method.observe(pop) # adapt proposal
        rval.extend(importance_resampling(population_size, pop))
        if not quiet:
            print(len(rval), "samples", file=sys.stderr)

    collected = rval[num_initial:]
    return (np.array([s.sample for s in collected]), np.array([s.lpost for s in collected]))
Ejemplo n.º 2
0
    def observe(self, population):
        """Update per-proposal weight statistics and the proposal distribution.

        The log-weights of ``population`` are normalised with ``logsumexp``
        and accumulated, per proposal index (looked up through
        ``self.prop2idx[s.prop_obj]``), into ``self.num_samp``, ``self.sum``
        and ``self.sqr_sum`` (the latter two kept in log space).
        ``self.var`` and ``self.prop_dist`` are then recomputed.

        Parameters
        ==========
        population: iterable of samples exposing ``lweight`` and ``prop_obj``
        """
        raw_lw = np.array([s.lweight for s in population])
        norm_lw = raw_lw - logsumexp(raw_lw)
        owners = np.array([self.prop2idx[s.prop_obj] for s in population])

        for prop_idx, lw in zip(owners, norm_lw):
            self.num_samp[prop_idx] += 1
            self.sum[prop_idx] = logsumexp((self.sum[prop_idx], lw))
            self.sqr_sum[prop_idx] = logsumexp((self.sqr_sum[prop_idx], 2 * lw))

        # NOTE(review): in linear space this evaluates to
        # (sum_w + sum_w_squared / n) / n, which is not the textbook sample
        # variance -- confirm this is the intended estimate.
        log_counts = log(self.num_samp)
        stacked = [self.sum, self.sqr_sum - log_counts]
        self.var = exp(logsumexp(stacked, 0) - log_counts)

        n_props = self.var.size
        total_var = self.var.sum()
        usable = not (total_var == 0 or np.isnan(total_var))
        if n_props > 1 and usable:
            # Favour proposals with low variance, blended half-and-half with
            # a Dirichlet draw parameterised by the sample counts.
            inverted = total_var - self.var
            prop_prob = inverted / inverted.sum() / 2 + np.random.dirichlet(1 + self.num_samp) / 2
        else:
            # Single proposal, or degenerate variance: fall back to uniform.
            prop_prob = np.array([1. / n_props] * n_props)
        self.prop_dist = categorical(prop_prob)
 def observe(self, population):
     """Update per-proposal weight statistics and the proposal distribution.

     The log-weights of ``population`` are normalised with ``logsumexp``
     and accumulated, per proposal index (looked up through
     ``self.prop2idx[s.prop_obj]``), into ``self.num_samp``, ``self.sum``
     and ``self.sqr_sum`` (the latter two kept in log space).
     ``self.var`` and ``self.prop_dist`` are then recomputed.

     Parameters
     ==========
     population: iterable of samples exposing ``lweight`` and ``prop_obj``
     """
     raw_lw = np.array([s.lweight for s in population])
     norm_lw = raw_lw - logsumexp(raw_lw)
     owners = np.array([self.prop2idx[s.prop_obj] for s in population])

     for prop_idx, lw in zip(owners, norm_lw):
         self.num_samp[prop_idx] += 1
         self.sum[prop_idx] = logsumexp((self.sum[prop_idx], lw))
         self.sqr_sum[prop_idx] = logsumexp((self.sqr_sum[prop_idx], 2 * lw))

     # NOTE(review): in linear space this evaluates to
     # (sum_w + sum_w_squared / n) / n, which is not the textbook sample
     # variance -- confirm this is the intended estimate.
     log_counts = log(self.num_samp)
     stacked = [self.sum, self.sqr_sum - log_counts]
     self.var = exp(logsumexp(stacked, 0) - log_counts)

     n_props = self.var.size
     total_var = self.var.sum()
     usable = not (total_var == 0 or np.isnan(total_var))
     if n_props > 1 and usable:
         # Favour proposals with low variance, blended half-and-half with
         # a Dirichlet draw parameterised by the sample counts.
         inverted = total_var - self.var
         prop_prob = inverted / inverted.sum() / 2 + np.random.dirichlet(1 + self.num_samp) / 2
     else:
         # Single proposal, or degenerate variance: fall back to uniform.
         prop_prob = np.array([1. / n_props] * n_props)
     self.prop_dist = categorical(prop_prob)
Ejemplo n.º 4
0
def sample_lpost_based(num_samples, initial_particles, proposal_method, population_size = 20):
    """Sample by proposing from ancestors chosen according to their log posterior.

    Each iteration draws ``population_size`` ancestors from ``anc`` with
    probabilities given by their normalised ``lpost``, generates one proposal
    per ancestor, and resamples the proposals with a multinomial draw over
    their normalised weights. Every proposal (whether resampled or not) is
    added to the ancestor pool.

    Parameters
    ==========
    num_samples: minimum number of samples to collect
    initial_particles: passed to ``proposal_method.process_initial_samples``,
        whose return value is used (and appended to) as the ancestor pool
    proposal_method: object providing ``process_initial_samples`` and
        ``gen_proposal``
    population_size: number of proposals generated and resampled per iteration

    Returns
    =======
    Tuple ``(samples, lposts)`` of numpy arrays built from the ``sample``
    and ``lpost`` attributes of the resampled proposals.

    Raises
    ======
    ValueError: if the multinomial draw fails and the weights cannot be
        renormalised (their sum is not finite and positive).
    """
    rval = []
    anc = proposal_method.process_initial_samples(initial_particles)

    while len(rval) < num_samples:
        anc_lpost = np.array([a.lpost for a in anc])
        ancest_dist = categorical(anc_lpost - logsumexp(anc_lpost), p_in_logspace = True)

        # choose ancestors with probability given by their normalised posterior
        pop = [proposal_method.gen_proposal(anc[ancest_dist.rvs()])
               for _ in range(population_size)]

        prop_w = np.array([s.lweight for s in pop])
        prop_w = exp(prop_w - logsumexp(prop_w))

        # Importance Resampling
        while True:
            try:
                draws = np.random.multinomial(population_size, prop_w)
                break
            except ValueError:
                # Rounding can push the weights slightly off a valid
                # probability vector; renormalise and retry. NaN or zero
                # weights can never be repaired, so re-raise instead of
                # looping forever.
                total = prop_w.sum()
                if not np.isfinite(total) or total <= 0:
                    raise
                prop_w /= total

        for particle, count in zip(pop, draws):
            rval.extend([particle] * count)
            anc.append(particle)

    return (np.array([s.sample for s in rval]), np.array([s.lpost for s in rval]))
Ejemplo n.º 5
0
def sample_lpost_based(num_samples, initial_particles, proposal_method, population_size = 20):
    """Sample by proposing from ancestors chosen according to their log posterior.

    Each iteration draws ``population_size`` ancestors from ``anc`` with
    probabilities given by their normalised ``lpost``, generates one proposal
    per ancestor, and resamples the proposals with a multinomial draw over
    their normalised weights. Every proposal (whether resampled or not) is
    added to the ancestor pool.

    Parameters
    ==========
    num_samples: minimum number of samples to collect
    initial_particles: passed to ``proposal_method.process_initial_samples``,
        whose return value is used (and appended to) as the ancestor pool
    proposal_method: object providing ``process_initial_samples`` and
        ``gen_proposal``
    population_size: number of proposals generated and resampled per iteration

    Returns
    =======
    Tuple ``(samples, lposts)`` of numpy arrays built from the ``sample``
    and ``lpost`` attributes of the resampled proposals.

    Raises
    ======
    ValueError: if the multinomial draw fails and the weights cannot be
        renormalised (their sum is not finite and positive).
    """
    rval = []
    anc = proposal_method.process_initial_samples(initial_particles)

    while len(rval) < num_samples:
        anc_lpost = np.array([a.lpost for a in anc])
        ancest_dist = categorical(anc_lpost - logsumexp(anc_lpost), p_in_logspace = True)

        # choose ancestors with probability given by their normalised posterior
        pop = [proposal_method.gen_proposal(anc[ancest_dist.rvs()])
               for _ in range(population_size)]

        prop_w = np.array([s.lweight for s in pop])
        prop_w = exp(prop_w - logsumexp(prop_w))

        # Importance Resampling
        while True:
            try:
                draws = np.random.multinomial(population_size, prop_w)
                break
            except ValueError:
                # Rounding can push the weights slightly off a valid
                # probability vector; renormalise and retry. NaN or zero
                # weights can never be repaired, so re-raise instead of
                # looping forever.
                total = prop_w.sum()
                if not np.isfinite(total) or total <= 0:
                    raise
                prop_w /= total

        for particle, count in zip(pop, draws):
            rval.extend([particle] * count)
            anc.append(particle)

    return (np.array([s.sample for s in rval]), np.array([s.lpost for s in rval]))
Ejemplo n.º 6
0
 def fit(self, samples):
     """Fit a Dirichlet-process Gaussian mixture to ``samples``.

     Sets ``num_components``, ``comp_lprior`` (log mixture weights),
     ``dist_cat`` (categorical over components), ``comp_dist`` (one
     ``mvnorm`` per component, built from the fitted mean and precision)
     and ``dim`` (size of the first fitted mean).

     Parameters
     ==========
     samples: data passed unchanged to the scikit-learn estimator's ``fit``
     """
     import sklearn.mixture
     if hasattr(sklearn.mixture, "DPGMM"):
         m = sklearn.mixture.DPGMM(covariance_type="full")
         m.fit(samples)
         precisions = m.precs_
     else:
         # DPGMM no longer exists in current scikit-learn releases;
         # BayesianGaussianMixture is its documented replacement and
         # exposes the precision matrices as ``precisions_``.
         m = sklearn.mixture.BayesianGaussianMixture(covariance_type="full")
         m.fit(samples)
         precisions = m.precisions_
     self.num_components = len(m.weights_)
     self.comp_lprior = log(m.weights_)
     self.dist_cat = categorical(exp(self.comp_lprior))
     self.comp_dist = [mvnorm(mean, np.linalg.inv(prec), Ki = prec)
                       for mean, prec in zip(m.means_, precisions)]
     self.dim = m.means_[0].size
Ejemplo n.º 7
0
 def fit(self, samples):
     """Fit a full-covariance Gaussian mixture with ``self.num_components`` components.

     Sets ``comp_lprior`` (log mixture weights), ``dist_cat`` (categorical
     over components), ``comp_dist`` (one ``mvnorm`` per component, built
     from the fitted mean and covariance) and ``dim`` (size of the first
     fitted mean).

     Parameters
     ==========
     samples: data passed unchanged to the scikit-learn estimator's ``fit``
     """
     import sklearn.mixture
     if hasattr(sklearn.mixture, "GMM"):
         m = sklearn.mixture.GMM(self.num_components, "full")
         m.fit(samples)
         covars = m.covars_
     else:
         # GMM no longer exists in current scikit-learn releases;
         # GaussianMixture is its documented replacement and exposes the
         # covariance matrices as ``covariances_``.
         m = sklearn.mixture.GaussianMixture(n_components=self.num_components,
                                             covariance_type="full")
         m.fit(samples)
         covars = m.covariances_
     self.comp_lprior = log(m.weights_)
     self.dist_cat = categorical(exp(self.comp_lprior))
     self.comp_dist = [mvnorm(mean, cov) for mean, cov in zip(m.means_, covars)]
     self.dim = m.means_[0].size
Ejemplo n.º 8
0
 def fit(self, samples):
     """Fit a Dirichlet-process Gaussian mixture to ``samples``.

     Sets ``num_components``, ``comp_lprior`` (log mixture weights),
     ``dist_cat`` (categorical over components), ``comp_dist`` (one
     ``mvnorm`` per component, built from the fitted mean and precision)
     and ``dim`` (size of the first fitted mean).

     Parameters
     ==========
     samples: data passed unchanged to the scikit-learn estimator's ``fit``
     """
     import sklearn.mixture
     if hasattr(sklearn.mixture, "DPGMM"):
         m = sklearn.mixture.DPGMM(covariance_type="full")
         m.fit(samples)
         precisions = m.precs_
     else:
         # DPGMM no longer exists in current scikit-learn releases;
         # BayesianGaussianMixture is its documented replacement and
         # exposes the precision matrices as ``precisions_``.
         m = sklearn.mixture.BayesianGaussianMixture(covariance_type="full")
         m.fit(samples)
         precisions = m.precisions_
     self.num_components = len(m.weights_)
     self.comp_lprior = log(m.weights_)
     self.dist_cat = categorical(exp(self.comp_lprior))
     self.comp_dist = [
         mvnorm(mean, np.linalg.inv(prec), Ki=prec)
         for mean, prec in zip(m.means_, precisions)
     ]
     self.dim = m.means_[0].size
Ejemplo n.º 9
0
 def __init__(self, data, dir_param, mean_prior, cov_prior, df_prior):
     """Store data and priors, then draw a random initial model state.

     Parameters
     ==========
     data: array-like with a two-element ``shape``; ``shape[0]`` is stored
         as ``num_obs`` and ``shape[1]`` as ``dim_obs``
     dir_param: Dirichlet parameter, flattened into ``self.dir_param``;
         ``len(dir_param)`` initial component parameter triples are drawn
     mean_prior: prior sampled with ``rvs()``
     cov_prior: prior sampled with ``rv()`` (note: not ``rvs``)
     df_prior: prior sampled with ``rvs()``
     """
     self.data = data
     self.num_obs, self.dim_obs = data.shape[0], data.shape[1]

     # Category probabilities come from the Dirichlet; the per-observation
     # component indicators are then drawn from those probabilities.
     self.dir_param = np.array(dir_param).flatten()
     self.cat_param = np.random.dirichlet(self.dir_param)
     indic_dist = dist.categorical(self.cat_param)
     self.comp_indic = indic_dist.rvs(size=self.num_obs, indic=True)

     self.mean_prior = mean_prior
     self.cov_prior = cov_prior
     self.df_prior = df_prior

     # One [mean, covariance, df] draw from the priors per component.
     start_params = []
     for _ in range(len(dir_param)):
         start_params.append([mean_prior.rvs(), cov_prior.rv(), df_prior.rvs()])
     self.update_comp_dists(start_params)
Ejemplo n.º 10
0
 def lprior(self):
     """Return the log prior of the current model state.

     Adds up the Dirichlet log-density of ``cat_param``, the summed
     categorical log-probability of ``comp_indic``, and, for every entry of
     ``comp_param``, the log-densities of its elements 0, 1 and 2 under
     ``mean_prior``, ``cov_prior`` and ``df_prior`` respectively. After
     each term an assertion checks that the running total is not ``-inf``.
     """
     total = dist.dirichlet(self.dir_param).logpdf(self.cat_param)
     assert total != -np.inf

     indic_lp = dist.categorical(self.cat_param).logpdf(self.comp_indic, indic=True)
     total = total + indic_lp.sum()
     assert total != -np.inf

     for i in range(len(self.comp_param)):
         # Slot k of a component's parameters is scored under the k-th prior.
         priors = (self.mean_prior, self.cov_prior, self.df_prior)
         for slot, prior in enumerate(priors):
             total = total + prior.logpdf(self.comp_param[i][slot])
             assert total != -np.inf
     return total
Ejemplo n.º 11
0
 def lprior(self):
     """Return the log prior of the current model state.

     Adds up the Dirichlet log-density of ``cat_param``, the summed
     categorical log-probability of ``comp_indic``, and, for every entry of
     ``comp_param``, the log-densities of its elements 0, 1 and 2 under
     ``mean_prior``, ``cov_prior`` and ``df_prior`` respectively. After
     each term an assertion checks that the running total is not ``-inf``.
     """
     total = dist.dirichlet(self.dir_param).logpdf(self.cat_param)
     assert total != -np.inf

     indic_lp = dist.categorical(self.cat_param).logpdf(self.comp_indic, indic = True)
     total = total + indic_lp.sum()
     assert total != -np.inf

     for i in range(len(self.comp_param)):
         # Slot k of a component's parameters is scored under the k-th prior.
         priors = (self.mean_prior, self.cov_prior, self.df_prior)
         for slot, prior in enumerate(priors):
             total = total + prior.logpdf(self.comp_param[i][slot])
             assert total != -np.inf
     return total
Ejemplo n.º 12
0
 def fit(self, samples):
     """Fit a full-covariance Gaussian mixture with ``self.num_components`` components.

     Sets ``comp_lprior`` (log mixture weights), ``dist_cat`` (categorical
     over components), ``comp_dist`` (one ``mvnorm`` per component, built
     from the fitted mean and covariance) and ``dim`` (size of the first
     fitted mean).

     Parameters
     ==========
     samples: data passed unchanged to the scikit-learn estimator's ``fit``
     """
     import sklearn.mixture
     if hasattr(sklearn.mixture, "GMM"):
         m = sklearn.mixture.GMM(self.num_components, "full")
         m.fit(samples)
         covars = m.covars_
     else:
         # GMM no longer exists in current scikit-learn releases;
         # GaussianMixture is its documented replacement and exposes the
         # covariance matrices as ``covariances_``.
         m = sklearn.mixture.GaussianMixture(
             n_components=self.num_components, covariance_type="full")
         m.fit(samples)
         covars = m.covariances_
     self.comp_lprior = log(m.weights_)
     self.dist_cat = categorical(exp(self.comp_lprior))
     self.comp_dist = [
         mvnorm(mean, cov)
         for mean, cov in zip(m.means_, covars)
     ]
     self.dim = m.means_[0].size
Ejemplo n.º 13
0
def importance_resampling(resampled_size, pop):
    """Resample ``resampled_size`` items from ``pop`` according to their weights.

    Parameters
    ==========
    resampled_size: number of items to draw (with replacement)
    pop: sequence of samples exposing a log-weight as ``lweight``

    Returns
    =======
    List of ``resampled_size`` elements of ``pop``, each picked by an
    independent draw from a categorical distribution over the normalised
    weights.

    Raises
    ======
    ValueError: if ``categorical`` rejects the weights and they cannot be
        renormalised (their sum is not finite and positive).
    """
    prop_w = np.array([s.lweight for s in pop])
    prop_w = exp(prop_w - logsumexp(prop_w))

    # Importance Resampling
    while True:
        try:
            # Named resample_dist so the module name ``dist`` used elsewhere
            # in this file is not shadowed.
            resample_dist = categorical(prop_w)
            break
        except ValueError:
            # Renormalise and retry; NaN or zero weights can never be
            # repaired, so re-raise instead of looping forever.
            total = prop_w.sum()
            if not np.isfinite(total) or total <= 0:
                raise
            prop_w /= total

    return [pop[resample_dist.rvs()] for _ in range(resampled_size)]
Ejemplo n.º 14
0
    def __init__(self, data, dir_param, mean_prior, cov_prior, df_prior):
        """Store data and priors, then draw a random initial model state.

        Parameters
        ==========
        data: array-like with a two-element ``shape``; ``shape[0]`` is
            stored as ``num_obs`` and ``shape[1]`` as ``dim_obs``
        dir_param: Dirichlet parameter, flattened into ``self.dir_param``;
            ``len(dir_param)`` initial component parameter triples are drawn
        mean_prior: prior sampled with ``rvs()``
        cov_prior: prior sampled with ``rv()`` (note: not ``rvs``)
        df_prior: prior sampled with ``rvs()``
        """
        self.data = data
        self.num_obs, self.dim_obs = data.shape[0], data.shape[1]

        # Category probabilities come from the Dirichlet; the per-observation
        # component indicators are then drawn from those probabilities.
        self.dir_param = np.array(dir_param).flatten()
        self.cat_param = np.random.dirichlet(self.dir_param)
        indic_dist = dist.categorical(self.cat_param)
        self.comp_indic = indic_dist.rvs(size=self.num_obs, indic=True)

        self.mean_prior = mean_prior
        self.cov_prior = cov_prior
        self.df_prior = df_prior

        # One [mean, covariance, df] draw from the priors per component.
        start_params = []
        for _ in range(len(dir_param)):
            start_params.append(
                [mean_prior.rvs(), cov_prior.rv(), df_prior.rvs()])
        self.update_comp_dists(start_params)
Ejemplo n.º 15
0
def sample_sis(num_samples,
               initial_particles,
               proposal_method,
               stop_flag=flags.NeverStopFlag(),
               quiet=True):
    """Run sequential importance sampling with resampling.

    Each iteration generates proposals for every current particle,
    resamples ``len(initial_particles)`` of them with a multinomial draw
    over their normalised weights, reports the resampled set through
    ``proposal_method.process_new_ancestors`` and appends it to the result.
    When ``gen_proposal`` returns an iterable, the particle's successor is
    drawn from it according to normalised ``lpost``; otherwise the single
    proposal becomes the successor.

    Parameters
    ==========
    num_samples: minimum number of samples to collect
    initial_particles: passed to ``proposal_method.process_initial_samples``;
        its length also fixes the number of resampled items per iteration
    proposal_method: object providing ``process_initial_samples``,
        ``gen_proposal`` and ``process_new_ancestors``
    stop_flag: object whose ``stop()`` ends the loop early when true.
        NOTE: the default instance is created once, when the function is
        defined, and is shared between calls.
    quiet: if false, the running sample count is printed to stderr

    Returns
    =======
    Tuple ``(samples, lposts)`` of numpy arrays built from the ``sample``
    and ``lpost`` attributes of the resampled proposals.

    Raises
    ======
    ValueError: if the multinomial draw fails and the weights cannot be
        renormalised (their sum is not finite and positive).
    """
    part = proposal_method.process_initial_samples(initial_particles)
    rval = []

    while len(rval) < num_samples and not stop_flag.stop():
        # propose from every current particle and pick its successor
        pop = []
        part_new = []
        for p in part:
            tmp = proposal_method.gen_proposal(p)
            if hasattr(tmp, "__iter__"):
                pop.extend(tmp)
                lposts = np.array([t.lpost for t in tmp])
                cd = categorical(lposts - logsumexp(lposts), True)
                part_new.append(tmp[cd.rvs()])
            else:
                pop.append(tmp)
                part_new.append(tmp)

        prop_w = np.array([s.lweight for s in pop])
        prop_w = exp(prop_w - logsumexp(prop_w))

        # Importance Resampling
        while True:
            try:
                draws = np.random.multinomial(len(initial_particles), prop_w)
                break
            except ValueError:
                # Renormalise and retry; NaN or zero weights can never be
                # repaired, so re-raise instead of looping forever.
                total = prop_w.sum()
                if not np.isfinite(total) or total <= 0:
                    raise
                prop_w /= total

        new_samp = []
        for particle, count in zip(pop, draws):
            new_samp.extend([particle] * count)
        proposal_method.process_new_ancestors(new_samp)
        rval.extend(new_samp)
        if not quiet:
            print(len(rval), "samples", file=sys.stderr)
        part = part_new

    return (np.array([s.sample
                      for s in rval]), np.array([s.lpost for s in rval]))
Ejemplo n.º 16
0
def test_DirCatTMM():
    """Check ``lpost_comp_indic``, ``llhood_comp_param`` and ``lprior`` of DirCatTMM.

    Runs for ``dim`` 2 and 3 on ``num_obs`` observations drawn from
    ``dist.mvt(mu, K, df)``. The checks are inequalities on values computed
    from random draws, so the test depends on the random state.

    NOTE(review): the model class is reached through ``dis`` while the
    distributions come from ``dist`` -- confirm both names are imported.
    """
    num_obs = 1000
    for dim in range(2,4):
        # Parameters of the distribution that generates the observations.
        mu = np.array([11 * (i+1) for i in range(dim)])
        K = np.eye(dim) * 5
        df = dim + 1
        obs_dist = dist.mvt(mu, K, df)
        obs = obs_dist.rvs(num_obs)
        # obs_dist is passed in the third position as well, i.e. where the
        # __init__ signature shown in this file expects the mean prior
        # (presumably that __init__ belongs to DirCatTMM -- TODO confirm).
        dctmm = dis.DirCatTMM(obs, [1]*dim, obs_dist,
                                      dist.invwishart(np.eye(dim) * 5, dim + 1),
                                      stats.gamma(1, scale=1, loc=dim+1))
        # NOTE(review): orig_cat_param is never read again in this function.
        orig_cat_param = dctmm.cat_param
        dctmm.cat_param = np.zeros(dim)
        for i in range(dim):
            # Put all category probability mass on component i.
            dctmm.cat_param[i] = 1
            ### Test DirCatTMM.lpost_comp_indic ###
            for j in range(dim):
                # One-hot indicator selecting component j.
                c_indic = np.zeros(dim)
                c_indic[j] = 1
                for o in range(obs.shape[0]):
                    if i == j:
                        assert(dctmm.lpost_comp_indic(c_indic, o) > -np.inf)
                    else:
                        # Component j has zero category probability here.
                        assert(dctmm.lpost_comp_indic(c_indic, o) == -np.inf)
                c_indic[j] = 0
            ### Test DirCatTMM.llhood_comp_param ###
            # The generating parameters must score at least as high as the
            # perturbed means, scales and degrees of freedom below.
            highest = dctmm.llhood_comp_param((mu, K, df), i)
            assert(highest >= dctmm.llhood_comp_param((-mu, K, df), i))
            assert(highest >= dctmm.llhood_comp_param((mu, K*5, df), i))
            assert(highest >= dctmm.llhood_comp_param((mu, K/2, df), i))
            assert(highest >= dctmm.llhood_comp_param((mu, K, df+10), i))
            # Reset so the next iteration starts from an all-zero vector.
            dctmm.cat_param[i] = 0


        ### Test DirCatTMM.lprior ###
        # Baseline: category probabilities proportional to dir_param, fresh
        # indicators, and every component set to the generating parameters.
        dctmm.cat_param = np.array(dctmm.dir_param / dctmm.dir_param.sum())
        dctmm.comp_indic = dist.categorical(dctmm.cat_param).rvs(num_obs, indic = True)
        dctmm.update_comp_dists([(mu, K, df)] * dim)
        highest = dctmm.lprior()

        # Changing the category probabilities must lower the log prior.
        c_param = dctmm.dir_param + np.arange(dim)
        dctmm.cat_param = np.array(c_param / c_param.sum())
        ch_cat_param = dctmm.lprior()
        assert(highest > ch_cat_param)
        # Negating the component means must lower it further.
        dctmm.update_comp_dists([(-mu, K, df)] * dim)
        assert(ch_cat_param > dctmm.lprior())
Ejemplo n.º 17
0
def sample_sis(num_samples, initial_particles, proposal_method, stop_flag = flags.NeverStopFlag(), quiet = True):
    """Run sequential importance sampling with resampling.

    Each iteration generates proposals for every current particle,
    resamples ``len(initial_particles)`` of them with a multinomial draw
    over their normalised weights, reports the resampled set through
    ``proposal_method.process_new_ancestors`` and appends it to the result.
    When ``gen_proposal`` returns an iterable, the particle's successor is
    drawn from it according to normalised ``lpost``; otherwise the single
    proposal becomes the successor.

    Parameters
    ==========
    num_samples: minimum number of samples to collect
    initial_particles: passed to ``proposal_method.process_initial_samples``;
        its length also fixes the number of resampled items per iteration
    proposal_method: object providing ``process_initial_samples``,
        ``gen_proposal`` and ``process_new_ancestors``
    stop_flag: object whose ``stop()`` ends the loop early when true.
        NOTE: the default instance is created once, when the function is
        defined, and is shared between calls.
    quiet: if false, the running sample count is printed to stderr

    Returns
    =======
    Tuple ``(samples, lposts)`` of numpy arrays built from the ``sample``
    and ``lpost`` attributes of the resampled proposals.

    Raises
    ======
    ValueError: if the multinomial draw fails and the weights cannot be
        renormalised (their sum is not finite and positive).
    """
    part = proposal_method.process_initial_samples(initial_particles)
    rval = []

    while len(rval) < num_samples and not stop_flag.stop():
        # propose from every current particle and pick its successor
        pop = []
        part_new = []
        for p in part:
            tmp = proposal_method.gen_proposal(p)
            if hasattr(tmp, "__iter__"):
                pop.extend(tmp)
                lposts = np.array([t.lpost for t in tmp])
                cd = categorical(lposts - logsumexp(lposts), True)
                part_new.append(tmp[cd.rvs()])
            else:
                pop.append(tmp)
                part_new.append(tmp)

        prop_w = np.array([s.lweight for s in pop])
        prop_w = exp(prop_w - logsumexp(prop_w))

        # Importance Resampling
        while True:
            try:
                draws = np.random.multinomial(len(initial_particles), prop_w)
                break
            except ValueError:
                # Renormalise and retry; NaN or zero weights can never be
                # repaired, so re-raise instead of looping forever.
                total = prop_w.sum()
                if not np.isfinite(total) or total <= 0:
                    raise
                prop_w /= total

        new_samp = []
        for particle, count in zip(pop, draws):
            new_samp.extend([particle] * count)
        proposal_method.process_new_ancestors(new_samp)
        rval.extend(new_samp)
        if not quiet:
            print(len(rval), "samples", file=sys.stderr)
        part = part_new

    return (np.array([s.sample for s in rval]), np.array([s.lpost for s in rval]))
    def __init__(self, *predefined_proposals):
        """
        Choose proposals to decrease variance of weights

        Starts with empty statistics for every proposal and a uniform
        distribution over the proposals.

        Parameters
        ==========
        predefined_proposals: a set of given proposals among which to choose
        """
        length = len(predefined_proposals)

        # Two-way mapping between proposal objects and their positions.
        self.prop2idx = {prop: pos for pos, prop in enumerate(predefined_proposals)}
        self.idx2prop = list(predefined_proposals)

        self.num_samp = np.zeros(length) # number of samples for weights
        self.sum = np.full(length, -np.inf) # sum of weights (log space)
        self.sqr_sum = np.full(length, -np.inf) # sum of squares (log space)
        self.var = np.zeros(length) # weight variance estimate

        uniform_prob = 1./length
        self.prop_dist = categorical(np.full(length, uniform_prob))
Ejemplo n.º 19
0
    def __init__(self, *predefined_proposals):
        """
        Choose proposals to decrease variance of weights

        Starts with empty statistics for every proposal and a uniform
        distribution over the proposals.

        Parameters
        ==========
        predefined_proposals: a set of given proposals among which to choose
        """
        length = len(predefined_proposals)

        # Two-way mapping between proposal objects and their positions.
        self.prop2idx = {
            prop: pos for pos, prop in enumerate(predefined_proposals)
        }
        self.idx2prop = list(predefined_proposals)

        self.num_samp = np.zeros(length)  # number of samples for weights
        self.sum = np.full(length, -np.inf)  # sum of weights (log space)
        self.sqr_sum = np.full(length, -np.inf)  # sum of squares (log space)
        self.var = np.zeros(length)  # weight variance estimate

        uniform_prob = 1. / length
        self.prop_dist = categorical(np.full(length, uniform_prob))
Ejemplo n.º 20
0
def importance_resampling(resampled_size, pop, ess = False):
    """Resample ``resampled_size`` items from ``pop`` according to their weights.

    Parameters
    ==========
    resampled_size: number of items to draw (with replacement)
    pop: sequence of samples exposing a log-weight as ``lweight``
    ess: if true, also return ``compute_ess`` evaluated on the normalised
        log-weights

    Returns
    =======
    The list of resampled items, or the tuple ``(resampled, ess_value)``
    when ``ess`` is true.

    Raises
    ======
    ValueError: if ``categorical`` rejects the weights and they cannot be
        renormalised (their sum is not finite and positive).
    """
    prop_w = np.array([s.lweight for s in pop])
    prop_w = prop_w - logsumexp(prop_w)
    if ess:
        # compute_ess receives the weights while they are still in log space
        rval_ess = compute_ess(prop_w)
    prop_w = exp(prop_w)

    # Importance Resampling
    while True:
        try:
            # Named resample_dist so the module name ``dist`` used elsewhere
            # in this file is not shadowed.
            resample_dist = categorical(prop_w)
            break
        except ValueError:
            # Renormalise and retry; NaN or zero weights can never be
            # repaired, so re-raise instead of looping forever.
            total = prop_w.sum()
            if not np.isfinite(total) or total <= 0:
                raise
            prop_w /= total

    new_samp = [pop[resample_dist.rvs()] for _ in range(resampled_size)]

    if ess:
        return (new_samp, rval_ess)
    return new_samp
Ejemplo n.º 21
0
def importance_resampling(resampled_size, pop, ess=False):
    """Resample ``resampled_size`` items from ``pop`` according to their weights.

    Parameters
    ==========
    resampled_size: number of items to draw (with replacement)
    pop: sequence of samples exposing a log-weight as ``lweight``
    ess: if true, also return ``compute_ess`` evaluated on the normalised
        log-weights

    Returns
    =======
    The list of resampled items, or the tuple ``(resampled, ess_value)``
    when ``ess`` is true.

    Raises
    ======
    ValueError: if ``categorical`` rejects the weights and they cannot be
        renormalised (their sum is not finite and positive).
    """
    prop_w = np.array([s.lweight for s in pop])
    prop_w = prop_w - logsumexp(prop_w)
    if ess:
        # compute_ess receives the weights while they are still in log space
        rval_ess = compute_ess(prop_w)
    prop_w = exp(prop_w)

    # Importance Resampling
    while True:
        try:
            # Named resample_dist so the module name ``dist`` used elsewhere
            # in this file is not shadowed.
            resample_dist = categorical(prop_w)
            break
        except ValueError:
            # Renormalise and retry; NaN or zero weights can never be
            # repaired, so re-raise instead of looping forever.
            total = prop_w.sum()
            if not np.isfinite(total) or total <= 0:
                raise
            prop_w /= total

    new_samp = [pop[resample_dist.rvs()] for _ in range(resampled_size)]

    if ess:
        return (new_samp, rval_ess)
    return new_samp
Ejemplo n.º 22
0
 def lpost_comp_indic(self, x, observation_idx):
     """Return the log posterior of a component indicator for one observation.

     The component is the position of the largest entry of ``x``. The
     result is the categorical log-probability of that component under
     ``self.cat_param`` plus the log-density of
     ``self.data[observation_idx]`` under that component's distribution.

     Parameters
     ==========
     x: indicator array; only the index of its maximum is used
     observation_idx: index into ``self.data``; must not be ``None``
     """
     assert observation_idx is not None
     chosen = np.argmax(x.flat)
     lp_assignment = dist.categorical(self.cat_param).logpdf(chosen)
     lp_observation = self.comp_dist[chosen].logpdf(self.data[observation_idx])
     return lp_assignment + lp_observation
Ejemplo n.º 23
0
 def lpost_comp_indic(self, x, observation_idx):
     """Return the log posterior of a component indicator for one observation.

     The component is the position of the largest entry of ``x``. The
     result is the categorical log-probability of that component under
     ``self.cat_param`` plus the log-density of
     ``self.data[observation_idx]`` under that component's distribution.

     Parameters
     ==========
     x: indicator array; only the index of its maximum is used
     observation_idx: index into ``self.data``; must not be ``None``
     """
     assert observation_idx is not None
     chosen = np.argmax(x.flat)
     lp_assignment = dist.categorical(self.cat_param).logpdf(chosen)
     lp_observation = self.comp_dist[chosen].logpdf(self.data[observation_idx])
     return lp_assignment + lp_observation