def __init__(self, grammar, make_h0, data, max_depth=3, increment_from=None, yield_partition=False, steps=Infinity, grammar_optimize=True, **kwargs): """ Initializer. *grammar* - what grammar are we using? *make_h0* - a function to generate h0s. This MUST take a value argument to set the value *data* - D for P(H|D) *max_depth*, *max_n* -- only one of these may be specified. Either enumerate up to depth max_depth, or enumerate up to the largest depth such that the number of trees is less than max_N TODO: We can in principle optimize the grammar by including only one rule of the form NT->TERMiNAL for each NT. This will exponentially speed things up... """ partitions = [] # first figure out how many trees we have # We generate a lot and then replace terminal nodes with their types, because the terminal nodes will # be the only ones that are allowed to be altered *within* a chain. So this collapses together # trees that differ only on terminals seen_collapsed = set() # what have we seen the collapsed forms of? for t in grammar.increment_tree(x=increment_from, max_depth=max_depth): ct = trim_leaves(t) if ct not in seen_collapsed: seen_collapsed.add(ct) partitions.append(t) #print "# Using partitions:", partitions # Take each partition (h0) and set it to have zero resample_p exact at the leaf for p in partitions: print p for t in p: if not t.is_terminal(): t.resample_p = 0.0 else: t.resample_p = 1.0 # initialize each chain MultipleChainMCMC.__init__(self, lambda: None, data, steps=steps, nchains=len(partitions), **kwargs) # And set each to the partition for c, p in zip(self.chains, partitions): c.set_state(make_h0(value=p)) # and store these self.partitions = map(copy, partitions)
def __init__(self, grammar, make_h0, data, max_N=1000, steps=Infinity, **kwargs): """ :param grammar: -- the grammar we use :param make_h0: -- make a hypothesis :param data: -- data for the posterior :param max_N: -- the max number of samples we'll take :param steps: -- how many steps :return: """ # first figure out the depth we can go to without exceeding max_N partitions = [] try: for d in infrange(): #print "# trying ", d tmp = [] for i, t in enumerate(grammar.enumerate_at_depth(d, leaves=False)): tmp.append(t) if i > max_N: raise BreakException # this gets set if we successfully exit the loop # so it will store the last set that didn't exceed size max_N partitions = tmp except BreakException: pass # Take each partition, which doesn't have leaves, and generate leaves, setting # it to a random generation (fill in the leaves with random hypotheses) for p in partitions: print "# Partition:", p for n in p.subnodes(): # set to not resample these n.resample_p = 0.0 # and fill in the missing leaves with a random generation for i, a in enumerate(n.args): if grammar.is_nonterminal(a): n.args[i] = grammar.generate(a) # initialize each chain MultipleChainMCMC.__init__(self, lambda: None, data, steps=steps, nchains=len(partitions), **kwargs) # And set each to the partition for c, p in zip(self.chains, partitions): c.set_state(make_h0(value=p)) # and store these self.partitions = map(copy, partitions)
def __init__(self, grammar, make_h0, data, max_N=1000, steps=Infinity, **kwargs): """ :param grammar: -- the grammar we use :param make_h0: -- make a hypothesis :param data: -- data for the posterior :param max_N: -- the max number of samples we'll take :param steps: -- how many steps :return: """ # first figure out the depth we can go to without exceeding max_N partitions = [] try: for d in infrange(): #print "# trying ", d tmp = [] for i, t in enumerate(grammar.enumerate_at_depth(d, leaves=False)): tmp.append(t) if i > max_N: raise BreakException # this gets set if we successfully exit the loop # so it will store the last set that didn't exceed size max_N partitions = tmp except BreakException: pass assert len(partitions) > 0 # and store these so we can see them later self.partitions = map(copy, partitions) # Take each partition, which doesn't have leaves, and generate leaves, setting # it to a random generation (fill in the leaves with random hypotheses) for p in partitions: print "# Initializing partition:", p for n in p.subnodes(): # set to not resample these setattr(n, 'resample_p', 0.0) ## NOTE: This is an old version of how proposals were made, but we use it here to store in each node a prob of being resampled # and fill in the missing leaves with a random generation for i, a in enumerate(n.args): if grammar.is_nonterminal(a): n.args[i] = grammar.generate(a) print "# Initialized %s partitions" % len(partitions) # initialize each chain MultipleChainMCMC.__init__(self, lambdaNone, data, steps=steps, nchains=len(partitions), **kwargs) # And set each to the partition for c, p in zip(self.chains, partitions): # We need to make sure the proposal_function is set to use resample_p, which is set above v = make_h0(value=p, proposal_function=MyProposal(grammar) ) c.set_state(v, compute_posterior=False) # and make v use the right proposal function
def __init__(self, make_h0, data, within_steps=100, **kwargs):
    # Delegate the basic multi-chain setup to the parent class.
    MultipleChainMCMC.__init__(self, make_h0, data, **kwargs)

    self.within_steps = within_steps
    self.kwargs = kwargs
    self.seen = set()  # hypotheses already folded into chainZ (count each at most once)
    # Per-chain running log-normalizer estimate, initialized to log(0).
    self.chainZ = [-Infinity for _ in xrange(self.nchains)]
    self.nsamples = 0  # How many samples have we drawn?
def __init__(self, make_h0, data, temperatures=(1.0, 1.1, 1.5), within_steps=50, swaps=1, yield_only_t0=False, **kwargs):
    """
    Parallel-tempering setup: run one chain per temperature.

    *make_h0* - function producing initial hypotheses
    *data* - data for the posterior
    *temperatures* - likelihood temperature for each chain
    *within_steps* - steps to run within a chain between swap attempts
    *swaps* - how many swaps to attempt at a time
    *yield_only_t0* - whether we yield all samples, or only from the lowest temperature
    """
    # BUG FIX: the default was the mutable list [1.0, 1.1, 1.5], shared across all
    # calls (classic mutable-default-argument hazard); a tuple is equivalent here
    # since temperatures is only iterated and len()'d, and cannot be mutated.
    self.yield_only_t0 = yield_only_t0
    self.within_steps = within_steps
    self.swaps = swaps

    # nchains is determined by temperatures, so callers must not also pass it.
    assert 'nchains' not in kwargs

    MultipleChainMCMC.__init__(self, make_h0, data, nchains=len(temperatures), **kwargs)

    # and set the temperatures
    for i, t in enumerate(temperatures):
        self.chains[i].likelihood_temperature = t
def __init__(self, make_h0, data, temperatures=(1.0, 1.1, 1.5), within_steps=10, swaps=1, yield_only_t0=False, **kwargs):
    """
    Parallel-tempering setup: run one chain per temperature.

    *make_h0* - function producing initial hypotheses
    *data* - data for the posterior
    *temperatures* - likelihood temperature for each chain
    *within_steps* - steps to run within a chain between swap attempts
    *swaps* - how many swaps to attempt at a time
    *yield_only_t0* - whether we yield all samples, or only from the lowest temperature
    """
    # BUG FIX: the default was the mutable list [1.0, 1.1, 1.5], shared across all
    # calls (classic mutable-default-argument hazard); a tuple is equivalent here
    # since temperatures is only iterated and len()'d, and cannot be mutated.
    self.yield_only_t0 = yield_only_t0
    self.within_steps = within_steps
    self.swaps = swaps

    # nchains is determined by temperatures, so callers must not also pass it.
    assert 'nchains' not in kwargs

    MultipleChainMCMC.__init__(self, make_h0, data, nchains=len(temperatures), **kwargs)

    # and set the temperatures
    for i, t in enumerate(temperatures):
        self.chains[i].likelihood_temperature = t
def __init__(self, grammar, make_h0, data, max_depth=3, increment_from=None, yield_partition=False, steps=Infinity, grammar_optimize=True, **kwargs): """ Initializer. *grammar* - what grammar are we using? *make_h0* - a function to generate h0s. This MUST take a value argument to set the value *data* - D for P(H|D) *max_depth*, *max_n* -- only one of these may be specified. Either enumerate up to depth max_depth, or enumerate up to the largest depth such that the number of trees is less than max_N TODO: We can in principle optimize the grammar by including only one rule of the form NT->TERMiNAL for each NT. This will exponentially speed things up... """ partitions = [] # first figure out how many trees we have # We generate a lot and then replace terminal nodes with their types, because the terminal nodes will # be the only ones that are allowed to be altered *within* a chain. So this collapses together # trees that differ only on terminals seen_collapsed = set() # what have we seen the collapsed forms of? for t in grammar.increment_tree(x=increment_from, max_depth=max_depth): ct = trim_leaves(t) if ct not in seen_collapsed: seen_collapsed.add(ct) partitions.append(t) #print "# Using partitions:", partitions # Take each partition (h0) and set it to have zero resample_p exact at the leaf for p in partitions: print p for t in p: if not t.is_terminal(): t.resample_p = 0.0 else: t.resample_p = 1.0 # initialize each chain MultipleChainMCMC.__init__(self, lambda: None, data, steps=steps, nchains=len(partitions), **kwargs) # And set each to the partition for c,p in zip(self.chains, partitions): c.set_state(make_h0(value=p)) # and store these self.partitions = map(copy, partitions)
def next(self):
    # Draw the next sample from the round-robin multiple-chain sampler.
    sample = MultipleChainMCMC.next(self)
    idx = self.chain_idx

    # Fold each *new* hypothesis's posterior score into the current chain's
    # running log-normalizer; `seen` guards against double-counting duplicates.
    if sample not in self.seen:
        self.seen.add(sample)
        self.chainZ[idx] = logplusexp(self.chainZ[idx], sample.posterior_score)

    # Re-organize every within_steps full sweeps over all chains (skip step 0).
    if self.nsamples > 0 and self.nsamples % (self.within_steps * self.nchains) == 0:
        self.refresh()

    self.nsamples += 1
    return sample
def __init__(self, grammar, make_h0, data, max_N=1000, steps=Infinity, **kwargs): """ :param grammar: -- the grammar we use :param make_h0: -- make a hypothesis :param data: -- data for the posterior :param max_N: -- the max number of samples we'll take :param steps: -- how many steps :return: """ # first figure out the depth we can go to without exceeding max_N partitions = [] try: for d in infrange(): #print "# trying ", d tmp = [] for i, t in enumerate( grammar.enumerate_at_depth(d, leaves=False)): tmp.append(t) if i > max_N: raise BreakException # this gets set if we successfully exit the loop # so it will store the last set that didn't exceed size max_N partitions = tmp except BreakException: pass assert len(partitions) > 0 # and store these so we can see them later self.partitions = map(copy, partitions) # Take each partition, which doesn't have leaves, and generate leaves, setting # it to a random generation (fill in the leaves with random hypotheses) for p in partitions: print "# Initializing partition:", p for n in p.subnodes(): # set to not resample these setattr( n, 'resample_p', 0.0 ) ## NOTE: This is an old version of how proposals were made, but we use it here to store in each node a prob of being resampled # and fill in the missing leaves with a random generation for i, a in enumerate(n.args): if grammar.is_nonterminal(a): n.args[i] = grammar.generate(a) print "# Initialized %s partitions" % len(partitions) # initialize each chain MultipleChainMCMC.__init__(self, lambdaNone, data, steps=steps, nchains=len(partitions), **kwargs) # And set each to the partition for c, p in zip(self.chains, partitions): # We need to make sure the proposal_function is set to use resample_p, which is set above v = make_h0(value=p, proposal_function=MyProposal(grammar)) c.set_state(v, compute_posterior=False ) # and make v use the right proposal function