Example #1
    def __init__(self,
                 grammar,
                 make_h0,
                 data,
                 max_depth=3,
                 increment_from=None,
                 yield_partition=False,
                 steps=Infinity,
                 grammar_optimize=True,
                 **kwargs):
        """
            Initializer.
            
            *grammar* - what grammar are we using?
            *make_h0* - a function to generate h0s. This MUST take a value argument to set the value
            *data*    - D for P(H|D)
            *max_depth*, *max_N* -- only one of these may be specified. Either enumerate up to depth max_depth, or enumerate up to the largest depth such that the number of trees is less than max_N.
            
            TODO: We can in principle optimize the grammar by including only one rule of the form NT -> TERMINAL for each NT. This will exponentially speed things up...
        """

        partitions = []

        # first figure out how many trees we have
        # We generate a lot and then replace terminal nodes with their types, because the terminal nodes will
        # be the only ones that are allowed to be altered *within* a chain. So this collapses together
        # trees that differ only on terminals
        seen_collapsed = set()  # what have we seen the collapsed forms of?
        for t in grammar.increment_tree(x=increment_from, max_depth=max_depth):
            ct = trim_leaves(t)
            if ct not in seen_collapsed:
                seen_collapsed.add(ct)
                partitions.append(t)

        #print "# Using partitions:", partitions

        # Take each partition (h0) and set resample_p to zero everywhere except at the leaves
        for p in partitions:
            print p
            for t in p:
                if not t.is_terminal():
                    t.resample_p = 0.0
                else:
                    t.resample_p = 1.0

        # initialize each chain
        MultipleChainMCMC.__init__(self,
                                   lambda: None,
                                   data,
                                   steps=steps,
                                   nchains=len(partitions),
                                   **kwargs)

        # And set each to the partition
        for c, p in zip(self.chains, partitions):
            c.set_state(make_h0(value=p))

        # and store these
        self.partitions = map(copy, partitions)
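
A minimal usage sketch for the initializer above. The enclosing class name (PartitionMCMC), MyHypothesis, and make_my_hypothesis are illustrative assumptions, not names taken from the snippet; only grammar, data, and the constructor arguments come from the code itself.

    # Hypothetical usage sketch; PartitionMCMC, MyHypothesis and make_my_hypothesis
    # are assumed names for illustration only.
    def make_my_hypothesis(value=None):
        # the factory MUST accept value=, since each chain is seeded with a partition
        return MyHypothesis(grammar=grammar, value=value)

    sampler = PartitionMCMC(grammar, make_my_hypothesis, data, max_depth=3, steps=100000)
    for h in sampler:
        print h.posterior_score, h
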
Example #2
    def __init__(self, grammar, make_h0, data, max_N=1000, steps=Infinity, **kwargs):
        """
        :param grammar: -- the grammar we use
        :param make_h0: -- make a hypothesis
        :param data:    -- data for the posterior
        :param max_N: -- the max number of samples we'll take
        :param steps: -- how many steps
        :return:
        """

        # first figure out the depth we can go to without exceeding max_N
        partitions = []
        try:
            for d in infrange():
                #print "# trying ", d
                tmp = []
                for i, t in enumerate(grammar.enumerate_at_depth(d, leaves=False)):
                    tmp.append(t)
                    if i > max_N:
                        raise BreakException

                # this gets set if we successfully exit the loop
                # so it will store the last set that didn't exceed size max_N
                partitions = tmp
        except BreakException:
            pass

        # Take each partition, which doesn't have leaves, and generate leaves, setting
        # it to a random generation (fill in the leaves with random hypotheses)
        for p in partitions:

            print "# Partition:", p

            for n in p.subnodes():
                # set to not resample these
                n.resample_p = 0.0

                # and fill in the missing leaves with a random generation
                for i, a in enumerate(n.args):
                    if grammar.is_nonterminal(a):
                        n.args[i] = grammar.generate(a)

        # initialize each chain
        MultipleChainMCMC.__init__(self, lambda: None, data, steps=steps, nchains=len(partitions), **kwargs)
        
        # And set each to the partition
        for c, p in zip(self.chains, partitions):
            c.set_state(make_h0(value=p))
        
        # and store these
        self.partitions = map(copy, partitions)
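
The depth search above relies on two small helpers that are not shown here: infrange, an unbounded integer generator, and BreakException, which is raised to escape the nested enumeration loop. A minimal sketch of what they are assumed to look like (the library's actual definitions may differ):

    # Assumed helper definitions; shown only as a sketch.
    import itertools

    class BreakException(Exception):
        """Raised to break out of a nested loop."""
        pass

    def infrange(start=0):
        # yields start, start+1, start+2, ... with no upper bound
        return itertools.count(start)
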
Example #3
    def __init__(self, grammar, make_h0, data, max_N=1000, steps=Infinity, **kwargs):
        """
        :param grammar: -- the grammar we use
        :param make_h0: -- make a hypothesis
        :param data:    -- data for the posterior
        :param max_N: -- the max number of samples we'll take
        :param steps: -- how many steps
        :return:
        """

        # first figure out the depth we can go to without exceeding max_N
        partitions = []
        try:
            for d in infrange():
                #print "# trying ", d
                tmp = []
                for i, t in enumerate(grammar.enumerate_at_depth(d, leaves=False)):
                    tmp.append(t)
                    if i > max_N:
                        raise BreakException

                # this gets set if we successfully exit the loop
                # so it will store the last set that didn't exceed size max_N
                partitions = tmp
        except BreakException:
            pass

        assert len(partitions) > 0

        # and store these so we can see them later
        self.partitions = map(copy, partitions)

        # Take each partition, which doesn't have leaves, and generate leaves, setting
        # it to a random generation (fill in the leaves with random hypotheses)
        for p in partitions:

            print "# Initializing partition:", p

            for n in p.subnodes():
                # set to not resample these
                setattr(n, 'resample_p', 0.0) ## NOTE: This is an old version of how proposals were made, but we use it here to store in each node a prob of being resampled

                # and fill in the missing leaves with a random generation
                for i, a in enumerate(n.args):
                    if grammar.is_nonterminal(a):
                        n.args[i] = grammar.generate(a)
        print "# Initialized %s partitions" % len(partitions)

        # initialize each chain
        MultipleChainMCMC.__init__(self, lambda: None, data, steps=steps, nchains=len(partitions), **kwargs)


        # And set each to the partition
        for c, p in zip(self.chains, partitions):
            # We need to make sure the proposal_function is set to use resample_p, which is set above
            v = make_h0(value=p, proposal_function=MyProposal(grammar) )
            c.set_state(v, compute_posterior=False) # and make v use the right proposal function
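
MyProposal is not defined in this snippet. Based on the comments above, it is assumed to be a regeneration-style proposal that picks the subtree to resample in proportion to each node's resample_p, so the frozen partition skeleton (resample_p = 0.0) is never changed while the randomly generated leaves can be. A rough sketch of that node-selection step, under those assumptions:

    # Rough sketch only; the real MyProposal may differ substantially.
    from random import uniform

    def choose_resample_node(tree):
        # weight each subnode by its resample_p (default 1.0); nodes in the frozen
        # partition skeleton have resample_p = 0.0 and can never be chosen
        weighted = [(n, getattr(n, 'resample_p', 1.0)) for n in tree.subnodes()]
        r = uniform(0.0, sum(w for _, w in weighted))
        for n, w in weighted:
            r -= w
            if r <= 0.0:
                return n  # this node would then be regenerated from the grammar
        return weighted[-1][0]
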
Example #4
    def __init__(self, make_h0, data, within_steps=100, **kwargs):

        MultipleChainMCMC.__init__(self, make_h0, data, **kwargs)

        self.within_steps = within_steps

        self.kwargs = kwargs
        self.seen = set()
        self.chainZ = [-Infinity for _ in xrange(self.nchains)]
        self.nsamples = 0  # How many samples have we drawn?
Example #5
    def __init__(self, make_h0, data, temperatures=[1.0, 1.1, 1.5], within_steps=50, swaps=1, yield_only_t0=False, **kwargs):

        self.yield_only_t0 = yield_only_t0 #whether we yield all samples, or only from the lowest temperature
        self.within_steps = within_steps
        self.swaps = swaps

        assert 'nchains' not in kwargs

        MultipleChainMCMC.__init__(self, make_h0, data, nchains=len(temperatures), **kwargs)

        # and set the temperatures
        for i, t in enumerate(temperatures):
            self.chains[i].likelihood_temperature = t
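
This constructor only creates the tempered chains and assigns each a likelihood_temperature; the swap move itself happens elsewhere (presumably every within_steps steps, swaps times). For reference, a generic parallel-tempering swap between two adjacent chains, written as a sketch rather than this library's actual code; current_sample and likelihood are assumed attribute names.

    # Generic swap sketch; attribute names are assumptions, not this library's API.
    from math import exp
    from random import random

    def try_swap(chain_i, chain_j):
        ti, tj = chain_i.likelihood_temperature, chain_j.likelihood_temperature
        li = chain_i.current_sample.likelihood  # log-likelihood of each chain's state
        lj = chain_j.current_sample.likelihood
        log_accept = (li - lj) * (1.0 / tj - 1.0 / ti)
        if log_accept >= 0.0 or random() < exp(log_accept):
            # swap states; posterior scores would need recomputing at the new temperatures
            chain_i.current_sample, chain_j.current_sample = chain_j.current_sample, chain_i.current_sample
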
Example #6
    def __init__(self, make_h0, data, temperatures=[1.0, 1.1, 1.5], within_steps=10, swaps=1, yield_only_t0=False, **kwargs):

        self.yield_only_t0 = yield_only_t0 #whether we yield all samples, or only from the lowest temperature
        self.within_steps = within_steps
        self.swaps=swaps

        assert 'nchains' not in kwargs

        MultipleChainMCMC.__init__(self, make_h0, data, nchains=len(temperatures), **kwargs)

        # and set the temperatures
        for i,t in enumerate(temperatures):
            self.chains[i].likelihood_temperature = t
Example #7
  def __init__(self, grammar, make_h0, data, max_depth=3, increment_from=None, yield_partition=False, steps=Infinity, grammar_optimize=True, **kwargs):
      """
          Initializer.
          
          *grammar* - what grammar are we using?
          *make_h0* - a function to generate h0s. This MUST take a value argument to set the value
          *data*    - D for P(H|D)
          *max_depth*, *max_N* -- only one of these may be specified. Either enumerate up to depth max_depth, or enumerate up to the largest depth such that the number of trees is less than max_N.
          
          TODO: We can in principle optimize the grammar by including only one rule of the form NT -> TERMINAL for each NT. This will exponentially speed things up...
      """
      
      partitions = []
 
      # first figure out how many trees we have
      # We generate a lot and then replace terminal nodes with their types, because the terminal nodes will
      # be the only ones that are allowed to be altered *within* a chain. So this collapses together
      # trees that differ only on terminals
      seen_collapsed = set() # what have we seen the collapsed forms of?
      for t in grammar.increment_tree(x=increment_from, max_depth=max_depth):
          ct = trim_leaves(t)
          if ct not in seen_collapsed:
              seen_collapsed.add(ct)
              partitions.append(t)
      
      #print "# Using partitions:", partitions

      # Take each partition (h0) and set resample_p to zero everywhere except at the leaves
      for p in partitions:
          print p
          for t in p:
              if not t.is_terminal():
                  t.resample_p = 0.0
              else:
                  t.resample_p = 1.0
      
      # initialize each chain
      MultipleChainMCMC.__init__(self, lambda: None, data, steps=steps, nchains=len(partitions), **kwargs)
      
      # And set each to the partition
      for c,p in zip(self.chains, partitions):
          c.set_state(make_h0(value=p))
      
      # and store these
      self.partitions = map(copy, partitions)
Example #8
    def next(self):

        nxt = MultipleChainMCMC.next(self)  # get the next one
        idx = self.chain_idx
        if nxt not in self.seen:
            self.chainZ[idx] = logplusexp(self.chainZ[idx], nxt.posterior_score)
            self.seen.add(nxt)

        # Process the situation where we need to re-organize
        if self.nsamples % (self.within_steps * self.nchains) == 0 and self.nsamples > 0:
            self.refresh()

        self.nsamples += 1

        return nxt
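
Here chainZ[idx] accumulates, in log space, the total unnormalized posterior mass of the distinct hypotheses each chain has produced; it starts at -Infinity, i.e. log(0). logplusexp is assumed to be a numerically stable log(exp(a) + exp(b)), roughly:

    # Sketch of the assumed logplusexp helper: stable log(exp(a) + exp(b)).
    from math import exp, log1p

    def logplusexp(a, b):
        if a == float('-inf'):
            return b
        if b == float('-inf'):
            return a
        m = max(a, b)
        return m + log1p(exp(min(a, b) - m))
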
Example #9
    def next(self):

        nxt = MultipleChainMCMC.next(self)  # get the next one
        idx = self.chain_idx
        if nxt not in self.seen:
            self.chainZ[idx] = logplusexp(self.chainZ[idx],
                                          nxt.posterior_score)
            self.seen.add(nxt)

        # Process the situation where we need to re-organize
        if self.nsamples % (self.within_steps *
                            self.nchains) == 0 and self.nsamples > 0:
            self.refresh()

        self.nsamples += 1

        return nxt
Example #10
    def __init__(self,
                 grammar,
                 make_h0,
                 data,
                 max_N=1000,
                 steps=Infinity,
                 **kwargs):
        """
        :param grammar: -- the grammar we use
        :param make_h0: -- make a hypothesis
        :param data:    -- data for the posterior
        :param max_N: -- the max number of samples we'll take
        :param steps: -- how many steps
        :return:
        """

        # first figure out the depth we can go to without exceeding max_N
        partitions = []
        try:
            for d in infrange():
                #print "# trying ", d
                tmp = []
                for i, t in enumerate(
                        grammar.enumerate_at_depth(d, leaves=False)):
                    tmp.append(t)
                    if i > max_N:
                        raise BreakException

                # this gets set if we successfully exit the loop
                # so it will store the last set that didn't exceed size max_N
                partitions = tmp
        except BreakException:
            pass

        assert len(partitions) > 0

        # and store these so we can see them later
        self.partitions = map(copy, partitions)

        # Take each partition, which doesn't have leaves, and generate leaves, setting
        # it to a random generation (fill in the leaves with random hypotheses)
        for p in partitions:

            print "# Initializing partition:", p

            for n in p.subnodes():
                # set to not resample these
                setattr(
                    n, 'resample_p', 0.0
                )  ## NOTE: This is an old version of how proposals were made, but we use it here to store in each node a prob of being resampled

                # and fill in the missing leaves with a random generation
                for i, a in enumerate(n.args):
                    if grammar.is_nonterminal(a):
                        n.args[i] = grammar.generate(a)
        print "# Initialized %s partitions" % len(partitions)

        # initialize each chain
        MultipleChainMCMC.__init__(self,
                                   lambda: None,
                                   data,
                                   steps=steps,
                                   nchains=len(partitions),
                                   **kwargs)

        # And set each to the partition
        for c, p in zip(self.chains, partitions):
            # We need to make sure the proposal_function is set to use resample_p, which is set above
            v = make_h0(value=p, proposal_function=MyProposal(grammar))
            c.set_state(v, compute_posterior=False
                        )  # and make v use the right proposal function