Beispiel #1
0
    def config(self):
        """Read sampler options from the ini file and build the MCMC sampler.

        Stores the pipeline in a module-level global so the module-level
        posterior function can reach it when evaluating points.
        """
        global pipeline
        pipeline = self.pipeline
        # Total number of samples to draw before stopping.
        self.samples = self.read_ini("samples", int, default=20000)
        random_start = self.read_ini("random_start", bool, default=False)
        # Gelman-Rubin convergence threshold; -1 disables the test.
        self.Rconverge = self.read_ini("Rconverge", float, -1.0)
        if self.Rconverge == -1.0:
            self.Rconverge = None
        self.interrupted = False
        self.num_samples = 0
        #Any other options go here

        #start values from prior
        start = self.define_parameters(random_start)
        # print() calls, matching the Python-3-style used elsewhere in
        # this module (the original used Python-2 print statements).
        print("MCMC starting point:")
        for param, x in zip(self.pipeline.varied_params, start):
            print("    ", param, x)
        self.n = self.read_ini("nsteps", int, default=100)

        #Covariance matrix
        covmat = self.load_covariance_matrix()

        #Sampler object itself.
        quiet = self.pipeline.quiet
        self.sampler = metropolis.MCMC(start, posterior, covmat, quiet=quiet)
        self.analytics = Analytics(self.pipeline.varied_params, self.pool)
Beispiel #2
0
class PyMCSampler(ParallelSampler):
    """Sampler that drives the pipeline with the (legacy) PyMC 2 engine.

    Works in the pipeline's normalized [0, 1] parameter space and
    denormalizes when writing output.
    """
    sampler_outputs = [("like", float)]

    def config(self):
        """Read ini options and build PyMC parameter/likelihood objects."""
        # lazy pymc import to avoid dependency when using other samplers
        import pymc
        self.pymc = pymc

        self.verbose = logging.getLogger().level > logging.WARNING
        self.interrupted = False

        # load sampling parameters
        self.num_samples = 0

        self.nsteps = self.read_ini("nsteps", int, 100)
        self.samples = self.read_ini("samples", int, 1000)
        # burn_fraction below 1.0 is a fraction of 'samples';
        # 1.0 or above is an absolute number of burn-in samples
        fburn = self.read_ini("burn_fraction", float, 0.0)
        if 0.0 <= fburn < 1.0:
            self.nburn = int(fburn * self.samples)
        else:
            self.nburn = int(fburn)

        # Gelman-Rubin convergence threshold; -1 disables the test
        self.Rconverge = self.read_ini("Rconverge", float, -1.0)
        if self.Rconverge == -1.0:
            self.Rconverge = None

        params = self.define_parameters()

        @pymc.data
        @pymc.stochastic(verbose=self.verbose)
        def data_likelihood(params=params, value=0.0):
            # denormalize from the unit cube to physical values
            try:
                params = self.pipeline.denormalize_vector(params)
            except ValueError:
                # out-of-range point: reject with zero probability
                return -np.inf
            like, extra = self.pipeline.likelihood(params)
            return like

        self.mcmc = self.pymc.MCMC(input={'data_likelihood': data_likelihood,
                                          'params': params},
                                   db='ram', verbose=0)

        try:
            covmat = self.load_covariance_matrix()
        except IOError:
            covmat = None

        # determine step method
        self.do_adaptive = self.read_ini("adaptive_mcmc", bool, False)
        if self.do_adaptive:
            delay = 100
        else:
            # effectively never start adapting
            delay = 10000000000

        if covmat is not None or self.do_adaptive:
            self.mcmc.use_step_method(self.pymc.AdaptiveMetropolis,
                                      params,
                                      cov=covmat,
                                      interval=self.nsteps,
                                      delay=delay,
                                      verbose=0)
        else:
            for p in params:
                self.mcmc.use_step_method(self.pymc.Metropolis, p, verbose=0)

        self.analytics = Analytics(self.pipeline.varied_params, self.pool)

    def sample(self):
        """Take one batch of MCMC steps and record the results."""
        # during burn-in, do not step past the end of the burn phase
        if self.num_samples < self.nburn:
            steps = min(self.nsteps, self.nburn - self.num_samples)
        else:
            steps = min(self.nsteps, self.samples - self.num_samples)

        # take steps MCMC steps
        self.mcmc.sample(steps, progress_bar=False, tune_throughout=False)
        if self.mcmc._current_iter != steps:
            # user must have pressed ctrl-C,
            # or something else went wrong
            self.interrupted = True

        self.num_samples += self.mcmc._current_iter

        # denormalize traces back to physical parameter values;
        # transpose so rows are samples, columns are parameters
        traces = np.array([[param.denormalize(x)
                           for x in self.mcmc.trace(str(param))]
                           for param in self.pipeline.varied_params]).T
        likes = -0.5 * self.mcmc.trace('deviance')[:]

        # zip instead of the Python-2-only itertools.izip: identical
        # iteration behavior, works on both Python 2 and 3
        for trace, like in zip(traces, likes):
            self.output.parameters(trace, like)

        self.analytics.add_traces(traces)

        self.output.log_noisy("Done %d iterations" % self.num_samples)

    def worker(self):
        """Sampling loop run by parallel workers until convergence."""
        while not self.is_converged():
            self.sample()

    def execute(self):
        """Single sampling step for the serial driver."""
        self.sample()

    def is_converged(self):
        """Return True when sampling should stop."""
        # user has pressed Ctrl-C
        if self.interrupted:
            return True
        if self.num_samples >= self.samples:
            return True
        elif self.num_samples > 0 and self.pool is not None and \
            self.Rconverge is not None:
            # Gelman-Rubin statistic across parallel chains
            R = self.analytics.gelman_rubin(quiet=self.pipeline.quiet)
            R1 = abs(R - 1)
            return np.all(R1 <= self.Rconverge)
        else:
            return False

    def load_covariance_matrix(self):
        """Load, validate, normalize, and reorder the proposal covariance.

        Returns None if no covmat file was specified.
        Raises ValueError for a missing file or wrong shape.
        """
        covmat_filename = self.read_ini("covmat", str, "").strip()
        if covmat_filename == "":
            return None
        if not os.path.exists(covmat_filename):
            raise ValueError(
                "Covariance matrix %s not found" % covmat_filename)
        covmat = np.loadtxt(covmat_filename)

        # promote a scalar or a vector of std-devs to a full matrix
        if covmat.ndim == 0:
            covmat = covmat.reshape((1, 1))
        elif covmat.ndim == 1:
            covmat = np.diag(covmat ** 2)

        nparams = len(self.pipeline.varied_params)
        if covmat.shape != (nparams, nparams):
            raise ValueError("The covariance matrix was shape (%d x %d), "
                             "but there are %d varied parameters." %
                             (covmat.shape[0], covmat.shape[1], nparams))

        # normalize covariance matrix into the unit-cube parameter space
        r = np.array([param.width() for param
                      in self.pipeline.varied_params])
        # range() instead of Python-2-only xrange(): same loop behavior
        for i in range(covmat.shape[0]):
            covmat[i, :] /= r
            covmat[:, i] /= r

        # reorder to PyMC variable ordering
        desired_order = [m.__name__ for m in self.mcmc.stochastics]
        actual_order = [str(m) for m in self.pipeline.varied_params]
        covmat = self.reorder_matrix(actual_order, desired_order, covmat)
        return covmat

    @staticmethod
    def reorder_matrix(old_order, new_order, cov):
        """Return cov with rows/columns permuted from old_order to new_order."""
        n = len(old_order)
        cov2 = np.zeros((n, n))
        for i in range(n):
            old_i = old_order.index(new_order[i])
            for j in range(n):
                old_j = old_order.index(new_order[j])
                cov2[i, j] = cov[old_i, old_j]
        return cov2

    # create PyMC parameter objects
    def define_parameters(self):
        ''' Create PyMC parameter objects based on varied params '''
        priors = []
        for param in self.pipeline.varied_params:
            prior = param.prior
            # start in the normalized [0,1] space
            start_value = param.normalize(param.random_point())

            if prior is None or isinstance(prior, UniformPrior):
                # uniform prior
                priors.append(self.pymc.Uniform(str(param),
                                                lower=0.0,
                                                upper=1.0,
                                                value=start_value))
            elif isinstance(prior, GaussianPrior):
                # rescale the Gaussian into the unit cube:
                # normalized variance = sigma2 / width^2, so tau = width^2/sigma2
                width = param.width()
                mu = (prior.mu - param.limits[0]) / width
                tau = width ** 2 / prior.sigma2

                priors.append(self.pymc.Normal(str(param),
                                               mu=mu,
                                               tau=tau,
                                               value=start_value))
            elif isinstance(prior, ExponentialPrior):
                width = param.width()
                priors.append(self.pymc.Exponential(str(param),
                                                    beta=width / prior.beta,
                                                    value=start_value))
            else:
                raise RuntimeError("Unknown prior type in PyMC sampler")
        return priors
Beispiel #3
0
    def config(self):
        """Set up the PyMC machinery: parameters, likelihood, step method."""
        # lazy pymc import to avoid dependency when using other samplers
        import pymc
        self.pymc = pymc

        self.verbose = logging.getLogger().level > logging.WARNING
        self.interrupted = False

        # counters and run-length options
        self.num_samples = 0

        self.nsteps = self.read_ini("nsteps", int, 100)
        self.samples = self.read_ini("samples", int, 1000)

        # burn-in: a fraction of the run if < 1, an absolute count otherwise
        burn = self.read_ini("burn_fraction", float, 0.0)
        self.nburn = int(burn * self.samples) if 0.0 <= burn < 1.0 else int(burn)

        # Gelman-Rubin threshold (-1 disables the convergence test)
        self.Rconverge = self.read_ini("Rconverge", float, -1.0)
        if self.Rconverge == -1.0:
            self.Rconverge = None

        pymc_params = self.define_parameters()

        @pymc.data
        @pymc.stochastic(verbose=self.verbose)
        def data_likelihood(params=pymc_params, value=0.0):
            try:
                params = self.pipeline.denormalize_vector(params)
            except ValueError:
                return -np.inf
            like, extra = self.pipeline.likelihood(params)
            return like

        self.mcmc = self.pymc.MCMC(db='ram', verbose=0,
                                   input={'data_likelihood': data_likelihood,
                                          'params': pymc_params})

        try:
            covmat = self.load_covariance_matrix()
        except IOError:
            covmat = None

        # choose the step method
        self.do_adaptive = self.read_ini("adaptive_mcmc", bool, False)
        # huge delay effectively disables adaptation unless requested
        delay = 100 if self.do_adaptive else 10000000000

        if covmat is None and not self.do_adaptive:
            # independent single-parameter Metropolis steps
            for pymc_param in pymc_params:
                self.mcmc.use_step_method(self.pymc.Metropolis, pymc_param,
                                          verbose=0)
        else:
            # joint adaptive proposal, seeded from covmat when available
            self.mcmc.use_step_method(self.pymc.AdaptiveMetropolis,
                                      pymc_params,
                                      cov=covmat,
                                      interval=self.nsteps,
                                      delay=delay,
                                      verbose=0)

        self.analytics = Analytics(self.pipeline.varied_params, self.pool)
Beispiel #4
0
class MetropolisSampler(ParallelSampler):
    """Simple parallel Metropolis-Hastings sampler."""
    parallel_output = True
    sampler_outputs = [("post", float)]

    def config(self):
        """Read sampler options from the ini file and build the MCMC sampler.

        Stores the pipeline in a module-level global so the module-level
        posterior function can reach it when evaluating points.
        """
        global pipeline
        pipeline = self.pipeline
        # Total number of samples to draw before stopping.
        self.samples = self.read_ini("samples", int, default=20000)
        random_start = self.read_ini("random_start", bool, default=False)
        # Gelman-Rubin convergence threshold; -1 disables the test.
        self.Rconverge = self.read_ini("Rconverge", float, -1.0)
        if self.Rconverge == -1.0:
            self.Rconverge = None
        self.interrupted = False
        self.num_samples = 0
        #Any other options go here

        #start values from prior
        start = self.define_parameters(random_start)
        # print() calls, matching the Python-3-style used elsewhere in
        # this module (the original used Python-2 print statements).
        print("MCMC starting point:")
        for param, x in zip(self.pipeline.varied_params, start):
            print("    ", param, x)
        self.n = self.read_ini("nsteps", int, default=100)

        #Covariance matrix
        covmat = self.load_covariance_matrix()

        #Sampler object itself.
        quiet = self.pipeline.quiet
        self.sampler = metropolis.MCMC(start, posterior, covmat, quiet=quiet)
        self.analytics = Analytics(self.pipeline.varied_params, self.pool)

    def worker(self):
        """Keep drawing batches of samples until convergence is reached."""
        while True:
            if self.is_converged():
                break
            self.execute()

    def execute(self):
        """Run one batch of self.n MCMC steps and record the results."""
        #Run the MCMC  sampler.
        try:
            samples = self.sampler.sample(self.n)
        except KeyboardInterrupt:
            # flag so is_converged() can stop the run cleanly
            self.interrupted = True
            return
        self.num_samples += self.n
        # (an unused 'likes' array allocation was removed here)
        traces = np.empty((self.n, len(self.pipeline.varied_params)))
        for i, (vector, like, extra) in enumerate(samples):
            self.output.parameters(vector, extra, like)
            traces[i, :] = vector
        self.analytics.add_traces(traces)

        rate = self.sampler.accepted * 100.0 / self.sampler.iterations
        # print() form, consistent with the Python-3-style used elsewhere
        print("Accepted %d / %d samples (%.2f%%)\n" %
              (self.sampler.accepted, self.sampler.iterations, rate))
        self.sampler.tune()

    def is_converged(self):
        """Return True when sampling should stop.

        Stops on Ctrl-C, on reaching the requested sample count, or when
        the Gelman-Rubin statistic (across parallel chains) is within
        Rconverge of 1.
        """
        # user has pressed Ctrl-C
        if self.interrupted:
            return True
        if self.num_samples >= self.samples:
            # print() form, consistent with the rest of the module
            print("Full number of samples generated; sampling complete")
            return True
        elif self.num_samples > 0 and \
                self.pool is not None and \
                self.Rconverge is not None:
            R = self.analytics.gelman_rubin(quiet=self.pipeline.quiet)
            R1 = abs(R - 1)
            return np.all(R1 <= self.Rconverge)
        else:
            return False



    def load_covariance_matrix(self):
        """Build the proposal covariance: from hints, a file, or a default.

        Raises ValueError if a named file is missing or the matrix has
        the wrong shape.
        """
        filename = self.read_ini("covmat", str, "").strip()
        if filename == "":
            if self.distribution_hints.has_cov():
                # another sampler already estimated a covariance for us
                covmat = self.distribution_hints.get_cov()
            else:
                # default: 1% of each parameter's prior width
                covmat = np.array(
                    [p.width() / 100.0 for p in self.pipeline.varied_params])
        else:
            if not os.path.exists(filename):
                raise ValueError(
                    "Covariance matrix %s not found" % filename)
            covmat = np.loadtxt(filename)

        # promote a scalar or a vector of std-devs to a full matrix
        if covmat.ndim == 0:
            covmat = covmat.reshape((1, 1))
        elif covmat.ndim == 1:
            covmat = np.diag(covmat ** 2)

        nparams = len(self.pipeline.varied_params)
        if covmat.shape != (nparams, nparams):
            raise ValueError("The covariance matrix was shape (%d x %d), "
                             "but there are %d varied parameters." %
                             (covmat.shape[0], covmat.shape[1], nparams))
        return covmat



    def define_parameters(self, random_start):
        """Return the chain's starting vector, random or from the ini values."""
        if not random_start:
            return self.pipeline.start_vector()
        return self.pipeline.randomized_start()
    def config(self):
        """Read sampler options from the ini file and build the MCMC sampler.

        Samples in the pipeline's normalized parameter space, with an
        optional proposal-tuning phase and optional fast/slow dragging.
        """
        # The module-level posterior function reaches the pipeline via
        # this global.
        global pipeline
        pipeline = self.pipeline
        self.samples = self.read_ini("samples", int, default=20000)
        random_start = self.read_ini("random_start", bool, default=False)
        use_cobaya = self.read_ini("cobaya", bool, default=False)
        # Gelman-Rubin convergence threshold; -1 disables the test (below)
        self.Rconverge = self.read_ini("Rconverge", float, -1.0)
        self.drag = self.read_ini("drag", int, 0)
        self.oversampling = self.read_ini("oversampling", int, 5)
        tuning_frequency = self.read_ini("tuning_frequency", int, -1)
        tuning_grace = self.read_ini("tuning_grace", int, 5000)
        self.tuning_end = self.read_ini("tuning_end", int, 100000)
        self.n = self.read_ini("nsteps", int, default=100)
        self.exponential_probability = self.read_ini("exponential_probability",
                                                     float,
                                                     default=0.333)
        self.split = None  #work out later
        if self.Rconverge == -1.0:
            self.Rconverge = None
        self.interrupted = False
        self.num_samples = 0
        self.ndim = len(self.pipeline.varied_params)
        # samples taken after the tuning phase ended
        self.num_samples_post_tuning = 0
        # acceptance count at the end of the previous batch
        self.last_accept_count = 0
        #Any other options go here

        # if we are not tuning then there is no tuning phase
        if tuning_frequency == -1:
            self.tuning_end = 0

        if (self.drag > 0) and not self.pipeline.do_fast_slow:
            print(
                "You asked for dragging, but the pipeline does not have fast/slow enabled"
                ", so no draggng will be done.")

        #start values from prior
        start = self.define_parameters(random_start)
        print("MCMC starting point:")
        for param, x in zip(self.pipeline.varied_params, start):
            print("    ", param, x)

        #Covariance matrix
        covmat = self.load_covariance_matrix()

        #Sampler object itself.
        quiet = self.pipeline.quiet
        # sampling happens in the normalized [0,1] parameter space
        start_norm = self.pipeline.normalize_vector(start)
        covmat_norm = self.pipeline.normalize_matrix(covmat)

        if use_cobaya:
            print("Using the Cobaya proposal")

        self.sampler = metropolis.MCMC(
            start_norm,
            posterior,
            covmat_norm,
            quiet=quiet,
            tuning_frequency=
            tuning_frequency,  # Will be multiplied by the oversampling
            tuning_grace=tuning_grace,  # within the sampler if needed
            tuning_end=self.tuning_end,
            exponential_probability=self.exponential_probability,
            use_cobaya=use_cobaya,
            n_drag=self.drag,
        )
        self.analytics = Analytics(self.pipeline.varied_params, self.pool)
        # set True once fast/slow splitting has been configured in execute()
        self.fast_slow_done = False
class MetropolisSampler(ParallelSampler):
    """Parallel Metropolis-Hastings sampler with proposal tuning,
    fast/slow subspace support, and chain resuming."""
    parallel_output = True
    sampler_outputs = [("prior", float), ("post", float)]
    understands_fast_subspaces = True
    supports_resume = True

    def config(self):
        """Read sampler options from the ini file and build the MCMC sampler.

        Samples in the pipeline's normalized parameter space, with an
        optional proposal-tuning phase and optional fast/slow dragging.
        """
        # The module-level posterior function reaches the pipeline via
        # this global.
        global pipeline
        pipeline = self.pipeline
        self.samples = self.read_ini("samples", int, default=20000)
        random_start = self.read_ini("random_start", bool, default=False)
        use_cobaya = self.read_ini("cobaya", bool, default=False)
        # Gelman-Rubin convergence threshold; -1 disables the test (below)
        self.Rconverge = self.read_ini("Rconverge", float, -1.0)
        self.drag = self.read_ini("drag", int, 0)
        self.oversampling = self.read_ini("oversampling", int, 5)
        tuning_frequency = self.read_ini("tuning_frequency", int, -1)
        tuning_grace = self.read_ini("tuning_grace", int, 5000)
        self.tuning_end = self.read_ini("tuning_end", int, 100000)
        self.n = self.read_ini("nsteps", int, default=100)
        self.exponential_probability = self.read_ini("exponential_probability",
                                                     float,
                                                     default=0.333)
        self.split = None  #work out later
        if self.Rconverge == -1.0:
            self.Rconverge = None
        self.interrupted = False
        self.num_samples = 0
        self.ndim = len(self.pipeline.varied_params)
        # samples taken after the tuning phase ended
        self.num_samples_post_tuning = 0
        # acceptance count at the end of the previous batch
        self.last_accept_count = 0
        #Any other options go here

        # if we are not tuning then there is no tuning phase
        if tuning_frequency == -1:
            self.tuning_end = 0

        if (self.drag > 0) and not self.pipeline.do_fast_slow:
            print(
                "You asked for dragging, but the pipeline does not have fast/slow enabled"
                ", so no dragging will be done.")

        #start values from prior
        start = self.define_parameters(random_start)
        print("MCMC starting point:")
        for param, x in zip(self.pipeline.varied_params, start):
            print("    ", param, x)

        #Covariance matrix
        covmat = self.load_covariance_matrix()

        #Sampler object itself.
        quiet = self.pipeline.quiet
        # sampling happens in the normalized [0,1] parameter space
        start_norm = self.pipeline.normalize_vector(start)
        covmat_norm = self.pipeline.normalize_matrix(covmat)

        if use_cobaya:
            print("Using the Cobaya proposal")

        self.sampler = metropolis.MCMC(
            start_norm,
            posterior,
            covmat_norm,
            quiet=quiet,
            # tuning_frequency will be multiplied by the oversampling
            # within the sampler if needed
            tuning_frequency=tuning_frequency,
            tuning_grace=tuning_grace,
            tuning_end=self.tuning_end,
            exponential_probability=self.exponential_probability,
            use_cobaya=use_cobaya,
            n_drag=self.drag,
        )
        self.analytics = Analytics(self.pipeline.varied_params, self.pool)
        # set True once fast/slow splitting has been configured in execute()
        self.fast_slow_done = False

    def worker(self):
        """Sampling loop run by parallel workers until convergence."""
        while not self.is_converged():
            self.execute()
            if self.output:
                self.output.flush()

    def resume(self):
        """Restore sampler state and existing chain samples, if any."""
        resume_info = self.read_resume_info()
        if resume_info is None:
            return

        sampler, self.num_samples, self.num_samples_post_tuning = resume_info

        self.sampler = sampler

        # If we started main sampling (as opposed to tuning phase)
        # then we will have some existing chain, but this is not always the case
        try:
            data = np.genfromtxt(self.output._filename,
                                 invalid_raise=False)[:, :self.ndim]
            self.analytics.add_traces(data)
        except IndexError:
            data = None

        if self.num_samples >= self.samples:
            # report the counter, not len(data): data may be None here
            # if no post-tuning chain was written
            print(
                "You told me to resume the chain - it has already completed (with {} samples), so sampling will end."
                .format(self.num_samples))
            print("Increase the 'samples' parameter to keep going.")
        elif self.is_converged():
            print(
                "The resumed chain was already converged.  You can change the converged testing parameters to extend it."
            )
        elif data is None:
            print(
                "Continuing metropolis from existing chain - you were in the tuning phase, which will continue"
            )
        else:
            print(
                "Continuing metropolis from existing chain - have {} samples already"
                .format(len(data)))

    def execute(self):
        """Run one batch of self.n MCMC steps and record the results.

        Output is suppressed until the tuning phase has ended.
        """
        #Run the MCMC  sampler.
        if self.pipeline.do_fast_slow and not self.fast_slow_done:
            # configure fast/slow splitting once, on the first batch
            self.fast_slow_done = True
            self.sampler.set_fast_slow(self.pipeline.fast_param_indices,
                                       self.pipeline.slow_param_indices,
                                       self.oversampling)

        try:
            samples = self.sampler.sample(self.n)
        except KeyboardInterrupt:
            # flag so is_converged() can stop the run cleanly
            self.interrupted = True
            return
        self.num_samples += self.n
        self.num_samples_post_tuning = self.num_samples - self.tuning_end

        # Only output samples once tuning is complete
        if self.num_samples_post_tuning > 0:
            # keep only the post-tuning tail of this batch (the whole
            # batch once we are fully past the tuning phase)
            samples = samples[-self.num_samples_post_tuning:]
            # size traces to the kept samples: the original allocated
            # self.n rows and could pass uninitialized rows to analytics
            # when tuning ended mid-batch
            traces = np.empty((len(samples), self.ndim))

            for i, result in enumerate(samples):
                self.output.parameters(result.vector, result.extra,
                                       result.prior, result.post)
                traces[i, :] = result.vector

            self.analytics.add_traces(traces)

            overall_rate = (self.sampler.accepted *
                            1.0) / self.sampler.iterations
            recent_accepted = self.sampler.accepted - self.last_accept_count
            recent_rate = recent_accepted / self.n
            print("Overall accepted {} / {} samples ({:.1%})".format(
                self.sampler.accepted, self.sampler.iterations, overall_rate))
            print("Last {0} accepted {1} / {0} samples ({2:.1%})\n".format(
                self.n, recent_accepted, recent_rate))
            self.last_accept_count = self.sampler.accepted
        else:
            print(
                "Done {} samples. Tuning proposal until {} so no output yet\n".
                format(self.num_samples, self.tuning_end))

        # persist state so the chain can be resumed
        self.write_resume_info(
            [self.sampler, self.num_samples, self.num_samples_post_tuning])

    def is_converged(self):
        """Return True when sampling should stop.

        Stops on Ctrl-C, on reaching the requested sample count, or when
        the Gelman-Rubin statistic (across parallel chains) is within
        Rconverge of 1 - but only once tuning has finished.
        """
        # user has pressed Ctrl-C
        if self.interrupted:
            return True
        if self.num_samples >= self.samples:
            print("Full number of samples generated; sampling complete")
            return True
        elif (self.num_samples > 0 and self.pool is not None
              and self.Rconverge is not None
              and self.num_samples_post_tuning > 0):
            R = self.analytics.gelman_rubin(quiet=False)
            R1 = abs(R - 1)
            return np.all(R1 <= self.Rconverge)
        else:
            return False

    def load_covariance_matrix(self):
        """Build the proposal covariance: from hints, a file, or a default.

        Raises ValueError if a named file is missing or the matrix has
        the wrong shape.
        """
        covmat_filename = self.read_ini("covmat", str, "").strip()
        if covmat_filename == "" and self.distribution_hints.has_cov():
            # another sampler already estimated a covariance for us
            covmat = self.distribution_hints.get_cov()
        elif covmat_filename == "":
            # default: 1% of each parameter's prior width
            covmat = np.array(
                [p.width() / 100.0 for p in self.pipeline.varied_params])
        elif not os.path.exists(covmat_filename):
            raise ValueError("Covariance matrix %s not found" %
                             covmat_filename)
        else:
            covmat = np.loadtxt(covmat_filename)

        # promote a scalar or a vector of std-devs to a full matrix
        if covmat.ndim == 0:
            covmat = covmat.reshape((1, 1))
        elif covmat.ndim == 1:
            covmat = np.diag(covmat**2)

        nparams = len(self.pipeline.varied_params)
        if covmat.shape != (nparams, nparams):
            raise ValueError("The covariance matrix was shape (%d x %d), "
                             "but there are %d varied parameters." %
                             (covmat.shape[0], covmat.shape[1], nparams))
        return covmat

    def define_parameters(self, random_start):
        """Return the chain's starting vector, random or from the ini values."""
        if random_start:
            return self.pipeline.randomized_start()
        else:
            return self.pipeline.start_vector()