Example 1
    def initial_samples_from_model(self, total_points, model,
                                   fitness_function):
        """
        Generate the initial points of the non-linear search, by randomly drawing unit values from a uniform
        distribution between the ball_lower_limit and ball_upper_limit values.

        Parameters
        ----------
        total_points : int
            The number of points in non-linear paramemter space which initial points are created for.
        model : ModelMapper
            An object that represents possible instances of some model with a given dimensionality which is the number
            of free dimensions of the model.
        """

        if conf.instance["general"]["test"]["test_mode"]:
            return self.initial_samples_in_test_mode(total_points=total_points,
                                                     model=model)

        logger.info(
            "Generating initial samples of model, which are subject to prior limits and other constraints."
        )

        initial_unit_parameter_lists = []
        initial_parameter_lists = []
        initial_figures_of_merit_list = []

        point_index = 0

        while point_index < total_points:

            unit_parameter_list = model.random_unit_vector_within_limits(
                lower_limit=self.lower_limit, upper_limit=self.upper_limit)
            parameter_list = model.vector_from_unit_vector(
                unit_vector=unit_parameter_list)

            try:
                figure_of_merit = fitness_function.figure_of_merit_from(
                    parameter_list=parameter_list)

                if np.isnan(figure_of_merit):
                    raise exc.FitException

                initial_unit_parameter_lists.append(unit_parameter_list)
                initial_parameter_lists.append(parameter_list)
                initial_figures_of_merit_list.append(figure_of_merit)
                point_index += 1
            except exc.FitException:
                pass

        return initial_unit_parameter_lists, initial_parameter_lists, initial_figures_of_merit_list
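
A minimal standalone sketch of the same rejection-sampling idea, stripped of the search machinery: the unit-to-physical mapping, the figure-of-merit function, and the names below are toy assumptions, not the library's API.

import numpy as np

def initial_samples(total_points, ndim, figure_of_merit, rng=None):
    # Draw unit-cube points, map them to physical parameters, and keep
    # only points whose figure of merit is finite, mirroring the
    # FitException rejection loop above.
    rng = np.random.default_rng() if rng is None else rng
    unit_lists, parameter_lists, merits = [], [], []
    while len(merits) < total_points:
        unit = rng.uniform(size=ndim)
        parameters = 10.0 * unit - 5.0  # toy unit-to-physical mapping
        merit = figure_of_merit(parameters)
        if np.isnan(merit):
            continue  # rejected: redraw this point
        unit_lists.append(unit)
        parameter_lists.append(parameters)
        merits.append(merit)
    return unit_lists, parameter_lists, merits

# Usage: a Gaussian log likelihood as the figure of merit.
print(initial_samples(4, 2, lambda p: -0.5 * float(np.sum(p ** 2)))[2])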
Example 2
    def _fit(self,
             model: AbstractPriorModel,
             analysis,
             log_likelihood_cap=None) -> res.Result:
        """
        Fit a model using MultiNest and the Analysis class which contains the data and returns the log likelihood from
        instances of the model, which the `NonLinearSearch` seeks to maximize.

        Parameters
        ----------
        model : ModelMapper
            The model which generates instances for different points in parameter space.
        analysis : Analysis
            Contains the data and the log likelihood function which fits an instance of the model to the data, returning
            the log likelihood the `NonLinearSearch` maximizes.

        Returns
        -------
        A result object comprising the Samples object that includes the maximum log likelihood instance and full
        set of accepted samples of the fit.
        """

        # noinspection PyUnusedLocal
        def prior(cube, ndim, nparams):
            # NEVER EVER REFACTOR THIS LINE! Haha.
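            # MultiNest reads the transformed parameters back out of `cube`,
            # so the physical values must be written into it in place rather
            # than returned as a new list.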

            phys_cube = model.vector_from_unit_vector(unit_vector=cube)

            for i in range(len(phys_cube)):
                cube[i] = phys_cube[i]

            return cube

        fitness_function = self.fitness_function_from_model_and_analysis(
            model=model, analysis=analysis)

        import pymultinest

        logger.info("Beginning MultiNest non-linear search. ")

        pymultinest.run(fitness_function,
                        prior,
                        model.prior_count,
                        outputfiles_basename="{}/multinest".format(
                            self.paths.path),
                        verbose=not self.silence,
                        **self.config_dict_search)
        self.copy_from_sym()
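
For context, a minimal sketch of the two callbacks pymultinest.run expects, outside the search wrapper; the uniform prior ranges and the output basename are assumptions, and the MultiNest library itself must be installed for the run call to work.

import os

import pymultinest

os.makedirs("chains", exist_ok=True)  # MultiNest does not create the directory

# The prior transform maps the unit hypercube to physical parameters in
# place; uniform priors on [-5, 5] in every dimension are assumed here.
def prior(cube, ndim, nparams):
    for i in range(ndim):
        cube[i] = 10.0 * cube[i] - 5.0

# The log likelihood receives the already-transformed cube.
def loglike(cube, ndim, nparams):
    return -0.5 * sum(cube[i] ** 2 for i in range(ndim))

pymultinest.run(loglike, prior, 3, outputfiles_basename="chains/toy-")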
Example 3
    def run(self):
        """
        Run this process, completing each job in the job_queue and
        passing the result to the queue.
        """
        logger.info("starting process {}".format(self.name))
        while True:
            sleep(0.025)
            if self.count >= self.max_count:
                # No job has arrived for max_count consecutive polls, so the
                # process shuts itself down.
                break
            if self.job_queue.empty():
                self.count += 1
            else:
                self.count = 0
                job = self.job_queue.get()
                self.queue.put(job.perform())
        logger.info("terminating process {}".format(self.name))
        self.job_queue.close()
Example 4
    def __init__(self, name: str, job_queue: multiprocessing.Queue):
        """
        A parallel process that consumes Jobs through the job queue and outputs results through its own queue.

        Parameters
        ----------
        name: str
            The name of the process
        job_queue: multiprocessing.Queue
            The queue through which jobs are submitted
        """
        super().__init__(name=name)
        logger.info("created process {}".format(name))

        self.job_queue = job_queue
        self.queue = multiprocessing.Queue()
        self.count = 0
        # Number of consecutive empty polls of the job queue (at 0.025s each
        # in run()) after which the process terminates itself.
        self.max_count = 250
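
A sketch of how the two pieces above fit together, assuming the Process class is importable and that a Job is any object with a perform() method; the SquareJob below is hypothetical.

import multiprocessing

class SquareJob:
    """A hypothetical Job: perform() returns the squared input."""
    def __init__(self, value):
        self.value = value

    def perform(self):
        return self.value * self.value

if __name__ == "__main__":
    job_queue = multiprocessing.Queue()
    process = Process(name="worker-0", job_queue=job_queue)
    process.start()
    for value in range(5):
        job_queue.put(SquareJob(value))
    results = [process.queue.get() for _ in range(5)]
    process.join()  # run() exits once the queue stays empty for max_count polls
    print(sorted(results))  # [0, 1, 4, 9, 16]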
Example 5
    def _fit(self,
             model: AbstractPriorModel,
             analysis,
             log_likelihood_cap=None):
        """
        Fit a model using Zeus and the Analysis class which contains the data and returns the log likelihood from
        instances of the model, which the `NonLinearSearch` seeks to maximize.

        Parameters
        ----------
        model : ModelMapper
            The model which generates instances for different points in parameter space.
        analysis : Analysis
            Contains the data and the log likelihood function which fits an instance of the model to the data, returning
            the log likelihood the `NonLinearSearch` maximizes.

        Returns
        -------
        A result object comprising the Samples object that includes the maximum log likelihood instance and full
        chains used by the fit.
        """

        pool = self.make_pool()

        fitness_function = self.fitness_function_from_model_and_analysis(
            model=model, analysis=analysis)

        if self.paths.is_object("zeus"):

            zeus_sampler = self.zeus_pickled

            zeus_state = zeus_sampler.get_last_sample()
            initial_log_posterior_list = zeus_sampler.get_last_log_prob()

            samples = self.samples_from(model=model)

            total_iterations = zeus_sampler.iteration

            if samples.converged:
                iterations_remaining = 0
            else:
                iterations_remaining = self.config_dict_run[
                    "nsteps"] - total_iterations

                logger.info(
                    "Existing Zeus samples found, resuming non-linear search.")

        else:

            zeus_sampler = zeus.EnsembleSampler(
                nwalkers=self.config_dict_search["nwalkers"],
                ndim=model.prior_count,
                logprob_fn=fitness_function.__call__,
                pool=pool,
            )

            zeus_sampler.ncall_total = 0

            initial_unit_parameter_lists, initial_parameter_lists, initial_log_posterior_list = self.initializer.initial_samples_from_model(
                total_points=zeus_sampler.nwalkers,
                model=model,
                fitness_function=fitness_function,
            )

            zeus_state = np.zeros(shape=(zeus_sampler.nwalkers,
                                         model.prior_count))

            logger.info(
                "No Zeus samples found, beginning new non-linear search.")

            for index, parameters in enumerate(initial_parameter_lists):

                zeus_state[index, :] = np.asarray(parameters)

            total_iterations = 0
            iterations_remaining = self.config_dict_run["nsteps"]

        while iterations_remaining > 0:

            if self.iterations_per_update > iterations_remaining:
                iterations = iterations_remaining
            else:
                iterations = self.iterations_per_update

            for sample in zeus_sampler.sample(
                    start=zeus_state,
                    log_prob0=initial_log_posterior_list,
                    iterations=iterations,
                    progress=True,
            ):

                pass

            zeus_sampler.ncall_total += zeus_sampler.ncall

            self.paths.save_object("zeus", zeus_sampler)

            zeus_state = zeus_sampler.get_last_sample()
            initial_log_posterior_list = zeus_sampler.get_last_log_prob()

            total_iterations += iterations
            iterations_remaining = self.config_dict_run[
                "nsteps"] - total_iterations

            samples = self.perform_update(model=model,
                                          analysis=analysis,
                                          during_analysis=True)

            if self.auto_correlations_settings.check_for_convergence:
                if zeus_sampler.iteration > self.auto_correlations_settings.check_size:
                    if samples.converged:
                        iterations_remaining = 0

            auto_correlation_time = zeus.AutoCorrTime(
                samples=zeus_sampler.get_chain())

            discard = int(3.0 * np.max(auto_correlation_time))
            thin = int(np.max(auto_correlation_time) / 2.0)
            chain = zeus_sampler.get_chain(discard=discard,
                                           thin=thin,
                                           flat=True)

            if "maxcall" in self.kwargs:
                if zeus_sampler.ncall_total > self.kwargs["maxcall"]:
                    iterations_remaining = 0

        logger.info("Zeus sampling complete.")
Example 6
    def perform_update(self, model, analysis, during_analysis):
        """
        Perform an update of the `NonLinearSearch` results, which occurs every *iterations_per_update* of the
        non-linear search. The update performs the following tasks:

        1) Visualize the maximum log likelihood model.
        2) Output the model results to the model.results file.

        These tasks are performed every n updates, set by the relevant *task_every_update* variable, for example
        *visualize_every_update*.

        Parameters
        ----------
        model : ModelMapper
            The model which generates instances for different points in parameter space.
        analysis : Analysis
            Contains the data and the log likelihood function which fits an instance of the model to the data, returning
            the log likelihood the `NonLinearSearch` maximizes.
        during_analysis : bool
            Whether the update is occurring during the non-linear search; if so, tasks are only performed after a
            certain number of updates and only a subset of visualization may be performed.
        """

        self.iterations += self.iterations_per_update
        logger.info(
            f"{self.iterations} Iterations: Performing update (Visualization, outputting samples, etc.)."
        )

        self.timer.update()

        samples = self.samples_from(model=model)

        self.paths.save_samples(samples)

        if not during_analysis:
            self.plot_results(samples=samples)

        try:
            instance = samples.max_log_likelihood_instance
        except exc.FitException:
            return samples

        if self.should_visualize() or not during_analysis:

            analysis.visualize(paths=self.paths,
                               instance=instance,
                               during_analysis=during_analysis)

        if self.should_output_model_results() or not during_analysis:
            try:
                start = time.time()
                analysis.log_likelihood_function(instance=instance)
                log_likelihood_function_time = (time.time() - start)

                self.paths.save_summary(
                    samples=samples,
                    log_likelihood_function_time=log_likelihood_function_time)
            except exc.FitException:
                pass

        if not during_analysis and self.remove_state_files_at_end:
            try:
                self.remove_state_files()
            except FileNotFoundError:
                pass

        return samples
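
The *task_every_update* throttling reduces to a modulo check on an update counter; a toy version, with hypothetical names, might look like this.

class UpdateThrottle:
    # A toy stand-in for the should_visualize() / should_output_model_results()
    # checks above: each task runs only on every n-th update.
    def __init__(self, visualize_every_update=2, output_every_update=5):
        self.visualize_every_update = visualize_every_update
        self.output_every_update = output_every_update
        self.update_count = 0

    def update(self):
        self.update_count += 1
        if self.update_count % self.visualize_every_update == 0:
            print(f"update {self.update_count}: visualize")
        if self.update_count % self.output_every_update == 0:
            print(f"update {self.update_count}: output model results")

throttle = UpdateThrottle()
for _ in range(10):
    throttle.update()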
Example 7
    def fit(self,
            model,
            analysis: "Analysis",
            info=None,
            pickle_files=None,
            log_likelihood_cap=None) -> "Result":
        """
        Fit a model, M, with some function f that takes instances of the
        class represented by model M and gives a score for their fitness.

        A model which represents possible instances with some dimensionality is fit.

        The analysis provides two functions. One visualises an instance of a model and the
        other scores an instance based on how well it fits some data. The search
        produces instances of the model by picking points in an N dimensional space.

        Parameters
        ----------
        log_likelihood_cap : float
            An optional cap applied to the log likelihood values returned during the fit.
        analysis : af.Analysis
            An object that encapsulates the data and a log likelihood function.
        model : ModelMapper
            An object that represents possible instances of some model with a
            given dimensionality which is the number of free dimensions of the
            model.
        info : dict
            Optional dictionary containing information about the fit that can be loaded by the aggregator.
        pickle_files : [str]
            Optional list of strings specifying the path and filename of .pickle files, that are copied to each
            model-fits pickles folder so they are accessible via the Aggregator.

        Returns
        -------
        An object encapsulating how well the model fit the data, the best fit instance
        and an updated model with free parameters updated to represent beliefs
        produced by this fit.
        """
        self.paths.model = model
        self.paths.unique_tag = self.unique_tag
        self.paths.restore()
        self.setup_log_file()

        if not self.paths.is_complete or self.force_pickle_overwrite:

            self.paths.save_all(search_config_dict=self.config_dict_search,
                                info=info,
                                pickle_files=pickle_files)
            analysis.save_attributes_for_aggregator(paths=self.paths)

        if not self.paths.is_complete:

            self.timer.start()

            self._fit(model=model,
                      analysis=analysis,
                      log_likelihood_cap=log_likelihood_cap)

            self.paths.completed()

            samples = self.perform_update(model=model,
                                          analysis=analysis,
                                          during_analysis=False)

            analysis.save_results_for_aggregator(paths=self.paths,
                                                 model=model,
                                                 samples=samples)
            self.paths.save_object("samples", samples)

        else:

            logger.info(
                f"{self.paths.name} already completed, skipping non-linear search."
            )
            samples = self.samples_from(model=model)

            if self.force_pickle_overwrite:

                self.paths.save_object("samples", samples)
                analysis.save_results_for_aggregator(paths=self.paths,
                                                     model=model,
                                                     samples=samples)

        self.paths.zip_remove()
        return analysis.make_result(samples=samples, model=model, search=self)
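
The skip-if-complete guard above is a marker-based resume pattern; a minimal sketch, with a hypothetical run_search callable standing in for _fit:

from pathlib import Path

def fit_once(output_dir, run_search):
    output = Path(output_dir)
    output.mkdir(parents=True, exist_ok=True)
    marker = output / ".completed"
    if marker.exists():
        print("already completed, skipping non-linear search")
        return
    run_search()
    marker.touch()  # reruns will now skip straight to loading results

fit_once("output/toy_fit", lambda: print("running search"))
fit_once("output/toy_fit", lambda: print("running search"))  # skipped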
Example 8
    def _fit(self,
             model: AbstractPriorModel,
             analysis,
             log_likelihood_cap=None):
        """
        Fit a model using Emcee and the Analysis class which contains the data and returns the log likelihood from
        instances of the model, which the `NonLinearSearch` seeks to maximize.

        Parameters
        ----------
        model : ModelMapper
            The model which generates instances for different points in parameter space.
        analysis : Analysis
            Contains the data and the log likelihood function which fits an instance of the model to the data, returning
            the log likelihood the `NonLinearSearch` maximizes.

        Returns
        -------
        A result object comprising the Samples object that includes the maximum log likelihood instance and full
        chains used by the fit.
        """

        pool, pool_ids = self.make_pool()

        fitness_function = self.fitness_function_from_model_and_analysis(
            model=model, analysis=analysis, pool_ids=pool_ids)

        emcee_sampler = emcee.EnsembleSampler(
            nwalkers=self.nwalkers,
            ndim=model.prior_count,
            log_prob_fn=fitness_function.__call__,
            backend=emcee.backends.HDFBackend(
                filename=self.paths.samples_path + "/emcee.hdf"),
            pool=pool,
        )

        try:

            emcee_state = emcee_sampler.get_last_sample()
            samples = self.samples_via_sampler_from_model(model=model)

            total_iterations = emcee_sampler.iteration

            if samples.converged:
                iterations_remaining = 0
            else:
                iterations_remaining = self.nsteps - total_iterations

                logger.info(
                    "Existing Emcee samples found, resuming non-linear search."
                )

        except AttributeError:

            initial_unit_parameters, initial_parameters, initial_log_posteriors = self.initializer.initial_samples_from_model(
                total_points=emcee_sampler.nwalkers,
                model=model,
                fitness_function=fitness_function,
            )

            emcee_state = np.zeros(shape=(emcee_sampler.nwalkers,
                                          model.prior_count))

            logger.info(
                "No Emcee samples found, beginning new non-linear search.")

            for index, parameters in enumerate(initial_parameters):

                emcee_state[index, :] = np.asarray(parameters)

            total_iterations = 0
            iterations_remaining = self.nsteps

        while iterations_remaining > 0:

            if self.iterations_per_update > iterations_remaining:
                iterations = iterations_remaining
            else:
                iterations = self.iterations_per_update

            for sample in emcee_sampler.sample(
                    initial_state=emcee_state,
                    iterations=iterations,
                    progress=True,
                    skip_initial_state_check=True,
                    store=True,
            ):

                pass

            emcee_state = emcee_sampler.get_last_sample()

            total_iterations += iterations
            iterations_remaining = self.nsteps - total_iterations

            samples = self.perform_update(model=model,
                                          analysis=analysis,
                                          during_analysis=True)

            if emcee_sampler.iteration % self.auto_correlation_check_size:
                if samples.converged and self.auto_correlation_check_for_convergence:
                    iterations_remaining = 0

        logger.info("Emcee sampling complete.")
Example 9
    def fit(self,
            model,
            analysis: "Analysis",
            info=None,
            pickle_files=None,
            log_likelihood_cap=None) -> "Result":
        """ Fit a model, M with some function f that takes instances of the
        class represented by model M and gives a score for their fitness.

        A model which represents possible instances with some dimensionality is fit.

        The analysis provides two functions. One visualises an instance of a model and the
        other scores an instance based on how well it fits some data. The search
        produces instances of the model by picking points in an N dimensional space.

        Parameters
        ----------
        analysis : af.Analysis
            An object that encapsulates the data and a log likelihood function.
        model : ModelMapper
            An object that represents possible instances of some model with a
            given dimensionality which is the number of free dimensions of the
            model.
        info : dict
            Optional dictionary containing information about the fit that can be loaded by the aggregator.
        pickle_files : [str]
            Optional list of strings specifying the path and filename of .pickle files, that are copied to each
            model-fits pickles folder so they are accessible via the Aggregator.

        Returns
        -------
        An object encapsulating how well the model fit the data, the best fit instance
        and an updated model with free parameters updated to represent beliefs
        produced by this fit.
        """

        try:
            os.makedirs(self.paths.samples_path)
        except FileExistsError:
            pass

        self.paths.restore()
        self.setup_log_file()

        if (not path.exists(self.paths.has_completed_path)) or \
                self.force_pickle_overwrite:

            self.save_model_info(model=model)
            self.save_parameter_names_file(model=model)
            self.save_metadata()
            self.save_info(info=info)
            self.save_search()
            self.save_model(model=model)
            self.move_pickle_files(pickle_files=pickle_files)
            analysis.save_attributes_for_aggregator(paths=self.paths)

        if not path.exists(self.paths.has_completed_path):

            # TODO : Better way to handle?
            self.timer.paths = self.paths
            self.timer.start()

            self._fit(model=model,
                      analysis=analysis,
                      log_likelihood_cap=log_likelihood_cap)
            open(self.paths.has_completed_path, "w+").close()

            samples = self.perform_update(model=model,
                                          analysis=analysis,
                                          during_analysis=False)

            analysis.save_results_for_aggregator(paths=self.paths,
                                                 samples=samples)

        else:

            logger.info(
                f"{self.paths.name} already completed, skipping non-linear search."
            )
            samples = self.samples_via_csv_json_from_model(model=model)

            if self.force_pickle_overwrite:
                self.save_samples(samples=samples)
                analysis.save_results_for_aggregator(paths=self.paths,
                                                     samples=samples)

        self.paths.zip_remove()
        return Result(samples=samples, previous_model=model, search=self)
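
One small note on the try/except around os.makedirs at the top of this method: since Python 3.2 it can be a single call (the path below is illustrative).

import os

os.makedirs("output/samples", exist_ok=True)  # no FileExistsError handling needed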
Example 10
    def _fit(self,
             model: AbstractPriorModel,
             analysis,
             log_likelihood_cap=None):
        """
        Fit a model using Dynesty and the Analysis class which contains the data and returns the log likelihood from
        instances of the model, which the `NonLinearSearch` seeks to maximize.

        Parameters
        ----------
        model : ModelMapper
            The model which generates instances for different points in parameter space.
        analysis : Analysis
            Contains the data and the log likelihood function which fits an instance of the model to the data, returning
            the log likelihood the `NonLinearSearch` maximizes.

        Returns
        -------
        A result object comprising the Samples object that includes the maximum log likelihood instance and full
        set of accepted samples of the fit.
        """

        pool, pool_ids = self.make_pool()

        fitness_function = self.fitness_function_from_model_and_analysis(
            model=model,
            analysis=analysis,
            pool_ids=pool_ids,
            log_likelihood_cap=log_likelihood_cap,
        )

        if self.paths.is_object("dynesty"):

            sampler = self.paths.load_object("dynesty")
            sampler.loglikelihood = fitness_function
            sampler.pool = pool

            sampler.rstate = np.random

            if self.number_of_cores == 1:
                sampler.M = map
            else:
                sampler.M = pool.map

            logger.info(
                "Existing Dynesty samples found, resuming non-linear search.")

        else:

            sampler = self.sampler_from(model=model,
                                        fitness_function=fitness_function,
                                        pool=pool)

            logger.info(
                "No Dynesty samples found, beginning new non-linear search. ")

        finished = False

        while not finished:

            try:
                total_iterations = np.sum(sampler.results.ncall)
            except AttributeError:
                total_iterations = 0

            if self.config_dict_run["maxcall"] is not None:
                iterations = self.config_dict_run["maxcall"] - total_iterations
            else:
                iterations = self.iterations_per_update

            if iterations > 0:

                # Dynesty intermittently fails with bounding errors; retry up
                # to 10 times before giving up. In the original code the final
                # raise sat inside the try block, so its ValueError was caught
                # by the except clause below and silently swallowed; a for/else
                # raises only if every attempt failed.
                for _ in range(10):

                    try:

                        config_dict_run = dict(self.config_dict_run)
                        config_dict_run.pop("maxcall", None)

                        sampler.run_nested(maxcall=iterations,
                                           print_progress=not self.silence,
                                           **config_dict_run)

                        break

                    except (ValueError, np.linalg.LinAlgError):

                        continue

                else:

                    raise ValueError(
                        "Dynesty crashed due to repeated bounding errors")

            sampler.loglikelihood = None

            self.paths.save_object("dynesty", sampler)

            sampler.loglikelihood = fitness_function

            self.perform_update(model=model,
                                analysis=analysis,
                                during_analysis=True)

            iterations_after_run = np.sum(sampler.results.ncall)

            if (total_iterations == iterations_after_run
                    or total_iterations == self.config_dict_run["maxcall"]):
                finished = True
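
For reference, a bare dynesty run without the checkpoint and retry machinery; the prior ranges and likelihood are toy assumptions.

import numpy as np
import dynesty

def log_likelihood(theta):
    return -0.5 * np.sum(theta ** 2)

def prior_transform(u):
    # Map the unit cube to uniform priors on [-5, 5].
    return 10.0 * np.asarray(u) - 5.0

sampler = dynesty.NestedSampler(log_likelihood, prior_transform, ndim=3)
sampler.run_nested(maxcall=5000, print_progress=False)
print(sampler.results.logz[-1])  # log-evidence estimate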
Example 11
    def _fit(self,
             model: AbstractPriorModel,
             analysis,
             log_likelihood_cap=None):
        """
        Fit a model using PySwarms and the Analysis class which contains the data and returns the log likelihood from
        instances of the model, which the `NonLinearSearch` seeks to maximize.

        Parameters
        ----------
        model : ModelMapper
            The model which generates instances for different points in parameter space.
        analysis : Analysis
            Contains the data and the log likelihood function which fits an instance of the model to the data, returning
            the log likelihood the `NonLinearSearch` maximizes.

        Returns
        -------
        A result object comprising the Samples object that includes the maximum log likelihood instance and full
        chains used by the fit.
        """
        pool, pool_ids = self.make_pool()

        fitness_function = self.fitness_function_from_model_and_analysis(
            model=model, analysis=analysis, pool_ids=pool_ids)

        if self.paths.is_object("points"):

            init_pos = self.load_points[-1]
            total_iterations = self.load_total_iterations

            logger.info(
                "Existing PySwarms samples found, resuming non-linear search.")

        else:

            initial_unit_parameter_lists, initial_parameter_lists, initial_log_posterior_list = self.initializer.initial_samples_from_model(
                total_points=self.config_dict_search["n_particles"],
                model=model,
                fitness_function=fitness_function,
            )

            init_pos = np.zeros(shape=(self.config_dict_search["n_particles"],
                                       model.prior_count))

            for index, parameters in enumerate(initial_parameter_lists):

                init_pos[index, :] = np.asarray(parameters)

            total_iterations = 0

            logger.info(
                "No PySwarms samples found, beginning new non-linear search. ")

        # TODO : Use actual limits

        vector_lower = model.vector_from_unit_vector(
            unit_vector=[1e-6] * model.prior_count)
        vector_upper = model.vector_from_unit_vector(
            unit_vector=[0.9999999] * model.prior_count)

        bounds = (np.asarray(vector_lower), np.asarray(vector_upper))

        logger.info("Running PySwarmsGlobal Optimizer...")

        while total_iterations < self.config_dict_run["iters"]:

            pso = self.sampler_from(model=model,
                                    fitness_function=fitness_function,
                                    bounds=bounds,
                                    init_pos=init_pos)

            iterations_remaining = self.config_dict_run[
                "iters"] - total_iterations

            if self.iterations_per_update > iterations_remaining:
                iterations = iterations_remaining
            else:
                iterations = self.iterations_per_update

            if iterations > 0:

                pso.optimize(objective_func=fitness_function.__call__,
                             iters=iterations)

                total_iterations += iterations

                self.paths.save_object("total_iterations", total_iterations)
                self.paths.save_object("points", pso.pos_history)
                self.paths.save_object(
                    "log_posterior_list",
                    [-0.5 * cost for cost in pso.cost_history])

                self.perform_update(model=model,
                                    analysis=analysis,
                                    during_analysis=True)

                init_pos = self.load_points[-1]

        logger.info("PySwarmsGlobal complete")