Example #1
    def go(self,
           continue_on_failure=True,
           compute_covariance=False,
           verbose=False,
           **options_for_parallel_computation):

        # Generate the data frame which will contain all results

        self._continue_on_failure = continue_on_failure

        self._compute_covariance = compute_covariance

        # let's iterate, perform the fit and fill the data frame

        if threeML_config["parallel"]["use_parallel"]:

            # Parallel computation

            with silence_console_log(and_progress_bars=False):
                client = ParallelClient(**options_for_parallel_computation)

                results = client.execute_with_progress_bar(
                    self.worker, list(range(self._n_iterations)))

        else:

            # Serial computation

            results = []

            with silence_console_log(and_progress_bars=False):

                for i in trange(self._n_iterations,
                                desc="Goodness of fit computation"):

                    results.append(self.worker(i))

        assert len(results) == self._n_iterations, (
            "Something went wrong, I have %s results "
            "for %s intervals" % (len(results), self._n_iterations))

        # Store the results in the data frames

        parameter_frames = pd.concat([x[0] for x in results],
                                     keys=list(range(self._n_iterations)))
        like_frames = pd.concat([x[1] for x in results],
                                keys=list(range(self._n_iterations)))

        # Store a list with all results (this is a list of lists, each list contains the results for the different
        # iterations for the same model)
        self._all_results = []

        for i in range(self._n_models):

            this_model_results = [x[2][i] for x in results]

            self._all_results.append(AnalysisResultsSet(this_model_results))

        return parameter_frames, like_frames
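
The go method above follows a dispatch pattern that recurs throughout these examples: read the parallel flag from the configuration, map self.worker over the iteration indices (via ParallelClient.execute_with_progress_bar or a plain loop), and assemble the per-iteration results. A minimal, self-contained sketch of that pattern is shown here; the config dictionary and the use of concurrent.futures in place of ParallelClient are stand-ins for illustration, not the actual threeML machinery.

# Sketch only: "config" and "worker" are placeholders, not the real threeML objects.
from concurrent.futures import ProcessPoolExecutor

config = {"parallel": {"use_parallel": False}}

def worker(i):
    # stand-in for self.worker(i): return some per-iteration result
    return i ** 2

def go(n_iterations):
    if config["parallel"]["use_parallel"]:
        # parallel computation: farm the indices out to a pool of workers
        with ProcessPoolExecutor() as executor:
            results = list(executor.map(worker, range(n_iterations)))
    else:
        # serial computation: plain loop over the indices
        results = [worker(i) for i in range(n_iterations)]

    assert len(results) == n_iterations
    return results

print(go(5))  # [0, 1, 4, 9, 16]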
Example #2
    def go(self, continue_on_failure=True, compute_covariance=False, verbose=False, **options_for_parallel_computation):

        # Generate the data frame which will contain all results

        if verbose:

            log.setLevel(logging.INFO)

        self._continue_on_failure = continue_on_failure

        self._compute_covariance = compute_covariance

        # let's iterate, perform the fit and fill the data frame

        if threeML_config['parallel']['use-parallel']:

            # Parallel computation

            client = ParallelClient(**options_for_parallel_computation)

            results = client.execute_with_progress_bar(self.worker, range(self._n_iterations))


        else:

            # Serial computation

            results = []

            with progress_bar(self._n_iterations, title='Goodness of fit computation') as p:

                for i in range(self._n_iterations):

                    results.append(self.worker(i))

                    p.increase()

        assert len(results) == self._n_iterations, "Something went wrong, I have %s results " \
                                                   "for %s intervals" % (len(results), self._n_iterations)

        # Store the results in the data frames

        parameter_frames = pd.concat(map(lambda x: x[0], results), keys=range(self._n_iterations))
        like_frames = pd.concat(map(lambda x: x[1], results), keys=range(self._n_iterations))

        # Store a list with all results (this is a list of lists, each list contains the results for the different
        # iterations for the same model)
        self._all_results = []

        for i in range(self._n_models):

            this_model_results = map(lambda x: x[2][i], results)

            self._all_results.append(AnalysisResultsSet(this_model_results))

        return parameter_frames, like_frames
Example #3
File: ts_map.py Project: threeML/hawc_hal
    def go(self):

        if is_parallel_computation_active():

            client = ParallelClient()

            if self._n_decs % client.get_number_of_engines() != 0:

                log.warning(
                    "The number of Dec bands is not a multiple of the number "
                    "of engines. Make it so for optimal performance.")

            res = client.execute_with_progress_bar(
                self.worker,
                list(range(len(self._points))),
                chunk_size=self._n_ras)

        else:

            n_points = len(self._points)

            p = tqdm(total=n_points)

            res = np.zeros(n_points)

            for i, point in enumerate(self._points):

                res[i] = self.worker(i)

                p.update(1)

        TS = 2 * (-np.array(res) - self._like0)

        #self._debug_map = {k:v for v,k in zip(self._points, TS)}

        # Get maximum of TS
        idx = TS.argmax()
        self._max_ts = (TS[idx], self._points[idx])

        log.info("Maximum TS is %.2f at (R.A., Dec) = (%.3f, %.3f)" %
                 (self._max_ts[0], self._max_ts[1][0], self._max_ts[1][1]))

        self._ts_map = TS.reshape(self._n_decs, self._n_ras)

        return self._ts_map
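
The closing lines of this example turn the flat list of per-point fit values into a TS map: compute 2 * (-res - like0), reshape to the (Dec, R.A.) grid, and record the maximum. A tiny stand-alone sketch of that arithmetic, with invented numbers in place of the real fit output:

import numpy as np

# Hypothetical values standing in for the fit results of the example above
n_decs, n_ras = 3, 4
res = -0.5 * np.arange(n_decs * n_ras)      # minus log-like at each grid point
like0 = -1.0                                # minus log-like of the null model
points = [(ra, dec) for dec in range(n_decs) for ra in range(n_ras)]

TS = 2 * (-np.array(res) - like0)           # likelihood ratio test statistic

idx = TS.argmax()                           # flat index of the largest TS
max_ts = (TS[idx], points[idx])

ts_map = TS.reshape(n_decs, n_ras)          # one row per Dec band, one column per R.A. step
print(max_ts, ts_map.shape)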
Example #4
    def sample(self, quiet=False):

        if not self._is_setup:

            log.info("You forgot to setup the sampler!")
            return

        loud = not quiet

        self._update_free_parameters()

        n_dim = len(list(self._free_parameters.keys()))

        # Get starting point

        p0 = emcee.State(self._get_starting_points(self._n_walkers))

        # Deactivate memoization in astromodels, which is useless in this case since we will never use the
        # same set of parameters twice
        with use_astromodels_memoization(False):

            if threeML_config["parallel"]["use_parallel"]:

                c = ParallelClient()
                view = c[:]

                sampler = emcee.EnsembleSampler(self._n_walkers,
                                                n_dim,
                                                self.get_posterior,
                                                pool=view)

            else:

                sampler = emcee.EnsembleSampler(self._n_walkers, n_dim,
                                                self.get_posterior)

            # If a seed is provided, set the random number seed
            if self._seed is not None:

                sampler._random.seed(self._seed)

            log.debug("Start emcee run")
            # Sample the burn-in

            if threeML_config.interface.progress_bars:

                if is_inside_notebook():

                    progress = "notebook"

                else:
                    progress = True

            else:

                progress = False

            pos, prob, state = sampler.run_mcmc(initial_state=p0,
                                                nsteps=self._n_burn_in,
                                                progress=progress)
            log.debug("Emcee run done")

            # Reset sampler

            sampler.reset()

            state = emcee.State(pos, prob, random_state=state)

            # Run the true sampling

            _ = sampler.run_mcmc(initial_state=state,
                                 nsteps=self._n_iterations,
                                 progress=progress)

        acc = np.mean(sampler.acceptance_fraction)

        log.info(f"Mean acceptance fraction: {acc}")

        self._sampler = sampler
        self._raw_samples = sampler.get_chain(flat=True)

        # Compute the corresponding values of the likelihood

        # First we need the prior
        log_prior = [self._log_prior(x) for x in self._raw_samples]

        # Now we get the log posterior and we remove the log prior

        self._log_like_values = sampler.get_log_prob(flat=True) - log_prior

        # we also want to store the log probability

        self._log_probability_values = sampler.get_log_prob(flat=True)

        self._marginal_likelihood = None

        self._build_samples_dictionary()

        self._build_results()

        # Display results
        if loud:
            self._results.display()

        return self.samples
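
The emcee workflow in this example is burn-in, reset, production run, then flattening of the chain and its log-probabilities. The same sequence can be exercised on a toy log-probability; the sketch below assumes emcee 3.x and numpy are available, and nothing in it is specific to 3ML.

import numpy as np
import emcee

def log_prob(theta):
    # toy posterior: standard normal in each dimension
    return -0.5 * np.sum(theta ** 2)

n_walkers, n_dim = 16, 2
p0 = np.random.randn(n_walkers, n_dim)

sampler = emcee.EnsembleSampler(n_walkers, n_dim, log_prob)

# burn-in
state = sampler.run_mcmc(p0, 200, progress=False)

# forget the burn-in samples, keep the final walker positions
sampler.reset()

# production run, starting from where the burn-in ended
sampler.run_mcmc(state, 500, progress=False)

samples = sampler.get_chain(flat=True)       # shape (n_walkers * 500, n_dim)
log_probs = sampler.get_log_prob(flat=True)  # matching log-probabilities
print(samples.shape, np.mean(sampler.acceptance_fraction))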
Example #5
    def sample(self, quiet=False):
        """
        sample using the dynesty nested sampling method
        :rtype: 

        :returns: 

        """
        if not self._is_setup:

            log.info("You forgot to setup the sampler!")
            return

        loud = not quiet

        self._update_free_parameters()

        param_names = list(self._free_parameters.keys())

        ndim = len(param_names)

        self._kwargs["ndim"] = ndim

        loglike, dynesty_prior = self._construct_unitcube_posterior(return_copy=True)

        # check if we are going to do things in parallel

        if threeML_config["parallel"]["use_parallel"]:

            c = ParallelClient()
            view = c[:]

            self._kwargs["pool"] = view
            self._kwargs["queue_size"] = len(view)

        sampler = DynamicNestedSampler(loglike, dynesty_prior, **self._kwargs)

        self._sampler_kwargs["print_progress"] = loud

        with use_astromodels_memoization(False):
            log.debug("Start dynestsy run")
            sampler.run_nested(**self._sampler_kwargs)
            log.debug("Dynesty run done")

        self._sampler = sampler

        results = self._sampler.results

        # draw posterior samples
        weights = np.exp(results["logwt"] - results["logz"][-1])

        SQRTEPS = math.sqrt(float(np.finfo(np.float64).eps))

        rstate = np.random

        if abs(np.sum(weights) - 1.0) > SQRTEPS:  # same tol as in np.random.choice.
            raise ValueError("Weights do not sum to 1.")

        # Make N subdivisions and choose positions with a consistent random offset.
        nsamples = len(weights)
        positions = (rstate.random() + np.arange(nsamples)) / nsamples

        # Resample the data.
        idx = np.zeros(nsamples, dtype=int)
        cumulative_sum = np.cumsum(weights)
        i, j = 0, 0
        while i < nsamples:
            if positions[i] < cumulative_sum[j]:
                idx[i] = j
                i += 1
            else:
                j += 1

        samples_dynesty = results["samples"][idx]

        self._raw_samples = samples_dynesty

        # now do the same for the log likes

        logl_dynesty = results["logl"][idx]

        self._log_like_values = logl_dynesty

        self._log_probability_values = self._log_like_values + np.array(
            [self._log_prior(samples) for samples in self._raw_samples]
        )

        self._marginal_likelihood = self._sampler.results["logz"][-1] / np.log(10.0)

        self._build_results()

        # Display results
        if loud:
            self._results.display()

        # now get the marginal likelihood

        return self.samples
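
The middle of this example converts dynesty's weighted nested-sampling output into equally weighted posterior samples by stepping through the cumulative weights with evenly spaced, randomly offset positions. That resampling step, isolated into a small function (the toy samples and weights below are invented for illustration):

import math
import numpy as np

def resample_equal(samples, weights, rstate=np.random):
    # same tolerance as np.random.choice uses for checking normalization
    sqrteps = math.sqrt(float(np.finfo(np.float64).eps))
    if abs(np.sum(weights) - 1.0) > sqrteps:
        raise ValueError("Weights do not sum to 1.")

    # N equally spaced positions with one common random offset
    nsamples = len(weights)
    positions = (rstate.random() + np.arange(nsamples)) / nsamples

    # walk the cumulative weights and pick the matching index for each position
    idx = np.zeros(nsamples, dtype=int)
    cumulative_sum = np.cumsum(weights)
    i, j = 0, 0
    while i < nsamples:
        if positions[i] < cumulative_sum[j]:
            idx[i] = j
            i += 1
        else:
            j += 1
    return samples[idx]

samples = np.linspace(0.0, 1.0, 5).reshape(-1, 1)
weights = np.array([0.05, 0.1, 0.2, 0.3, 0.35])
print(resample_equal(samples, weights))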
Example #6
    def get_contours(self, param_1, param_1_minimum, param_1_maximum, param_1_n_steps,
                     param_2=None, param_2_minimum=None, param_2_maximum=None, param_2_n_steps=None,
                     progress=True, **options):
        """
        Generate confidence contours for the given parameters by stepping for the given number of steps between
        the given boundaries. Call it specifying only source_1, param_1, param_1_minimum and param_1_maximum to
        generate the profile of the likelihood for parameter 1. Specify all parameters to obtain instead a 2d
        contour of param_1 vs param_2.

        NOTE: if using parallel computation, param_1_n_steps must be an integer multiple of the number of running
        engines. If that is not the case, the code will reduce the number of steps to match that requirement, and
        issue a warning

        :param param_1: fully qualified name of the first parameter or parameter instance
        :param param_1_minimum: lower bound for the range for the first parameter
        :param param_1_maximum: upper bound for the range for the first parameter
        :param param_1_n_steps: number of steps for the first parameter
        :param param_2: fully qualified name of the second parameter or parameter instance
        :param param_2_minimum: lower bound for the range for the second parameter
        :param param_2_maximum: upper bound for the range for the second parameter
        :param param_2_n_steps: number of steps for the second parameter
        :param progress: (True or False) whether to display progress or not
        :param log: by default the steps are taken linearly. With this optional parameter you can provide a tuple of
                    booleans which specify whether the steps are to be taken logarithmically. For example,
                    'log=(True,False)' specify that the steps for the first parameter are to be taken logarithmically,
                    while they are linear for the second parameter. If you are generating the profile for only one
                    parameter, you can specify 'log=(True,)' or 'log=(False,)' (optional)
        :return: a tuple containing an array corresponding to the steps for the first parameter, an array corresponding
                 to the steps for the second parameter (or None if stepping only in one direction), a matrix of size
                 param_1_steps x param_2_steps containing the value of the function at the corresponding points in the
                 grid. If param_2_steps is None (only one parameter), then this reduces to an array of
                 size param_1_steps.
        """

        if hasattr(param_1,"value"):

            # Substitute with the name
            param_1 = param_1.path

        if hasattr(param_2,'value'):

            param_2 = param_2.path

        # Check that the parameters exist
        assert param_1 in self._likelihood_model.free_parameters, "Parameter %s is not a free parameter of the " \
                                                                  "current model" % param_1

        if param_2 is not None:
            assert param_2 in self._likelihood_model.free_parameters, "Parameter %s is not a free parameter of the " \
                                                                      "current model" % param_2


        # Check that we have a valid fit

        assert self._current_minimum is not None, "You have to run the .fit method before calling get_contours."

        # Then restore the best fit

        self._minimizer.restore_best_fit()

        # Check minimal assumptions about the procedure

        assert not (param_1 == param_2), "You have to specify two different parameters"

        assert param_1_minimum < param_1_maximum, "Minimum larger than maximum for parameter 1"

        min1, max1 = self.likelihood_model[param_1].bounds

        if min1 is not None:

            assert param_1_minimum >= min1, "Requested low range for parameter %s (%s) " \
                                            "is below parameter minimum (%s)" % (param_1, param_1_minimum, min1)

        if max1 is not None:

            assert param_1_maximum <= max1, "Requested hi range for parameter %s (%s) " \
                                            "is above parameter maximum (%s)" % (param_1, param_1_maximum, max1)

        if param_2 is not None:

            min2, max2 = self.likelihood_model[param_2].bounds

            if min2 is not None:

                assert param_2_minimum >= min2, "Requested low range for parameter %s (%s) " \
                                                "is below parameter minimum (%s)" % (param_2, param_2_minimum, min2)

            if max2 is not None:

                assert param_2_maximum <= max2, "Requested hi range for parameter %s (%s) " \
                                                "is above parameter maximum (%s)" % (param_2, param_2_maximum, max2)

        # Check whether we are parallelizing or not

        if not threeML_config['parallel']['use-parallel']:

            a, b, cc = self.minimizer.contours(param_1, param_1_minimum, param_1_maximum, param_1_n_steps,
                                               param_2, param_2_minimum, param_2_maximum, param_2_n_steps,
                                               progress, **options)

            # Collapse the second dimension of the results if we are doing a 1d contour

            if param_2 is None:
                cc = cc[:, 0]

        else:

            # With parallel computation

            # In order to distribute fairly the computation, the strategy is to parallelize the computation
            # by assigning to the engines one "line" of the grid at the time

            # Connect to the engines

            client = ParallelClient(**options)

            # Get the number of engines

            n_engines = client.get_number_of_engines()

            # Check whether the number of threads is larger than the number of steps in the first direction

            if n_engines > param_1_n_steps:

                n_engines = int(param_1_n_steps)

                custom_warnings.warn("The number of engines is larger than the number of steps. Using only %s engines."
                                     % n_engines, ReducingNumberOfThreads)

            # Check if the number of steps is divisible by the number
            # of threads, otherwise issue a warning and make it so

            if float(param_1_n_steps) % n_engines != 0:
                # Set the number of steps to an integer multiple of the engines
                # (note that // is the floor division, also called integer division)

                param_1_n_steps = (param_1_n_steps // n_engines) * n_engines

                custom_warnings.warn("Number of steps is not a multiple of the number of threads. Reducing steps to %s"
                                     % param_1_n_steps, ReducingNumberOfSteps)

            # Compute the number of splits, i.e., how many lines in the grid for each engine.
            # (note that this is guaranteed to be an integer number after the previous checks)

            p1_split_steps = param_1_n_steps // n_engines

            # Prepare arrays for results

            if param_2 is None:

                # One array
                pcc = np.zeros(param_1_n_steps)

                pa = np.linspace(param_1_minimum, param_1_maximum, param_1_n_steps)
                pb = None

            else:

                pcc = np.zeros((param_1_n_steps, param_2_n_steps))

                # Prepare the two axes of the parameter space
                pa = np.linspace(param_1_minimum, param_1_maximum, param_1_n_steps)
                pb = np.linspace(param_2_minimum, param_2_maximum, param_2_n_steps)

            # Define the parallel worker which will go through the computation

            # NOTE: I only divide
            # on the first parameter axis so that the different
            # threads are more or less well mixed for points close and
            # far from the best fit

            def worker(start_index):

                # Re-create the minimizer

                backup_freeParameters = [x.value for x in self._likelihood_model.free_parameters.values()]

                this_minimizer = self._get_minimizer(self.minus_log_like_profile,
                                                     self._free_parameters)

                this_p1min = pa[start_index * p1_split_steps]
                this_p1max = pa[(start_index + 1) * p1_split_steps - 1]

                # print("From %s to %s" % (this_p1min, this_p1max))

                aa, bb, ccc = this_minimizer.contours(param_1, this_p1min, this_p1max, p1_split_steps,
                                                      param_2, param_2_minimum, param_2_maximum,
                                                      param_2_n_steps,
                                                      progress=True, **options)

                # Restore best fit values

                for val, par in zip(backup_freeParameters, self._likelihood_model.free_parameters.values()):

                    par.value = val

                return ccc

            # Now re-assemble the vector of results taking the different parts from the engines

            all_results = client.execute_with_progress_bar(worker, range(n_engines), chunk_size=1)

            for i, these_results in enumerate(all_results):

                if param_2 is None:

                    pcc[i * p1_split_steps: (i + 1) * p1_split_steps] = these_results[:, 0]

                else:

                    pcc[i * p1_split_steps: (i + 1) * p1_split_steps, :] = these_results

            # Give the results the names that the following code expect. These are kept separate for debugging
            # purposes

            cc = pcc
            a = pa
            b = pb

        # Here we have done the computation, in parallel computation or not. Let's make the plot
        # with the contour

        if param_2 is not None:

            # 2d contour

            fig = self._plot_contours("%s" % (param_1), a, "%s" % (param_2,), b, cc)

        else:

            # 1d contour (i.e., a profile)

            fig = self._plot_profile("%s" % (param_1), a, cc)

        # Check if we found a better minimum. This shouldn't happen, but in case of very difficult fit
        # it might.

        if self._current_minimum - cc.min() > 0.1:

            if param_2 is not None:

                idx = cc.argmin()

                aidx, bidx = np.unravel_index(idx, cc.shape)

                print("\nFound a better minimum: %s with %s = %s and %s = %s. Run again your fit starting from here."
                      % (cc.min(), param_1, a[aidx], param_2, b[bidx]))

            else:

                idx = cc.argmin()

                print("Found a better minimum: %s with %s = %s. Run again your fit starting from here."
                      % (cc.min(), param_1, a[idx]))

        return a, b, cc, fig
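
In the parallel branch above, get_contours first caps the number of engines, rounds param_1_n_steps down to a multiple of the engine count, and then hands each engine one contiguous slice of the first-parameter axis. That bookkeeping can be checked on its own with plain numpy; the engine and step counts below are arbitrary.

import numpy as np

param_1_n_steps, n_engines = 50, 8
param_1_minimum, param_1_maximum = 1.0, 2.0

# use at most as many engines as there are steps
n_engines = min(n_engines, param_1_n_steps)

# round the number of steps down to an integer multiple of the engines
param_1_n_steps = (param_1_n_steps // n_engines) * n_engines   # 48

# each engine gets a contiguous block of the first-parameter axis
p1_split_steps = param_1_n_steps // n_engines                  # 6
pa = np.linspace(param_1_minimum, param_1_maximum, param_1_n_steps)

for start_index in range(n_engines):
    this_p1min = pa[start_index * p1_split_steps]
    this_p1max = pa[(start_index + 1) * p1_split_steps - 1]
    print(start_index, this_p1min, this_p1max)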
Example #7
    def _unbinned_fit_polynomials(self, bayes=False):

        self._poly_fit_exists = True

        # Select all the events that are in the background regions
        # and make a mask

        all_bkg_masks = []

        total_duration = 0.0

        poly_exposure = 0

        for selection in self._poly_intervals:
            total_duration += selection.duration

            poly_exposure += self.exposure_over_interval(
                selection.start_time, selection.stop_time)

            all_bkg_masks.append(
                np.logical_and(
                    self._arrival_times >= selection.start_time,
                    self._arrival_times <= selection.stop_time,
                ))
        poly_mask = all_bkg_masks[0]

        # If there are multiple masks:
        if len(all_bkg_masks) > 1:
            for mask in all_bkg_masks[1:]:
                poly_mask = np.logical_or(poly_mask, mask)

        # Select the all the events in the poly selections
        # We only need to do this once

        total_poly_events = self._arrival_times[poly_mask]

        # For the channel energies we will need to down select again.
        # We can go ahead and do this to avoid repeated computations

        total_poly_energies = self._measurement[poly_mask]

        # Now we will find the best poly order unless the user specified one
        # The total cnts (over channels) is binned to .1 sec intervals

        if self._user_poly_order == -1:

            self._optimal_polynomial_grade = (
                self._unbinned_fit_global_and_determine_optimum_grade(
                    total_poly_events, poly_exposure, bayes=bayes))

            log.info("Auto-determined polynomial order: %d" %
                     self._optimal_polynomial_grade)

        else:

            self._optimal_polynomial_grade = self._user_poly_order

        channels = list(
            range(self._first_channel, self._n_channels + self._first_channel))

        # Check whether we are parallelizing or not

        t_start = self._poly_intervals.start_times
        t_stop = self._poly_intervals.stop_times

        if threeML_config["parallel"]["use_parallel"]:

            def worker(channel):
                channel_mask = total_poly_energies == channel

                # Mask background events and current channel
                # poly_chan_mask = np.logical_and(poly_mask, channel_mask)
                # Select the masked events

                current_events = total_poly_events[channel_mask]

                polynomial, _ = unbinned_polyfit(
                    current_events,
                    self._optimal_polynomial_grade,
                    t_start,
                    t_stop,
                    poly_exposure,
                    bayes=bayes)

                return polynomial

            client = ParallelClient()

            polynomials = client.execute_with_progress_bar(
                worker,
                channels,
                name=f"Fitting {self._instrument} background")

        else:

            polynomials = []

            for channel in tqdm(channels,
                                desc=f"Fitting {self._instrument} background"):
                channel_mask = total_poly_energies == channel

                # Mask background events and current channel
                # poly_chan_mask = np.logical_and(poly_mask, channel_mask)
                # Select the masked events

                current_events = total_poly_events[channel_mask]

                polynomial, _ = unbinned_polyfit(
                    current_events,
                    self._optimal_polynomial_grade,
                    t_start,
                    t_stop,
                    poly_exposure,
                    bayes=bayes)

                polynomials.append(polynomial)

        # We are now ready to return the polynomials

        self._polynomials = polynomials
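
Whether serial or parallel, the per-channel loop in this example is a boolean mask on the event energies followed by an independent fit of the selected arrival times. A minimal sketch of the masking step, with fake events and a trivial stand-in for unbinned_polyfit:

import numpy as np

rng = np.random.default_rng(0)

# fake event list: arrival times and the channel each event fell in
arrival_times = rng.uniform(0.0, 100.0, size=1000)
measurement = rng.integers(0, 4, size=1000)     # channel of each event

def fit_channel(events):
    # stand-in for unbinned_polyfit: here just the event rate over 100 s
    return len(events) / 100.0

channels = list(range(4))
results = []
for channel in channels:
    channel_mask = measurement == channel       # select events in this channel
    current_events = arrival_times[channel_mask]
    results.append(fit_channel(current_events))

print(results)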
Example #8
    def sample(self, quiet=False):

        assert self._is_setup, "You forgot to setup the sampler!"

        loud = not quiet

        self._update_free_parameters()

        n_dim = len(list(self._free_parameters.keys()))

        # Get starting point

        p0 = self._get_starting_points(1)[0]
        print(p0)
        
        # Deactivate memoization in astromodels, which is useless in this case since we will never use the
        # same set of parameters twice
        with use_astromodels_memoization(False):

            if threeML_config["parallel"]["use-parallel"]:

                c = ParallelClient()
                view = c[:]
                pool = view
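                # NOTE: this parallel branch only builds a pool; the NUTS run
                # below happens in the serial branch, so `samples` and `lnprob`
                # are only defined when running serially.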

            else:

                pool = None

                def logp(theta):

                    return self.get_posterior(theta)

                
                def grad(theta):

                    return numerical_grad(theta, self.get_posterior)
                
                nuts_fn = nuts.NutsSampler_fn_wrapper(self.get_posterior, grad)

                samples, lnprob, epsilon = nuts.nuts6(nuts_fn, self._n_iterations, self._n_adapt, p0)
                
#            sampler = nuts.NUTSSampler(n_dim, self.get_posterior, gradfn=None)  

            # # if a seed is provided, set the random number seed
            # if self._seed is not None:

            #     sampler._random.seed(self._seed)

            # # Run the true sampling

            # samples = sampler.run_mcmc(
            #     initial_state=p0,
            #     M=self._n_iterations,
            #     Madapt=self._n_adapt,
            #     delta=self._delta,
            #     progress=loud,
            # )


        self._sampler = None
        self._raw_samples = samples

        # Compute the corresponding values of the likelihood

        self._test = lnprob
        
        # First we need the prior
        log_prior = np.array([self._log_prior(x) for x in self._raw_samples])

        # Now we get the log posterior and we remove the log prior

        self._log_like_values = np.array([self._log_like(x) for x in self._raw_samples])

        # we also want to store the log probability

        self._log_probability_values = log_prior + self._log_like_values

        self._marginal_likelihood = None

        self._build_samples_dictionary()

        self._build_results()

        # Display results
        if loud:
            self._results.display()

        return self.samples
Example #9
    def sample(self, n_walkers, burn_in, n_samples, quiet=False, seed=None):
        """
        Sample the posterior with Goodman & Weare's Affine Invariant Markov chain Monte Carlo
        :param n_walkers:
        :param burn_in:
        :param n_samples:
        :param quiet: if True, do not print results
        :param seed: if provided, it is used to seed the random numbers generator before the MCMC

        :return: MCMC samples

        """

        self._update_free_parameters()

        n_dim = len(self._free_parameters.keys())

        # Get starting point

        p0 = self._get_starting_points(n_walkers)

        sampling_procedure = sample_with_progress

        # Deactivate memoization in astromodels, which is useless in this case since we will never use the
        # same set of parameters twice
        with use_astromodels_memoization(False):

            if threeML_config['parallel']['use-parallel']:

                c = ParallelClient()
                view = c[:]

                sampler = emcee.EnsembleSampler(n_walkers,
                                                n_dim,
                                                self.get_posterior,
                                                pool=view)

                # Sampling with progress in parallel is super-slow, so let's
                # use the non-interactive one
                sampling_procedure = sample_without_progress

            else:

                sampler = emcee.EnsembleSampler(n_walkers, n_dim,
                                                self.get_posterior)

            # If a seed is provided, set the random number seed
            if seed is not None:

                sampler._random.seed(seed)

            # Sample the burn-in
            pos, prob, state = sampling_procedure(title="Burn-in",
                                                  p0=p0,
                                                  sampler=sampler,
                                                  n_samples=burn_in)

            # Reset sampler

            sampler.reset()

            # Run the true sampling

            _ = sampling_procedure(title="Sampling",
                                   p0=pos,
                                   sampler=sampler,
                                   n_samples=n_samples,
                                   rstate0=state)

        acc = np.mean(sampler.acceptance_fraction)

        print("\nMean acceptance fraction: %s\n" % acc)

        self._sampler = sampler
        self._raw_samples = sampler.flatchain

        # Compute the corresponding values of the likelihood

        # First we need the prior
        log_prior = np.array([self._log_prior(x) for x in self._raw_samples])

        # Now we get the log posterior and we remove the log prior

        self._log_like_values = sampler.flatlnprobability - log_prior

        # we also want to store the log probability

        self._log_probability_values = sampler.flatlnprobability

        self._marginal_likelihood = None

        self._build_samples_dictionary()

        self._build_results()

        # Display results
        if not quiet:
            self._results.display()

        return self.samples
Example #10
    def sample(self, quiet=False):

        if not self._is_setup:

            log.info("You forgot to setup the sampler!")
            return

        loud = not quiet

        self._update_free_parameters()

        n_dim = len(list(self._free_parameters.keys()))

        # Get starting point

        p0 = self._get_starting_points(self._n_walkers)

        # Deactivate memoization in astromodels, which is useless in this case since we will never use the
        # same set of parameters twice
        with use_astromodels_memoization(False):

            if using_mpi:

                with MPIPoolExecutor() as executor:

                    sampler = zeus.sampler(
                        logprob_fn=self.get_posterior,
                        nwalkers=self._n_walkers,
                        ndim=n_dim,
                        pool=executor,
                    )

                    # if self._seed is not None:

                    #     sampler._random.seed(self._seed)

                    # Run the true sampling
                    log.debug("Start zeus run")
                    _ = sampler.run(
                        p0,
                        self._n_iterations + self._n_burn_in,
                        progress=loud,
                    )
                    log.debug("Zeus run done")

            elif threeML_config["parallel"]["use_parallel"]:

                c = ParallelClient()
                view = c[:]

                sampler = zeus.sampler(
                    logprob_fn=self.get_posterior,
                    nwalkers=self._n_walkers,
                    ndim=n_dim,
                    pool=view,
                )

            else:

                sampler = zeus.sampler(logprob_fn=self.get_posterior,
                                       nwalkers=self._n_walkers,
                                       ndim=n_dim)

            # If a seed is provided, set the random number seed
            # if self._seed is not None:

            #     sampler._random.seed(self._seed)

            # Sample the burn-in
            if not using_mpi:
                log.debug("Start zeus run")
                _ = sampler.run(p0,
                                self._n_iterations + self._n_burn_in,
                                progress=loud)
                log.debug("Zeus run done")

        self._sampler = sampler
        self._raw_samples = sampler.get_chain(flat=True,
                                              discard=self._n_burn_in)

        # Compute the corresponding values of the likelihood

        # First we need the prior
        log_prior = np.array([self._log_prior(x) for x in self._raw_samples])
        self._log_probability_values = sampler.get_log_prob(
            flat=True, discard=self._n_burn_in)

        # np.array(
        #     [self.get_posterior(x) for x in self._raw_samples]
        # )

        # Now we get the log posterior and we remove the log prior

        self._log_like_values = self._log_probability_values - log_prior

        # we also want to store the log probability

        self._marginal_likelihood = None

        self._build_samples_dictionary()

        self._build_results()

        # Display results
        if loud:
            print(self._sampler.summary)
            self._results.display()

        return self.samples
Example #11
    def contours(
        self,
        param_1,
        param_1_minimum,
        param_1_maximum,
        param_1_n_steps,
        param_2=None,
        param_2_minimum=None,
        param_2_maximum=None,
        param_2_n_steps=None,
        progress=True,
        **options
    ):
        """
        Generate confidence contours for the given parameters by stepping for the given number of steps between
        the given boundaries. Call it specifying only source_1, param_1, param_1_minimum and param_1_maximum to
        generate the profile of the likelihood for parameter 1. Specify all parameters to obtain instead a 2d
        contour of param_1 vs param_2

        :param param_1: name of the first parameter
        :param param_1_minimum: lower bound for the range for the first parameter
        :param param_1_maximum: upper bound for the range for the first parameter
        :param param_1_n_steps: number of steps for the first parameter
        :param param_2: name of the second parameter
        :param param_2_minimum: lower bound for the range for the second parameter
        :param param_2_maximum: upper bound for the range for the second parameter
        :param param_2_n_steps: number of steps for the second parameter
        :param progress: (True or False) whether to display progress or not
        :param log: by default the steps are taken linearly. With this optional parameter you can provide a tuple of
        booleans which specify whether the steps are to be taken logarithmically. For example,
        'log=(True,False)' specify that the steps for the first parameter are to be taken logarithmically, while they
        are linear for the second parameter. If you are generating the profile for only one parameter, you can specify
         'log=(True,)' or 'log=(False,)' (optional)
        :param parallel: whether or not to use parallel computation (default: False)
        :return: a : an array corresponding to the steps for the first parameter
                 b : an array corresponding to the steps for the second parameter (or None if stepping only in one
                 direction)
                 contour : a matrix of size param_1_steps x param_2_steps containing the value of the function at the
                 corresponding points in the grid. If param_2_steps is None (only one parameter), then this reduces to
                 an array of size param_1_steps.
        """

        # Figure out if we are making a 1d or a 2d contour

        if param_2 is None:

            n_dimensions = 1

        else:

            n_dimensions = 2

        # Check the options

        p1log = False
        p2log = False
        parallel = False

        if "log" in options.keys():

            assert len(options["log"]) == n_dimensions, (
                "When specifying the 'log' option you have to provide a "
                + "boolean for each dimension you are stepping on."
            )

            p1log = bool(options["log"][0])

            if param_2 is not None:

                p2log = bool(options["log"][1])

        if "parallel" in options.keys():

            parallel = bool(options["parallel"])

        # Generate the steps

        if p1log:

            param_1_steps = numpy.logspace(math.log10(param_1_minimum), math.log10(param_1_maximum), param_1_n_steps)

        else:

            param_1_steps = numpy.linspace(param_1_minimum, param_1_maximum, param_1_n_steps)

        if n_dimensions == 2:

            if p2log:

                param_2_steps = numpy.logspace(
                    math.log10(param_2_minimum), math.log10(param_2_maximum), param_2_n_steps
                )

            else:

                param_2_steps = numpy.linspace(param_2_minimum, param_2_maximum, param_2_n_steps)

        else:

            # Only one parameter to step through
            # Put param_2_steps as nan so that the worker can realize that it does not have
            # to step through it

            param_2_steps = numpy.array([numpy.nan])

        # Generate the grid

        grid = cartesian([param_1_steps, param_2_steps])

        # Define the worker which will compute the value of the function at a given point in the grid

        # Restore best fit

        self._restore_best_fit()

        # Duplicate the options used for the original minimizer

        new_args = dict(self.minuit.fitarg)

        # Get the minuit names for the parameters

        minuit_param_1 = self._parameter_name_to_minuit_name(param_1)

        if param_2 is None:

            minuit_param_2 = None

        else:

            minuit_param_2 = self._parameter_name_to_minuit_name(param_2)

        # Instance the worker

        contour_worker = ContourWorker(
            self._f, self.minuit.values, new_args, minuit_param_1, minuit_param_2, self.name_to_position
        )

        # We are finally ready to do the computation

        # Serial and parallel computation are slightly different, so check whether we are in one case
        # or the other

        if not parallel:

            # Serial computation

            if progress:

                # Computation with progress bar

                progress_bar = ProgressBar(grid.shape[0])

                # Define a wrapper which will increase the progress before as well as run the actual computation

                def wrap(args):

                    results = contour_worker(args)

                    progress_bar.increase()

                    return results

                # Do the computation

                results = list(map(wrap, grid))

            else:

                # Computation without the progress bar

                results = list(map(contour_worker, grid))

        else:

            # Parallel computation

            # Connect to the engines

            client = ParallelClient(**options)

            # Get a balanced view of the engines

            load_balance_view = client.load_balanced_view()

            # Distribute the work among the engines and start it, but return immediately the control
            # to the main thread

            amr = load_balance_view.map_async(contour_worker, grid)

            # print progress
            n_points = grid.flatten().shape[0]
            progress = ProgressBar(n_points)

            # This loop will check from time to time the status of the computation, which is happening on
            # different threads, and update the progress bar

            while not amr.ready():
                # Check and report the status of the computation every second

                time.sleep(1)

                # if (debug):
                #     stdouts = amr.stdout
                #
                #     # clear_output doesn't do much in terminal environments
                #     for stdout, stderr in zip(amr.stdout, amr.stderr):
                #         if stdout:
                #             print "%s" % (stdout[-1000:])
                #         if stderr:
                #             print "%s" % (stderr[-1000:])
                #     sys.stdout.flush()

                progress.animate(amr.progress - 1)

            # If there have been problems, here is where they will be raised

            results = amr.get()

            # Always display 100% at the end

            progress.animate(n_points)

            # Add a new line after the progress bar
            print("\n")

        # Return results

        return (
            param_1_steps,
            param_2_steps,
            numpy.array(results).reshape((param_1_steps.shape[0], param_2_steps.shape[0])),
        )
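
The step generation at the top of contours switches between numpy.logspace and numpy.linspace per axis and then builds the full grid of points. The sketch below reproduces that, with itertools.product standing in for the cartesian helper used in the example, and arbitrary bounds and step counts.

import math
import itertools
import numpy

def make_steps(vmin, vmax, n_steps, take_log):
    if take_log:
        return numpy.logspace(math.log10(vmin), math.log10(vmax), n_steps)
    return numpy.linspace(vmin, vmax, n_steps)

# log steps for the first parameter, linear steps for the second
param_1_steps = make_steps(1e-3, 1e1, 5, take_log=True)
param_2_steps = make_steps(0.0, 2.0, 4, take_log=False)

# full grid of (param_1, param_2) points, param_1-major like the reshape in the example
grid = numpy.array(list(itertools.product(param_1_steps, param_2_steps)))

values = numpy.array([p1 * p2 for p1, p2 in grid])   # placeholder objective
contour = values.reshape((param_1_steps.shape[0], param_2_steps.shape[0]))
print(contour.shape)   # (5, 4)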
Example #12
    def _unbinned_fit_global_and_determine_optimum_grade(
            self, events, exposure, bayes=False):
        """
        Provides the ability to find the optimum polynomial grade for *unbinned* events by fitting the
        total (all channels) to 0-2 order polynomials and then comparing them via a likelihood ratio test.


        :param events: an event list
        :param exposure: the exposure per event
        :return: polynomial grade
        """

        # Fit the sum of all the channels to determine the optimal polynomial
        # grade

        min_grade = 0
        max_grade = 2
        log_likelihoods = []

        t_start = self._poly_intervals.start_times
        t_stop = self._poly_intervals.stop_times

        log.debug("attempting to find best fit poly with unbinned")

        if threeML_config["parallel"]["use_parallel"]:

            def worker(grade):

                polynomial, log_like = unbinned_polyfit(events,
                                                        grade,
                                                        t_start,
                                                        t_stop,
                                                        exposure,
                                                        bayes=bayes)

                return log_like

            client = ParallelClient()

            log_likelihoods = client.execute_with_progress_bar(
                worker,
                list(range(min_grade, max_grade + 1)),
                name="Finding best polynomial Order")

        else:

            for grade in trange(min_grade,
                                max_grade + 1,
                                desc="Finding best polynomial Order"):
                polynomial, log_like = unbinned_polyfit(events,
                                                        grade,
                                                        t_start,
                                                        t_stop,
                                                        exposure,
                                                        bayes=bayes)

                log_likelihoods.append(log_like)

        # Found the best one
        delta_loglike = np.array([
            2 * (x[0] - x[1])
            for x in zip(log_likelihoods[:-1], log_likelihoods[1:])
        ])

        log.debug(f"log likes {log_likelihoods}")
        log.debug(f" delta loglikes {delta_loglike}")

        delta_threshold = 9.0

        mask = delta_loglike >= delta_threshold

        if len(mask.nonzero()[0]) == 0:

            # best grade is zero!
            best_grade = 0

        else:

            best_grade = mask.nonzero()[0][-1] + 1

        return best_grade
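
The grade selection at the end of this example (and of the binned variant that follows) compares consecutive polynomial orders through twice the difference of the values returned by the fit and keeps the highest order whose improvement exceeds a threshold of 9. That selection logic on its own, with invented figures of merit:

import numpy as np

def best_polynomial_grade(log_likelihoods, delta_threshold=9.0):
    # 2 * difference between consecutive grades (grade k vs grade k+1)
    delta_loglike = np.array([
        2 * (a - b)
        for a, b in zip(log_likelihoods[:-1], log_likelihoods[1:])
    ])

    mask = delta_loglike >= delta_threshold

    if len(mask.nonzero()[0]) == 0:
        # no grade improved significantly over the one below it
        return 0

    # highest grade that still gave a significant improvement
    return mask.nonzero()[0][-1] + 1

# invented values for grades 0..4: grade 1 helps a lot, higher grades do not
log_likelihoods = [120.0, 100.0, 99.0, 98.8, 98.7]
print(best_polynomial_grade(log_likelihoods))   # 1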
Example #13
    def _fit_global_and_determine_optimum_grade(self,
                                                cnts,
                                                bins,
                                                exposure,
                                                bayes=False):
        """
        Provides the ability to find the optimum polynomial grade for *binned* counts by fitting the
        total (all channels) to 0-4 order polynomials and then comparing them via a likelihood ratio test.


        :param cnts: counts per bin
        :param bins: the bins used
        :param exposure: exposure per bin
        :param bayes:
        :return: polynomial grade
        """

        min_grade = 0
        max_grade = 4
        log_likelihoods = []

        log.debug("attempting to find best poly with binned data")

        if threeML_config["parallel"]["use_parallel"]:

            def worker(grade):

                polynomial, log_like = polyfit(bins,
                                               cnts,
                                               grade,
                                               exposure,
                                               bayes=bayes)

                return log_like

            client = ParallelClient()

            log_likelihoods = client.execute_with_progress_bar(
                worker,
                list(range(min_grade, max_grade + 1)),
                name="Finding best polynomial Order")

        else:

            for grade in trange(min_grade,
                                max_grade + 1,
                                desc="Finding best polynomial Order"):
                polynomial, log_like = polyfit(bins,
                                               cnts,
                                               grade,
                                               exposure,
                                               bayes=bayes)

                log_likelihoods.append(log_like)

        # Found the best one
        delta_loglike = np.array([
            2 * (x[0] - x[1])
            for x in zip(log_likelihoods[:-1], log_likelihoods[1:])
        ])

        log.debug(f"log likes {log_likelihoods}")
        log.debug(f" delta loglikes {delta_loglike}")

        delta_threshold = 9.0

        mask = delta_loglike >= delta_threshold

        if len(mask.nonzero()[0]) == 0:

            # best grade is zero!
            best_grade = 0

        else:

            best_grade = mask.nonzero()[0][-1] + 1

        return best_grade
    def _fit_polynomials(self, bayes=False):
        """
        fits a polynomial to all channels over the input time intervals

        :param bayes:
        :return:
        """

        # mark that we have fit a poly now

        self._poly_fit_exists = True

        # we need to adjust the selection to the true intervals of the time-binned spectra

        tmp_poly_intervals = self._poly_intervals
        poly_intervals = self._adjust_to_true_intervals(tmp_poly_intervals)
        self._poly_intervals = poly_intervals

        # now lets get all the counts, exposure and midpoints for the
        # selection

        selected_counts = []
        selected_exposure = []
        selected_midpoints = []

        for selection in poly_intervals:

            # get the mask of these bins

            mask = self._select_bins(selection.start_time, selection.stop_time)

            # the counts will be (time, channel) here,
            # so the mask is selecting time.
            # a sum along axis=0 is a sum in time, while axis=1 is a sum in energy

            selected_counts.extend(
                self._binned_spectrum_set.counts_per_bin[mask])

            selected_exposure.extend(
                self._binned_spectrum_set.exposure_per_bin[mask])
            selected_midpoints.extend(
                self._binned_spectrum_set.time_intervals.mid_points[mask]
            )

        selected_counts = np.array(selected_counts)
        selected_midpoints = np.array(selected_midpoints)
        selected_exposure = np.array(selected_exposure)

        # Now we will find the best poly order unless the user specified one
        # The total cnts (over channels) is binned

        if self._user_poly_order == -1:

            self._optimal_polynomial_grade = (
                self._fit_global_and_determine_optimum_grade(
                    selected_counts.sum(axis=1),
                    selected_midpoints,
                    selected_exposure,
                    bayes=bayes,
                )
            )

            log.info(
                "Auto-determined polynomial order: %d"
                % self._optimal_polynomial_grade
            )

        else:

            self._optimal_polynomial_grade = self._user_poly_order

        if threeML_config["parallel"]["use_parallel"]:

            def worker(counts):

                with silence_console_log():
                    polynomial, _ = polyfit(
                        selected_midpoints,
                        counts,
                        self._optimal_polynomial_grade,
                        selected_exposure,
                        bayes=bayes,
                    )

                return polynomial

            client = ParallelClient()

            polynomials = client.execute_with_progress_bar(
                worker, selected_counts.T, name=f"Fitting {self._instrument} background")

        else:

            polynomials = []

            # now fit the light curve of each channel
            # and save the estimated polynomial

            for counts in tqdm(
                selected_counts.T, desc=f"Fitting {self._instrument} background"
            ):

                with silence_console_log():
                    polynomial, _ = polyfit(
                        selected_midpoints,
                        counts,
                        self._optimal_polynomial_grade,
                        selected_exposure,
                        bayes=bayes,
                    )

                    polynomials.append(polynomial)

        self._polynomials = polynomials
Example #15
    def _fit_polynomials(self, bayes=False):
        """

        Binned fit to each channel. Sets the polynomial array that will be used to compute
        counts over an interval



        :return:
        """

        self._poly_fit_exists = True

        # Select all the events that are in the background regions
        # and make a mask

        all_bkg_masks = []

        for selection in self._poly_intervals:
            all_bkg_masks.append(
                np.logical_and(
                    self._arrival_times >= selection.start_time,
                    self._arrival_times <= selection.stop_time,
                ))
        poly_mask = all_bkg_masks[0]

        # If there are multiple masks:
        if len(all_bkg_masks) > 1:
            for mask in all_bkg_masks[1:]:
                poly_mask = np.logical_or(poly_mask, mask)

        # Select the all the events in the poly selections
        # We only need to do this once

        total_poly_events = self._arrival_times[poly_mask]

        # For the channel energies we will need to down select again.
        # We can go ahead and do this to avoid repeated computations

        total_poly_energies = self._measurement[poly_mask]

        # This calculation removes the unselected portion of the light curve
        # so that we are not fitting zero counts. It will be used in the channel calculations
        # as well

        bin_width = 1.0  # seconds
        these_bins = np.arange(self._start_time, self._stop_time, bin_width)

        cnts, bins = np.histogram(total_poly_events, bins=these_bins)

        # Find the mean time of the bins and calculate the exposure in each bin
        mean_time = []
        exposure_per_bin = []
        for i in range(len(bins) - 1):
            m = np.mean((bins[i], bins[i + 1]))
            mean_time.append(m)

            exposure_per_bin.append(
                self.exposure_over_interval(bins[i], bins[i + 1]))

        mean_time = np.array(mean_time)

        exposure_per_bin = np.array(exposure_per_bin)

        # Remove bins with zero counts
        all_non_zero_mask = []

        for selection in self._poly_intervals:
            all_non_zero_mask.append(
                np.logical_and(mean_time >= selection.start_time,
                               mean_time <= selection.stop_time))

        non_zero_mask = all_non_zero_mask[0]
        if len(all_non_zero_mask) > 1:
            for mask in all_non_zero_mask[1:]:
                non_zero_mask = np.logical_or(mask, non_zero_mask)

        # Now we will find the best poly order unless the user specified one
        # The total cnts (over channels) is binned to .1 sec intervals

        if self._user_poly_order == -1:

            self._optimal_polynomial_grade = (
                self._fit_global_and_determine_optimum_grade(
                    cnts[non_zero_mask],
                    mean_time[non_zero_mask],
                    exposure_per_bin[non_zero_mask],
                    bayes=bayes))

            log.info("Auto-determined polynomial order: %d" %
                     self._optimal_polynomial_grade)

        else:

            self._optimal_polynomial_grade = self._user_poly_order

        channels = list(
            range(self._first_channel, self._n_channels + self._first_channel))

        if threeML_config["parallel"]["use_parallel"]:

            def worker(channel):

                channel_mask = total_poly_energies == channel

                # Mask background events and current channel
                # poly_chan_mask = np.logical_and(poly_mask, channel_mask)
                # Select the masked events

                current_events = total_poly_events[channel_mask]

                cnts, bins = np.histogram(current_events, bins=these_bins)

                polynomial, _ = polyfit(mean_time[non_zero_mask],
                                        cnts[non_zero_mask],
                                        self._optimal_polynomial_grade,
                                        exposure_per_bin[non_zero_mask],
                                        bayes=bayes)

                return polynomial

            client = ParallelClient()

            polynomials = client.execute_with_progress_bar(
                worker,
                channels,
                name=f"Fitting {self._instrument} background")

        else:

            polynomials = []

            for channel in tqdm(channels,
                                desc=f"Fitting {self._instrument} background"):

                channel_mask = total_poly_energies == channel

                # Mask background events and current channel
                # poly_chan_mask = np.logical_and(poly_mask, channel_mask)
                # Select the masked events

                current_events = total_poly_events[channel_mask]

                # now bin the selected channel counts

                cnts, bins = np.histogram(current_events, bins=these_bins)

                # Put data to fit in an x vector and y vector

                polynomial, _ = polyfit(mean_time[non_zero_mask],
                                        cnts[non_zero_mask],
                                        self._optimal_polynomial_grade,
                                        exposure_per_bin[non_zero_mask],
                                        bayes=bayes)

                polynomials.append(polynomial)

        # We are now ready to return the polynomials

        self._polynomials = polynomials
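
The light-curve preparation in this example histograms the selected events into one-second bins, computes the midpoint and exposure of every bin, and masks out bins that fall outside the background selections. The per-bin Python loop can also be written in a vectorized way; the sketch below uses invented arrival times and takes the exposure to simply equal the bin width, which is a simplification of exposure_over_interval.

import numpy as np

rng = np.random.default_rng(1)

# invented arrival times and two background selection intervals
arrival_times = rng.uniform(0.0, 20.0, size=500)
poly_intervals = [(0.0, 5.0), (12.0, 20.0)]     # (start_time, stop_time)

bin_width = 1.0
these_bins = np.arange(0.0, 20.0, bin_width)
cnts, bins = np.histogram(arrival_times, bins=these_bins)

# midpoint of every bin, computed in one go instead of a Python loop
mean_time = 0.5 * (bins[:-1] + bins[1:])

# simplified stand-in for exposure_over_interval: live time equals bin width
exposure_per_bin = np.diff(bins)

# keep only the bins whose midpoint falls inside one of the selections
selection_mask = np.zeros_like(mean_time, dtype=bool)
for t_start, t_stop in poly_intervals:
    selection_mask |= (mean_time >= t_start) & (mean_time <= t_stop)

print(cnts[selection_mask].shape, mean_time[selection_mask].shape)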
Example #16
    def get_contours(self,
                     param_1,
                     param_1_minimum,
                     param_1_maximum,
                     param_1_n_steps,
                     param_2=None,
                     param_2_minimum=None,
                     param_2_maximum=None,
                     param_2_n_steps=None,
                     progress=True,
                     **options):
        """
        Generate confidence contours for the given parameters by stepping for the given number of steps between
        the given boundaries. Call it specifying only param_1, param_1_minimum and param_1_maximum to
        generate the profile of the likelihood for parameter 1. Specify all parameters to obtain instead a 2d
        contour of param_1 vs param_2.

        NOTE: if using parallel computation, param_1_n_steps must be an integer multiple of the number of running
        engines. If that is not the case, the code will reduce the number of steps to match that requirement, and
        issue a warning

        :param param_1: fully qualified name of the first parameter or parameter instance
        :param param_1_minimum: lower bound for the range for the first parameter
        :param param_1_maximum: upper bound for the range for the first parameter
        :param param_1_n_steps: number of steps for the first parameter
        :param param_2: fully qualified name of the second parameter or parameter instance
        :param param_2_minimum: lower bound for the range for the second parameter
        :param param_2_maximum: upper bound for the range for the second parameter
        :param param_2_n_steps: number of steps for the second parameter
        :param progress: (True or False) whether to display progress or not
        :param log: by default the steps are taken linearly. With this optional parameter you can provide a tuple of
                    booleans which specify whether the steps are to be taken logarithmically. For example,
                    'log=(True,False)' specifies that the steps for the first parameter are to be taken logarithmically,
                    while they are linear for the second parameter. If you are generating the profile for only one
                    parameter, you can specify 'log=(True,)' or 'log=(False,)' (optional)
        :return: a tuple containing an array corresponding to the steps for the first parameter, an array corresponding
                 to the steps for the second parameter (or None if stepping only in one direction), a matrix of size
                 param_1_steps x param_2_steps containing the value of the function at the corresponding points in the
                 grid. If param_2_steps is None (only one parameter), then this reduces to an array of
                 size param_1_steps.
        """

        if hasattr(param_1, "value"):

            # Substitute with the name
            param_1 = param_1.path

        if hasattr(param_2, "value"):

            param_2 = param_2.path

        # Check that the parameters exist
        assert param_1 in self._likelihood_model.free_parameters, (
            "Parameter %s is not a free parameters of the "
            "current model" % param_1)

        if param_2 is not None:
            assert param_2 in self._likelihood_model.free_parameters, (
                "Parameter %s is not a free parameters of the "
                "current model" % param_2)

        # Check that we have a valid fit

        assert (
            self._current_minimum is not None
        ), "You have to run the .fit method before calling get_contours."

        # Then restore the best fit

        self._minimizer.restore_best_fit()

        # Check minimal assumptions about the procedure

        assert not (param_1
                    == param_2), "You have to specify two different parameters"

        assert (param_1_minimum <
                param_1_maximum), "Minimum larger than maximum for parameter 1"

        min1, max1 = self.likelihood_model[param_1].bounds

        if min1 is not None:

            assert param_1_minimum >= min1, (
                "Requested low range for parameter %s (%s) "
                "is below parameter minimum (%s)" %
                (param_1, param_1_minimum, min1))

        if max1 is not None:

            assert param_1_maximum <= max1, (
                "Requested hi range for parameter %s (%s) "
                "is above parameter maximum (%s)" %
                (param_1, param_1_maximum, max1))

        if param_2 is not None:

            min2, max2 = self.likelihood_model[param_2].bounds

            if min2 is not None:

                assert param_2_minimum >= min2, (
                    "Requested low range for parameter %s (%s) "
                    "is below parameter minimum (%s)" %
                    (param_2, param_2_minimum, min2))

            if max2 is not None:

                assert param_2_maximum <= max2, (
                    "Requested hi range for parameter %s (%s) "
                    "is above parameter maximum (%s)" %
                    (param_2, param_2_maximum, max2))

        # Check whether we are parallelizing or not

        if not threeML_config["parallel"]["use-parallel"]:

            a, b, cc = self.minimizer.contours(
                param_1, param_1_minimum, param_1_maximum, param_1_n_steps,
                param_2, param_2_minimum, param_2_maximum, param_2_n_steps,
                progress, **options)

            # Collapse the second dimension of the results if we are doing a 1d contour

            if param_2 is None:
                cc = cc[:, 0]

        else:

            # With parallel computation

            # In order to distribute fairly the computation, the strategy is to parallelize the computation
            # by assigning to the engines one "line" of the grid at the time

            # Connect to the engines

            client = ParallelClient(**options)

            # Get the number of engines

            n_engines = client.get_number_of_engines()

            # Check whether the number of threads is larger than the number of steps in the first direction

            if n_engines > param_1_n_steps:

                n_engines = int(param_1_n_steps)

                custom_warnings.warn(
                    "The number of engines is larger than the number of steps. Using only %s engines."
                    % n_engines,
                    ReducingNumberOfThreads,
                )

            # Check if the number of steps is divisible by the number
            # of threads, otherwise issue a warning and make it so

            if float(param_1_n_steps) % n_engines != 0:
                # Set the number of steps to an integer multiple of the engines
                # (note that // is the floor division, also called integer division)

                param_1_n_steps = (param_1_n_steps // n_engines) * n_engines

                custom_warnings.warn(
                    "Number of steps is not a multiple of the number of threads. Reducing steps to %s"
                    % param_1_n_steps,
                    ReducingNumberOfSteps,
                )

            # Compute the number of splits, i.e., how many lines in the grid for each engine.
            # (note that this is guaranteed to be an integer number after the previous checks)

            p1_split_steps = param_1_n_steps // n_engines
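            # Worked example (hypothetical numbers): with param_1_n_steps = 50 and
            # 8 engines, the steps are first reduced to (50 // 8) * 8 = 48, and each
            # engine then computes p1_split_steps = 48 // 8 = 6 lines of the grid.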

            # Prepare arrays for results

            if param_2 is None:

                # One array
                pcc = np.zeros(param_1_n_steps)

                pa = np.linspace(param_1_minimum, param_1_maximum,
                                 param_1_n_steps)
                pb = None

            else:

                pcc = np.zeros((param_1_n_steps, param_2_n_steps))

                # Prepare the two axes of the parameter space
                pa = np.linspace(param_1_minimum, param_1_maximum,
                                 param_1_n_steps)
                pb = np.linspace(param_2_minimum, param_2_maximum,
                                 param_2_n_steps)

            # Define the parallel worker which will go through the computation

            # NOTE: I only divide
            # on the first parameter axis so that the different
            # threads are more or less well mixed for points close and
            # far from the best fit

            def worker(start_index):

                # Re-create the minimizer

                backup_freeParameters = [
                    x.value for x in list(
                        self._likelihood_model.free_parameters.values())
                ]

                this_minimizer = self._get_minimizer(
                    self.minus_log_like_profile, self._free_parameters)

                this_p1min = pa[start_index * p1_split_steps]
                this_p1max = pa[(start_index + 1) * p1_split_steps - 1]

                # print("From %s to %s" % (this_p1min, this_p1max))

                aa, bb, ccc = this_minimizer.contours(param_1,
                                                      this_p1min,
                                                      this_p1max,
                                                      p1_split_steps,
                                                      param_2,
                                                      param_2_minimum,
                                                      param_2_maximum,
                                                      param_2_n_steps,
                                                      progress=True,
                                                      **options)

                # Restore best fit values

                for val, par in zip(
                        backup_freeParameters,
                        list(self._likelihood_model.free_parameters.values()),
                ):

                    par.value = val

                return ccc

            # Now re-assemble the vector of results taking the different parts from the engines

            all_results = client.execute_with_progress_bar(
                worker, list(range(n_engines)), chunk_size=1)

            for i, these_results in enumerate(all_results):

                if param_2 is None:

                    pcc[i * p1_split_steps:(i + 1) *
                        p1_split_steps] = these_results[:, 0]

                else:

                    pcc[i * p1_split_steps:(i + 1) *
                        p1_split_steps, :] = these_results

            # Give the results the names that the following code expect. These are kept separate for debugging
            # purposes

            cc = pcc
            a = pa
            b = pb

        # Here we have done the computation, in parallel computation or not. Let's make the plot
        # with the contour

        if param_2 is not None:

            # 2d contour

            fig = self._plot_contours("%s" % (param_1), a, "%s" % (param_2, ),
                                      b, cc)

        else:

            # 1d contour (i.e., a profile)

            fig = self._plot_profile("%s" % (param_1), a, cc)

        # Check if we found a better minimum. This shouldn't happen, but in case of very difficult fit
        # it might.

        if self._current_minimum - cc.min() > 0.1:

            if param_2 is not None:

                idx = cc.argmin()

                aidx, bidx = np.unravel_index(idx, cc.shape)

                print(
                    "\nFound a better minimum: %s with %s = %s and %s = %s. Run again your fit starting from here."
                    % (cc.min(), param_1, a[aidx], param_2, b[bidx]))

            else:

                idx = cc.argmin()

                print(
                    "Found a better minimum: %s with %s = %s. Run again your fit starting from here."
                    % (cc.min(), param_1, a[idx]))

        return a, b, cc, fig
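A minimal usage sketch for the method above (illustrative only: jl stands for an already-fitted analysis object exposing this method, and the parameter paths and ranges are hypothetical):

# 1d profile of a single parameter
a, b, cc, fig = jl.get_contours(
    "source.spectrum.main.Powerlaw.index", -3.0, -1.0, 30)

# 2d contour of two parameters, with logarithmic steps for the second one
a, b, cc, fig = jl.get_contours(
    "source.spectrum.main.Powerlaw.index", -3.0, -1.0, 30,
    "source.spectrum.main.Powerlaw.K", 1e-5, 1e-3, 30,
    log=(False, True))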
Example #17
    def get_contours(
        self,
        param_1,
        param_1_minimum,
        param_1_maximum,
        param_1_n_steps,
        param_2=None,
        param_2_minimum=None,
        param_2_maximum=None,
        param_2_n_steps=None,
        progress=True,
        **options
    ):
        """
        Generate confidence contours for the given parameters by stepping for the given number of steps between
        the given boundaries. Call it specifying only param_1, param_1_minimum and param_1_maximum to
        generate the profile of the likelihood for parameter 1. Specify all parameters to obtain instead a 2d
        contour of param_1 vs param_2.

        NOTE: if using parallel computation, param_1_n_steps must be an integer multiple of the number of running
        engines. If that is not the case, the code will reduce the number of steps to match that requirement, and
        issue a warning

        :param param_1: fully qualified name of the first parameter or parameter instance
        :param param_1_minimum: lower bound for the range for the first parameter
        :param param_1_maximum: upper bound for the range for the first parameter
        :param param_1_n_steps: number of steps for the first parameter
        :param param_2: fully qualified name of the second parameter or parameter instance
        :param param_2_minimum: lower bound for the range for the second parameter
        :param param_2_maximum: upper bound for the range for the second parameter
        :param param_2_n_steps: number of steps for the second parameter
        :param progress: (True or False) whether to display progress or not
        :param log: by default the steps are taken linearly. With this optional parameter you can provide a tuple of
                    booleans which specify whether the steps are to be taken logarithmically. For example,
                    'log=(True,False)' specifies that the steps for the first parameter are to be taken logarithmically,
                    while they are linear for the second parameter. If you are generating the profile for only one
                    parameter, you can specify 'log=(True,)' or 'log=(False,)' (optional)
        :return: a tuple containing an array corresponding to the steps for the first parameter, an array corresponding
                 to the steps for the second parameter (or None if stepping only in one direction), a matrix of size
                 param_1_steps x param_2_steps containing the value of the function at the corresponding points in the
                 grid. If param_2_steps is None (only one parameter), then this reduces to an array of
                 size param_1_steps.
        """

        if hasattr(param_1, "value"):

            # Substitute with the name
            param_1 = param_1.path

        if hasattr(param_2, "value"):

            param_2 = param_2.path

        # Check that the parameters exist
        assert param_1 in self._likelihood_model.free_parameters, (
            "Parameter %s is not a free parameters of the " "current model" % param_1
        )

        if param_2 is not None:
            assert param_2 in self._likelihood_model.free_parameters, (
                "Parameter %s is not a free parameters of the " "current model" % param_2
            )

        # Check that we have a valid fit

        assert self._current_minimum is not None, "You have to run the .fit method before calling get_contours."

        # Then restore the best fit

        self._minimizer._restore_best_fit()

        # Check minimal assumptions about the procedure

        assert not (param_1 == param_2), "You have to specify two different parameters"

        assert param_1_minimum < param_1_maximum, "Minimum larger than maximum for parameter 1"

        if param_2 is not None:
            assert param_2_minimum < param_2_maximum, "Minimum larger than maximum for parameter 2"

        # Check whether we are parallelizing or not

        if not threeML_config["parallel"]["use-parallel"]:

            a, b, cc = self.minimizer.contours(
                param_1,
                param_1_minimum,
                param_1_maximum,
                param_1_n_steps,
                param_2,
                param_2_minimum,
                param_2_maximum,
                param_2_n_steps,
                progress,
                **options
            )

            # Collapse the second dimension of the results if we are doing a 1d contour

            if param_2 is None:
                cc = cc[:, 0]

        else:

            # With parallel computation

            # In order to distribute fairly the computation, the strategy is to parallelize the computation
            # by assigning to the engines one "line" of the grid at the time

            # Connect to the engines

            client = ParallelClient(**options)

            # Get the number of engines

            n_engines = client.get_number_of_engines()

            # Check whether the number of threads is larger than the number of steps in the first direction

            if n_engines > param_1_n_steps:

                n_engines = int(param_1_n_steps)

                custom_warnings.warn(
                    "The number of engines is larger than the number of steps. Using only %s engines." % n_engines,
                    ReducingNumberOfThreads,
                )

            # Check if the number of steps is divisible by the number
            # of threads, otherwise issue a warning and make it so

            if float(param_1_n_steps) % n_engines != 0:
                # Set the number of steps to an integer multiple of the engines
                # (note that // is the floor division, also called integer division)

                param_1_n_steps = (param_1_n_steps // n_engines) * n_engines

                custom_warnings.warn(
                    "Number of steps is not a multiple of the number of threads. Reducing steps to %s"
                    % param_1_n_steps,
                    ReducingNumberOfSteps,
                )

            # Compute the number of splits, i.e., how many lines in the grid for each engine.
            # (note that this is guaranteed to be an integer number after the previous checks)

            p1_split_steps = param_1_n_steps // n_engines

            # Prepare arrays for results

            if param_2 is None:

                # One array
                pcc = numpy.zeros(param_1_n_steps)

                pa = numpy.linspace(param_1_minimum, param_1_maximum, param_1_n_steps)
                pb = None

            else:

                pcc = numpy.zeros((param_1_n_steps, param_2_n_steps))

                # Prepare the two axes of the parameter space
                pa = numpy.linspace(param_1_minimum, param_1_maximum, param_1_n_steps)
                pb = numpy.linspace(param_2_minimum, param_2_maximum, param_2_n_steps)

            # Define the parallel worker which will go through the computation

            # NOTE: I only divide
            # on the first parameter axis so that the different
            # threads are more or less well mixed for points close and
            # far from the best fit

            def worker(start_index):

                # Re-create the minimizer

                # backup_freeParameters = copy.deepcopy(self.freeParameters)

                this_minimizer = self.Minimizer(self.minus_log_like_profile, self._free_parameters)

                this_p1min = pa[start_index * p1_split_steps]
                this_p1max = pa[(start_index + 1) * p1_split_steps - 1]

                # print("From %s to %s" % (this_p1min, this_p1max))

                aa, bb, ccc = this_minimizer.contours(
                    param_1,
                    this_p1min,
                    this_p1max,
                    p1_split_steps,
                    param_2,
                    param_2_minimum,
                    param_2_maximum,
                    param_2_n_steps,
                    False,
                    **options
                )

                # self.freeParameters = backup_freeParameters

                return ccc

            # Get a balanced view of the engines

            lview = client.load_balanced_view()
            # lview.block = True

            # Distribute the work among the engines and start it, but return immediately the control
            # to the main thread

            amr = lview.map_async(worker, range(n_engines))

            # print progress

            progress = ProgressBar(n_engines)

            # This loop will check from time to time the status of the computation, which is happening on
            # different threads, and update the progress bar

            while not amr.ready():
                # Check and report the status of the computation every second

                time.sleep(1 + numpy.random.uniform(0, 1))

                # if (debug):
                #     stdouts = amr.stdout
                #
                #     # clear_output doesn't do much in terminal environments
                #     for stdout, stderr in zip(amr.stdout, amr.stderr):
                #         if stdout:
                #             print "%s" % (stdout[-1000:])
                #         if stderr:
                #             print "%s" % (stderr[-1000:])
                #     sys.stdout.flush()

                progress.animate(amr.progress - 1)

            # Always display 100% at the end

            progress.animate(n_engines - 1)

            # Add a new line after the progress bar
            print("\n")

            # print("Serial time: %1.f (speed-up: %.1f)" %(amr.serial_time, float(amr.serial_time) / amr.wall_time))

            # Get the results. This will raise exceptions if something wrong happened during the computation.
            # We don't catch it so that the user will be aware of that

            res = amr.get()

            # Now re-assemble the vector of results taking the different parts from the engines

            for i in range(n_engines):

                if param_2 is None:

                    pcc[i * p1_split_steps : (i + 1) * p1_split_steps] = res[i][:, 0]

                else:

                    pcc[i * p1_split_steps : (i + 1) * p1_split_steps, :] = res[i]

            # Give the results the names that the following code expect. These are kept separate for debugging
            # purposes

            cc = pcc
            a = pa
            b = pb

        # Here we have done the computation, in parallel computation or not. Let's make the plot
        # with the contour

        if param_2 is not None:

            # 2d contour

            fig = self._plot_contours("%s" % (param_1), a, "%s" % (param_2,), b, cc)

        else:

            # 1d contour (i.e., a profile)

            fig = self._plot_profile("%s" % (param_1), a, cc)

        # Check if we found a better minimum. This shouldn't happen, but in case of very difficult fit
        # it might.

        if self._current_minimum - cc.min() > 0.1:

            if param_2 is not None:

                idx = cc.argmin()

                aidx, bidx = numpy.unravel_index(idx, cc.shape)

                print(
                    "\nFound a better minimum: %s with %s = %s and %s = %s. Run again your fit starting from here."
                    % (cc.min(), param_1, a[aidx], param_2, b[bidx])
                )

            else:

                idx = cc.argmin()

                print(
                    "Found a better minimum: %s with %s = %s. Run again your fit starting from here."
                    % (cc.min(), param_1, a[idx])
                )

        return a, b, cc, fig
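Both versions of get_contours stitch the per-engine results back into the full grid with the same slicing pattern. A stripped-down sketch of that reassembly step (illustrative only; the chunk contents are made up):

import numpy as np

n_engines, p1_split_steps, n2 = 4, 5, 10
pcc = np.zeros((n_engines * p1_split_steps, n2))

# Pretend each engine returned its (p1_split_steps x n2) block of the likelihood grid
chunks = [np.full((p1_split_steps, n2), float(i)) for i in range(n_engines)]

for i, chunk in enumerate(chunks):
    pcc[i * p1_split_steps:(i + 1) * p1_split_steps, :] = chunk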