Example #1
def test_progress_bars():

    threeML_config.interface.progress_bars = 'on'

    toggle_progress_bars()

    assert not threeML_config.interface.progress_bars

    toggle_progress_bars()

    assert threeML_config.interface.progress_bars

    silence_progress_bars()

    for i in tqdm(range(10), desc="test"):
        pass

    for i in trange(1, 10, 1, desc="test"):
        pass

    assert not threeML_config.interface.progress_bars

    activate_progress_bars()

    for i in tqdm(range(10), desc="test"):
        pass

    for i in trange(1, 10, 1, desc="test"):
        pass

    assert threeML_config.interface.progress_bars
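
In these snippets, tqdm and trange appear to be threeML's wrappers that honor the threeML_config.interface.progress_bars flag, which is what silence_progress_bars(), activate_progress_bars() and toggle_progress_bars() flip. The same guarded pattern can be sketched with plain tqdm and an explicit flag (the flag name and the doubling "work" are placeholders):

from tqdm.auto import tqdm

# Stand-in for threeML_config.interface.progress_bars
PROGRESS_BARS_ENABLED = True


def run_with_optional_bar(items, desc="working"):
    # Create the bar only when progress bars are enabled, and guard every
    # update/close call with the same flag.
    bar = tqdm(total=len(items), desc=desc) if PROGRESS_BARS_ENABLED else None

    results = []

    for item in items:
        results.append(item * 2)  # placeholder work

        if bar is not None:
            bar.update(1)

    if bar is not None:
        bar.close()

    return results
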
Example #2
    def execute_with_progress_bar(self,
                                  worker,
                                  items,
                                  chunk_size=None,
                                  name="progress"):

        # Let's make a wrapper which will allow us to recover the order
        def wrapper(x):

            (item_id, item) = x

            return (item_id, worker(item))

        items_wrapped = [(i, item) for i, item in enumerate(items)]

        amr = self._interactive_map(wrapper,
                                    items_wrapped,
                                    ordered=False,
                                    chunk_size=chunk_size)

        results = []

        for i, res in enumerate(tqdm(amr, desc=name)):

            results.append(res)

        # Reorder the list according to the id
        return list(
            map(lambda x: x[1], sorted(results, key=lambda x: x[0])))
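
execute_with_progress_bar tags each item with its index before handing it to an unordered parallel map, then sorts the results back into submission order. The same trick works with any unordered map; a self-contained sketch in which a plain serial map stands in for self._interactive_map (so it runs without an ipyparallel cluster):

from tqdm.auto import tqdm


def map_then_restore_order(worker, items, name="progress"):
    # Tag each item with its index so the results can be re-ordered even if
    # the underlying map returns them out of order.
    items_wrapped = [(i, item) for i, item in enumerate(items)]

    def wrapper(pair):
        item_id, item = pair
        return item_id, worker(item)

    # A plain built-in map stands in for the unordered parallel map above.
    results = list(
        tqdm(map(wrapper, items_wrapped), total=len(items_wrapped), desc=name)
    )

    # Reorder according to the original index and strip it off.
    return [value for _, value in sorted(results, key=lambda pair: pair[0])]


# map_then_restore_order(str.upper, ["a", "b", "c"]) -> ['A', 'B', 'C']
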
Example #3
    def _get_errors(self):
        """
        Override this method if the minimizer provide a function to get all errors at once. If instead it provides
        a method to get one error at the time, override the _get_one_error method

        :return: a ordered dictionary parameter_path -> (negative_error, positive_error)
        """

        # TODO: options for other significance levels

        target_delta_log_like = 0.5

        errors = collections.OrderedDict()

        p = tqdm(total=2 * len(self.parameters), desc="Computing errors")

        for parameter_name in self.parameters:

            negative_error = self._get_one_error(
                parameter_name, target_delta_log_like, -1
            )

            p.update(1)

            positive_error = self._get_one_error(
                parameter_name, target_delta_log_like, +1
            )

            p.update(1)

            errors[parameter_name] = (negative_error, positive_error)

        # close the progress bar now that all errors have been computed
        p.close()

        return errors
    def _evaluate(self):
        """

        calculate the best or mean fit of the new function or
        quantity

        :return:
        """
        # if there are independent variables
        if self._independent_variable_range:

            variates = []

            # scroll through the independent variables
            n_iterations = np.prod(self._out_shape)

            with use_astromodels_memoization(False):

                variables = list(
                    itertools.product(*self._independent_variable_range))

                if len(variables) > 1:

                    for v in tqdm(variables, desc="Propagating errors"):

                        variates.append(self._propagated_function(*v))

                else:

                    for v in variables:

                        variates.append(self._propagated_function(*v))

        # otherwise just evaluate
        else:

            variates = self._propagated_function()

        # create a variates container

        self._propagated_variates = VariatesContainer(variates,
                                                      self._out_shape,
                                                      self._cl,
                                                      self._transform,
                                                      self._equal_tailed)
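
_evaluate scans the cartesian product of the independent-variable ranges and only shows a progress bar when there is more than one point to evaluate. Stripped of the astromodels memoization and the VariatesContainer bookkeeping, the scan pattern looks roughly like this (assuming func returns a scalar per grid point):

import itertools

import numpy as np
from tqdm.auto import tqdm


def evaluate_on_grid(func, *axes, desc="Propagating errors"):
    # Cartesian product of the independent-variable ranges
    points = list(itertools.product(*axes))

    # Only bother with a progress bar when there is a real scan to do
    iterator = tqdm(points, desc=desc) if len(points) > 1 else points

    values = [func(*p) for p in iterator]

    # Reshape the flat list back onto the grid (scalar outputs assumed)
    return np.asarray(values).reshape([len(a) for a in axes])
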
Example #5
    def _step2d(self, steps1, steps2):

        log_likes = np.zeros((len(steps1), len(steps2)))

        if threeML_config.interface.progress_bars:

            p = tqdm(total=len(steps1) * len(steps2),
                     desc="Profiling likelihood")

        for i, step1 in enumerate(steps1):

            for j, step2 in enumerate(steps2):

                if self._n_free_parameters > 0:

                    # Profile out the free parameters

                    self._wrapper.set_fixed_values([step1, step2])

                    try:

                        _, this_log_like = self._optimizer.minimize(
                            compute_covar=False
                        )

                    except FitFailed:

                        # If the user is stepping too far it might be that the fit fails. It is usually not a
                        # problem

                        this_log_like = np.nan

                else:

                    # No free parameters, just compute the likelihood

                    this_log_like = self._function(step1, step2)

                log_likes[i, j] = this_log_like

                if threeML_config.interface.progress_bars:
                    p.update(1)

        return log_likes
Example #6
    def _step1d(self, steps1):

        log_likes = np.zeros_like(steps1)

        for i, step in enumerate(tqdm(steps1, desc="Profiling likelihood")):

            if self._n_free_parameters > 0:

                # Profile out the free parameters

                self._wrapper.set_fixed_values(step)

                _, this_log_like = self._optimizer.minimize(
                    compute_covar=False)

            else:

                # No free parameters, just compute the likelihood

                this_log_like = self._function(step)

            log_likes[i] = this_log_like

        return log_likes
Example #7
    def _unbinned_fit_polynomials(self, bayes=False):

        self._poly_fit_exists = True

        # Select all the events that are in the background regions
        # and make a mask

        all_bkg_masks = []

        total_duration = 0.0

        poly_exposure = 0

        for selection in self._poly_intervals:
            total_duration += selection.duration

            poly_exposure += self.exposure_over_interval(
                selection.start_time, selection.stop_time)

            all_bkg_masks.append(
                np.logical_and(
                    self._arrival_times >= selection.start_time,
                    self._arrival_times <= selection.stop_time,
                ))
        poly_mask = all_bkg_masks[0]

        # If there are multiple masks:
        if len(all_bkg_masks) > 1:
            for mask in all_bkg_masks[1:]:
                poly_mask = np.logical_or(poly_mask, mask)

        # Select all the events in the poly selections
        # We only need to do this once

        total_poly_events = self._arrival_times[poly_mask]

        # For the channel energies we will need to down select again.
        # We can go ahead and do this to avoid repeated computations

        total_poly_energies = self._measurement[poly_mask]

        # Now we will find the best poly order unless the user specified one
        # The total cnts (over channels) is binned to .1 sec intervals

        if self._user_poly_order == -1:

            self._optimal_polynomial_grade = (
                self._unbinned_fit_global_and_determine_optimum_grade(
                    total_poly_events, poly_exposure, bayes=bayes))

            log.info("Auto-determined polynomial order: %d" %
                     self._optimal_polynomial_grade)

        else:

            self._optimal_polynomial_grade = self._user_poly_order

        channels = list(
            range(self._first_channel, self._n_channels + self._first_channel))

        # Check whether we are parallelizing or not

        t_start = self._poly_intervals.start_times
        t_stop = self._poly_intervals.stop_times

        if threeML_config["parallel"]["use_parallel"]:

            def worker(channel):
                channel_mask = total_poly_energies == channel

                # Mask background events and current channel
                # poly_chan_mask = np.logical_and(poly_mask, channel_mask)
                # Select the masked events

                current_events = total_poly_events[channel_mask]

                polynomial, _ = unbinned_polyfit(
                    current_events,
                    self._optimal_polynomial_grade,
                    t_start,
                    t_stop,
                    poly_exposure,
                    bayes=bayes)

                return polynomial

            client = ParallelClient()

            polynomials = client.execute_with_progress_bar(
                worker,
                channels,
                name=f"Fitting {self._instrument} background")

        else:

            polynomials = []

            for channel in tqdm(channels,
                                desc=f"Fitting {self._instrument} background"):
                channel_mask = total_poly_energies == channel

                # Mask background events and current channel
                # poly_chan_mask = np.logical_and(poly_mask, channel_mask)
                # Select the masked events

                current_events = total_poly_events[channel_mask]

                polynomial, _ = unbinned_polyfit(
                    current_events,
                    self._optimal_polynomial_grade,
                    t_start,
                    t_stop,
                    poly_exposure,
                    bayes=bayes)

                polynomials.append(polynomial)

        # We are now ready to return the polynomials

        self._polynomials = polynomials
    def _fit_polynomials(self, bayes=False):
        """
        fits a polynomial to all channels over the input time intervals

        :param fit_intervals: str input intervals
        :return:
        """

        # mark that we have fit a poly now

        self._poly_fit_exists = True

        # we need to adjust the selection to the true intervals of the time-binned spectra

        tmp_poly_intervals = self._poly_intervals
        poly_intervals = self._adjust_to_true_intervals(tmp_poly_intervals)
        self._poly_intervals = poly_intervals

        # now lets get all the counts, exposure and midpoints for the
        # selection

        selected_counts = []
        selected_exposure = []
        selected_midpoints = []

        for selection in poly_intervals:

            # get the mask of these bins

            mask = self._select_bins(selection.start_time, selection.stop_time)

            # the counts will be (time, channel) here,
            # so the mask is selecting time.
            # a sum along axis=0 is a sum in time, while axis=1 is a sum in energy

            selected_counts.extend(
                self._binned_spectrum_set.counts_per_bin[mask])

            selected_exposure.extend(
                self._binned_spectrum_set.exposure_per_bin[mask])
            selected_midpoints.extend(
                self._binned_spectrum_set.time_intervals.mid_points[mask]
            )

        selected_counts = np.array(selected_counts)
        selected_midpoints = np.array(selected_midpoints)
        selected_exposure = np.array(selected_exposure)

        # Now we will find the best poly order unless the user specified one
        # The total cnts (over channels) is binned

        if self._user_poly_order == -1:

            self._optimal_polynomial_grade = (
                self._fit_global_and_determine_optimum_grade(
                    selected_counts.sum(axis=1),
                    selected_midpoints,
                    selected_exposure,
                    bayes=bayes,
                )
            )

            log.info(
                "Auto-determined polynomial order: %d"
                % self._optimal_polynomial_grade
            )

        else:

            self._optimal_polynomial_grade = self._user_poly_order

        if threeML_config["parallel"]["use_parallel"]:

            def worker(counts):

                with silence_console_log():
                    polynomial, _ = polyfit(
                        selected_midpoints,
                        counts,
                        self._optimal_polynomial_grade,
                        selected_exposure,
                        bayes=bayes,
                    )

                return polynomial

            client = ParallelClient()

            polynomials = client.execute_with_progress_bar(
                worker, selected_counts.T, name=f"Fitting {self._instrument} background")

        else:

            polynomials = []

            # now fit the light curve of each channel
            # and save the estimated polynomial

            for counts in tqdm(
                selected_counts.T, desc=f"Fitting {self._instrument} background"
            ):

                with silence_console_log():
                    polynomial, _ = polyfit(
                        selected_midpoints,
                        counts,
                        self._optimal_polynomial_grade,
                        selected_exposure,
                        bayes=bayes,
                    )

                    polynomials.append(polynomial)

        self._polynomials = polynomials
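
Both fitting methods above follow the same dispatch pattern: one per-channel (or per-light-curve) worker, sent either through ParallelClient.execute_with_progress_bar or through a serial tqdm loop. A minimal sketch of that pattern, with an optional client argument standing in for the parallel branch (pass None to run serially):

from tqdm.auto import tqdm


def fit_all_channels(worker, channels, client=None, name="Fitting background"):
    # If a parallel client is available, let it run the worker and draw the
    # progress bar (execute_with_progress_bar as shown in Example #2);
    # otherwise fall back to a serial loop with a local tqdm bar.
    if client is not None:
        return client.execute_with_progress_bar(worker, channels, name=name)

    polynomials = []

    for channel in tqdm(channels, desc=name):
        polynomials.append(worker(channel))

    return polynomials
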
Example #9
    def _minimize(self):

        assert (len(self._grid) >
                0), "You need to set up a grid using add_parameter_to_grid"

        if self._2nd_minimization is None:

            raise RuntimeError(
                "You did not setup this global minimizer (GRID). You need to use the .setup() method"
            )

        # For each point in the grid, perform a fit

        parameters = list(self._grid.keys())

        overall_minimum = 1e20
        internal_best_fit_values = None

        n_iterations = np.prod([x.shape for x in list(self._grid.values())])

        if threeML_config.interface.progress_bars:
            p = tqdm(total=n_iterations, desc="Grid Minimization")

        for values_tuple in itertools.product(*list(self._grid.values())):

            # Reset everything to the original values, so that the fit will always start
            # from there, instead of from the values obtained in the last iteration, which
            # might have gone completely awry

            for par_name, par_value in self._original_values.items():

                self.parameters[par_name].value = par_value

            # Now set the parameters in the grid to their starting values

            for i, this_value in enumerate(values_tuple):

                self.parameters[parameters[i]].value = this_value

            # Get a new instance of the minimizer. We need to do this instead of reusing an existing instance
            # because some minimizers (like iminuit) keep internal track of their status, so that reusing
            # a minimizer will create correlation between the different points
            # NOTE: this line necessarily needs to be after the values of the parameters have been set to the
            # point, because the init method of the minimizer instance will use those values to set the starting
            # point for the fit

            _minimizer = self._2nd_minimization.get_instance(self.function,
                                                             self.parameters,
                                                             verbosity=0)

            # Perform fit

            try:

                # We call _minimize() and not minimize() so that the best fit values are
                # in the internal system.

                this_best_fit_values_internal, this_minimum = _minimizer._minimize(
                )

            except:

                # A failure is not a problem here; only if all of the fits fail do we have a problem,
                # but that case is handled later

                continue

            # If this minimum is the overall minimum, save the result

            if this_minimum < overall_minimum:

                overall_minimum = this_minimum
                internal_best_fit_values = this_best_fit_values_internal

            # Use callbacks (if any)
            for callback in self._callbacks:

                callback(values_tuple, this_minimum)

            if threeML_config.interface.progress_bars:
                p.update(1)

        if internal_best_fit_values is None:
            log.error("All fit starting from values in the grid have failed!")
            raise AllFitFailed()

        return internal_best_fit_values, overall_minimum
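
The core of the GRID minimizer is independent of the minimizer plumbing: walk the cartesian product of the per-parameter grids, evaluate each point, and keep the best one. A stripped-down sketch, without the parameter resetting and the inner local fit, where `function` is a hypothetical callable taking a name -> value dict:

import itertools

import numpy as np
from tqdm.auto import tqdm


def grid_search(function, grid, show_progress=True):
    # grid: dict mapping parameter name -> 1D array of values to try
    parameter_names = list(grid.keys())
    points = list(itertools.product(*grid.values()))

    iterator = tqdm(points, desc="Grid Minimization") if show_progress else points

    best_point, overall_minimum = None, np.inf

    for values_tuple in iterator:

        value = function(dict(zip(parameter_names, values_tuple)))

        # Keep track of the overall minimum, as the real minimizer does
        if value < overall_minimum:
            best_point, overall_minimum = values_tuple, value

    return dict(zip(parameter_names, best_point)), overall_minimum
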
Example #10
def _setup_analysis_dictionaries(
    analysis_results,
    energy_range,
    energy_unit,
    flux_unit,
    use_components,
    components_to_use,
    confidence_level,
    equal_tailed,
    differential,
    sources_to_use,
    include_extended,
):
    """
    helper function to pull out analysis details that are common to flux and plotting functions


    :param analysis_results:
    :param energy_range:
    :param energy_unit:
    :param flux_unit:
    :param use_components:
    :param components_to_use:
    :param confidence_level:
    :param equal_tailed:
    :param differential:
    :param sources_to_use:
    :param include_extended:
    :return:
    """

    bayesian_analyses = collections.OrderedDict()
    mle_analyses = collections.OrderedDict()

    # first we split up the bayesian and mle analysis

    mle_sources = collections.OrderedDict()
    bayes_sources = collections.OrderedDict()

    for analysis in analysis_results:

        items = (list(analysis.optimized_model.point_sources.items())
                 if not include_extended else list(
                     analysis.optimized_model.sources.items()))

        for source_name, source in items:

            if source_name in sources_to_use or not sources_to_use:

                if analysis.analysis_type == "MLE":

                    # keep track of duplicate sources

                    mle_sources.setdefault(source_name, []).append(1)

                    if len(mle_sources[source_name]) > 1:
                        name = "%s_%d" % (source_name,
                                          len(mle_sources[source_name]))

                    else:

                        name = source_name

                    try:

                        comps = [
                            c.name
                            for c in source.spectrum.main.composite.functions
                        ]

                    except:

                        comps = []

                    # duplicate components
                    comps = [
                        "%s_n%i" % (s, suffix) if num > 1 else s
                        for s, num in list(collections.Counter(comps).items())
                        for suffix in range(1, num + 1)
                    ]

                    mle_analyses[name] = {
                        "source": source_name,
                        "analysis": analysis,
                        "component_names": comps,
                    }

                else:

                    bayes_sources.setdefault(source_name, []).append(1)

                    # keep track of duplicate sources

                    if len(bayes_sources[source_name]) > 1:
                        name = "%s_%d" % (source_name,
                                          len(bayes_sources[source_name]))

                    else:

                        name = source_name

                    try:

                        comps = [
                            c.name
                            for c in source.spectrum.main.composite.functions
                        ]

                    except:

                        comps = []

                    # duplicate components
                    comps = [
                        "%s_n%i" % (s, suffix) if num > 1 else s
                        for s, num in list(collections.Counter(comps).items())
                        for suffix in range(1, num + 1)
                    ]

                    bayesian_analyses[name] = {
                        "source": source_name,
                        "analysis": analysis,
                        "component_names": comps,
                    }

    # keep track of the number of sources we will use

    num_sources_to_use = 0

    # go through the MLE analysis and build up some fitted sources

    for key in tqdm(list(mle_analyses.keys()), desc="processing MLE analyses"):

        # if we want to use this source

        if (not use_components or ("total" in components_to_use)
                or (not mle_analyses[key]["component_names"])):
            mle_analyses[key][
                "fitted point source"] = FittedPointSourceSpectralHandler(
                    mle_analyses[key]["analysis"],
                    mle_analyses[key]["source"],
                    energy_range,
                    energy_unit,
                    flux_unit,
                    confidence_level,
                    equal_tailed=equal_tailed,
                    is_differential_flux=differential,
                )

            num_sources_to_use += 1

        # see if there are any components to use

        if use_components:

            num_components_to_use = 0

            component_dict = {}

            for component in mle_analyses[key]["component_names"]:

                # if we want to plot all the components

                if not components_to_use:

                    component_dict[
                        component] = FittedPointSourceSpectralHandler(
                            mle_analyses[key]["analysis"],
                            mle_analyses[key]["source"],
                            energy_range,
                            energy_unit,
                            flux_unit,
                            confidence_level,
                            equal_tailed,
                            component=component,
                            is_differential_flux=differential,
                        )

                    num_components_to_use += 1

                else:

                    # otherwise pick off only the ones of interest

                    if component in components_to_use:
                        component_dict[
                            component] = FittedPointSourceSpectralHandler(
                                mle_analyses[key]["analysis"],
                                mle_analyses[key]["source"],
                                energy_range,
                                energy_unit,
                                flux_unit,
                                confidence_level,
                                equal_tailed,
                                component=component,
                                is_differential_flux=differential,
                            )

                        num_components_to_use += 1

            # save these to the dict

            mle_analyses[key]["components"] = component_dict

        # keep track of how many components we need to plot

        if use_components:

            num_sources_to_use += num_components_to_use

            if "total" in components_to_use:
                num_sources_to_use += 1

        # else:
        #
        #     num_sources_to_use += 1

    # repeat for the bayes analyses

    for key in tqdm(list(bayesian_analyses.keys()),
                    desc="processing Bayesian analyses"):

        # if we have a source to use

        if (not use_components or ("total" in components_to_use)
                or (not bayesian_analyses[key]["component_names"])):
            bayesian_analyses[key][
                "fitted point source"] = FittedPointSourceSpectralHandler(
                    bayesian_analyses[key]["analysis"],
                    bayesian_analyses[key]["source"],
                    energy_range,
                    energy_unit,
                    flux_unit,
                    confidence_level,
                    equal_tailed,
                    is_differential_flux=differential,
                )

            num_sources_to_use += 1

        # if we want to use components

        if use_components:

            num_components_to_use = 0

            component_dict = {}

            for component in bayesian_analyses[key]["component_names"]:

                # extracting all components

                if not components_to_use:
                    component_dict[
                        component] = FittedPointSourceSpectralHandler(
                            bayesian_analyses[key]["analysis"],
                            bayesian_analyses[key]["source"],
                            energy_range,
                            energy_unit,
                            flux_unit,
                            confidence_level,
                            equal_tailed,
                            component=component,
                            is_differential_flux=differential,
                        )

                    num_components_to_use += 1

                # or just some of them

                if component in components_to_use:
                    component_dict[
                        component] = FittedPointSourceSpectralHandler(
                            bayesian_analyses[key]["analysis"],
                            bayesian_analyses[key]["source"],
                            energy_range,
                            energy_unit,
                            flux_unit,
                            confidence_level,
                            equal_tailed,
                            component=component,
                            is_differential_flux=differential,
                        )

                    num_components_to_use += 1

            bayesian_analyses[key]["components"] = component_dict

        # keep track of everything we added on

        if use_components and num_components_to_use > 0:

            num_sources_to_use += num_components_to_use

            if "total" in components_to_use:
                num_sources_to_use += 1
        #
        # else:
        #
        #     num_sources_to_use += 1

    # we may have the same source in a bayesian and mle analysis.
    # we want to plot them, but make sure to label them differently.
    # so let's keep track of them

    duplicate_keys = []

    for key in list(mle_analyses.keys()):

        if key in list(bayesian_analyses.keys()):
            duplicate_keys.append(key)

    return mle_analyses, bayesian_analyses, num_sources_to_use, duplicate_keys
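
The component bookkeeping above renames duplicated component names by appending an _n<i> suffix with the help of collections.Counter. Isolated, the trick looks like this (the example names are made up):

import collections


def dedupe_component_names(comps):
    # Duplicated names get an _n<i> suffix; unique names are left untouched.
    return [
        "%s_n%i" % (name, suffix) if count > 1 else name
        for name, count in collections.Counter(comps).items()
        for suffix in range(1, count + 1)
    ]


# dedupe_component_names(["Powerlaw", "Blackbody", "Powerlaw"])
# -> ['Powerlaw_n1', 'Powerlaw_n2', 'Blackbody']
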
Example #11
    def _fit_polynomials(self, bayes=False):
        """

        Binned fit to each channel. Sets the polynomial array that will be used to compute
        counts over an interval



        :return:
        """

        self._poly_fit_exists = True

        # Select all the events that are in the background regions
        # and make a mask

        all_bkg_masks = []

        for selection in self._poly_intervals:
            all_bkg_masks.append(
                np.logical_and(
                    self._arrival_times >= selection.start_time,
                    self._arrival_times <= selection.stop_time,
                ))
        poly_mask = all_bkg_masks[0]

        # If there are multiple masks:
        if len(all_bkg_masks) > 1:
            for mask in all_bkg_masks[1:]:
                poly_mask = np.logical_or(poly_mask, mask)

        # Select all the events in the poly selections
        # We only need to do this once

        total_poly_events = self._arrival_times[poly_mask]

        # For the channel energies we will need to down select again.
        # We can go ahead and do this to avoid repeated computations

        total_poly_energies = self._measurement[poly_mask]

        # This calculation removes the unselected portion of the light curve
        # so that we are not fitting zero counts. It will be used in the channel calculations
        # as well

        bin_width = 1.0  # seconds
        these_bins = np.arange(self._start_time, self._stop_time, bin_width)

        cnts, bins = np.histogram(total_poly_events, bins=these_bins)

        # Find the mean time of the bins and calculate the exposure in each bin
        mean_time = []
        exposure_per_bin = []
        for i in range(len(bins) - 1):
            m = np.mean((bins[i], bins[i + 1]))
            mean_time.append(m)

            exposure_per_bin.append(
                self.exposure_over_interval(bins[i], bins[i + 1]))

        mean_time = np.array(mean_time)

        exposure_per_bin = np.array(exposure_per_bin)

        # Remove bins with zero counts
        all_non_zero_mask = []

        for selection in self._poly_intervals:
            all_non_zero_mask.append(
                np.logical_and(mean_time >= selection.start_time,
                               mean_time <= selection.stop_time))

        non_zero_mask = all_non_zero_mask[0]
        if len(all_non_zero_mask) > 1:
            for mask in all_non_zero_mask[1:]:
                non_zero_mask = np.logical_or(mask, non_zero_mask)

        # Now we will find the best poly order unless the user specified one
        # The total cnts (over channels) is binned to .1 sec intervals

        if self._user_poly_order == -1:

            self._optimal_polynomial_grade = (
                self._fit_global_and_determine_optimum_grade(
                    cnts[non_zero_mask],
                    mean_time[non_zero_mask],
                    exposure_per_bin[non_zero_mask],
                    bayes=bayes))

            log.info("Auto-determined polynomial order: %d" %
                     self._optimal_polynomial_grade)

        else:

            self._optimal_polynomial_grade = self._user_poly_order

        channels = list(
            range(self._first_channel, self._n_channels + self._first_channel))

        if threeML_config["parallel"]["use_parallel"]:

            def worker(channel):

                channel_mask = total_poly_energies == channel

                # Mask background events and current channel
                # poly_chan_mask = np.logical_and(poly_mask, channel_mask)
                # Select the masked events

                current_events = total_poly_events[channel_mask]

                cnts, bins = np.histogram(current_events, bins=these_bins)

                polynomial, _ = polyfit(mean_time[non_zero_mask],
                                        cnts[non_zero_mask],
                                        self._optimal_polynomial_grade,
                                        exposure_per_bin[non_zero_mask],
                                        bayes=bayes)

                return polynomial

            client = ParallelClient()

            polynomials = client.execute_with_progress_bar(
                worker,
                channels,
                name=f"Fitting {self._instrument} background")

        else:

            polynomials = []

            for channel in tqdm(channels,
                                desc=f"Fitting {self._instrument} background"):

                channel_mask = total_poly_energies == channel

                # Mask background events and current channel
                # poly_chan_mask = np.logical_and(poly_mask, channel_mask)
                # Select the masked events

                current_events = total_poly_events[channel_mask]

                # now bin the selected channel counts

                cnts, bins = np.histogram(current_events, bins=these_bins)

                # Put data to fit in an x vector and y vector

                polynomial, _ = polyfit(mean_time[non_zero_mask],
                                        cnts[non_zero_mask],
                                        self._optimal_polynomial_grade,
                                        exposure_per_bin[non_zero_mask],
                                        bayes=bayes)

                polynomials.append(polynomial)

        # We are now ready to return the polynomials

        self._polynomials = polynomials
Example #12
def bayesian_blocks_not_unique(tt, ttstart, ttstop, p0):
    # Verify that the input array is one-dimensional
    tt = np.asarray(tt, dtype=float)

    assert tt.ndim == 1

    # Now create the array of unique times

    unique_t = np.unique(tt)

    t = tt
    tstart = ttstart
    tstop = ttstop

    # Create initial cell edges (Voronoi tessellation) using the unique time stamps

    edges = np.concatenate([[tstart], 0.5 * (unique_t[1:] + unique_t[:-1]),
                            [tstop]])

    # The last block length is 0 by definition
    block_length = tstop - edges

    if np.sum((block_length <= 0)) > 1:
        raise RuntimeError(
            "Events appears to be out of order! Check for order, or duplicated events."
        )

    N = unique_t.shape[0]

    # arrays to store the best configuration
    best = np.zeros(N, dtype=float)
    last = np.zeros(N, dtype=int)

    # Pre-computed priors (for speed)
    # eq. 21 from Scargle 2012

    priors = 4 - np.log(73.53 * p0 * np.power(np.arange(1, N + 1), -0.478))

    # Count how many events are in each Voronoi cell

    x, _ = np.histogram(t, edges)

    # Speed tricks: resolve once for all the functions which will be used
    # in the loop
    cumsum = np.cumsum
    log = np.log
    argmax = np.argmax
    numexpr_evaluate = numexpr.evaluate
    arange = np.arange

    # Decide the step for reporting progress
    incr = max(int(float(N) / 100.0 * 10), 1)

    logger.debug("Finding blocks...")

    # This is where the computation happens. Following Scargle et al. 2012.
    # This loop has been optimized for speed:
    # * the expression for the fitness function has been rewritten to
    #  avoid multiple log computations, and to avoid power computations
    # * the use of scipy.weave and numexpr has been evaluated. The latter
    #  gives a big gain (~40%) if used for the fitness function. No other
    #  gain is obtained by using it anywhere else

    # Set numexpr precision to low (more than enough for us), which is
    # faster than high
    oldaccuracy = numexpr.set_vml_accuracy_mode("low")
    numexpr.set_num_threads(1)
    numexpr.set_vml_num_threads(1)

    for R in tqdm(range(N)):
        br = block_length[R + 1]
        T_k = block_length[:R + 1] - br

        # N_k: number of elements in each block
        # This expression has been simplified for the case of
        # unbinned events (i.e., one element in each block)
        # It was:
        N_k = cumsum(x[:R + 1][::-1])[::-1]
        # Now it is:
        # N_k = arange(R + 1, 0, -1)

        # Evaluate fitness function
        # This is the slowest part, which I'm speeding up by using
        # numexpr. It provides a ~40% gain in execution speed.

        fit_vec = numexpr_evaluate(
            """N_k * log(N_k/ T_k) """,
            optimization="aggressive",
            local_dict={
                "N_k": N_k,
                "T_k": T_k
            },
        )

        p = priors[R]

        A_R = fit_vec - p

        A_R[1:] += best[:R]

        i_max = argmax(A_R)

        last[R] = i_max
        best[R] = A_R[i_max]

    numexpr.set_vml_accuracy_mode(oldaccuracy)

    logger.debug("Done\n")

    # Now find blocks
    change_points = np.zeros(N, dtype=int)
    i_cp = N
    ind = N
    while True:
        i_cp -= 1
        change_points[i_cp] = ind

        if ind == 0:
            break

        ind = last[ind - 1]

    change_points = change_points[i_cp:]

    finalEdges = edges[change_points]

    return np.asarray(finalEdges)
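
A hypothetical call, assuming bayesian_blocks_not_unique (together with the numexpr and logger names it uses at module level) is importable; the arrival times here are synthetic:

import numpy as np

# Synthetic, sorted arrival times on [0, 100) seconds
rng = np.random.default_rng(0)
tt = np.sort(rng.uniform(0.0, 100.0, size=5000))

# p0 is the false-alarm probability entering the prior (eq. 21 of Scargle 2012)
edges = bayesian_blocks_not_unique(tt, ttstart=0.0, ttstop=100.0, p0=0.05)

print(f"{len(edges) - 1} blocks found")
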
Example #13
    def __init__(self):
        """
        holds all the observatories/instruments/filters


        :param library_file:
        """

        # get the filter file

        with h5py.File(get_speclite_filter_library(), "r") as f:

            self._instruments = []

            for observatory in tqdm(f.keys(),
                                    desc="Loading photometric filters"):

                log.debug(f"loading {observatory}")

                sub_dict = {}
                for instrument in f[observatory].keys():

                    sub_dict[instrument] = instrument

                # create a node for the observatory
                this_node = ObservatoryNode(sub_dict)

                # attach it to the object

                if observatory == "2MASS":

                    xx = "TwoMass"

                else:

                    xx = observatory

                setattr(self, xx, this_node)

                # now get the instruments

                for instrument in f[observatory].keys():

                    # update the instruments

                    self._instruments.append(instrument)

                    # create the filter response via speclite

                    this_grp = f[observatory][instrument]
                    filters = []

                    for ff in this_grp.keys():

                        grp = this_grp[ff]

                        this_filter = spec_filter.FilterResponse(
                            wavelength=grp["wavelength"][()] * u.Angstrom,
                            response=grp["transmission"][()],
                            meta=dict(
                                group_name=instrument,
                                band_name=ff,
                            ))

                        filters.append(this_filter)

                    fgroup = spec_filter.FilterSequence(filters)
                    # attach the filters to the observatory

                    setattr(this_node, instrument, fgroup)

        self._instruments.sort()
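
The traversal above maps directly onto the HDF5 layout: observatories are top-level groups, instruments are subgroups, and each filter holds "wavelength" and "transmission" datasets. A bare sketch of that walk with a placeholder file path (not the real speclite library file):

import h5py
from tqdm.auto import tqdm

with h5py.File("filter_library.h5", "r") as f:

    for observatory in tqdm(f.keys(), desc="Loading photometric filters"):

        for instrument in f[observatory].keys():

            for band in f[observatory][instrument].keys():

                grp = f[observatory][instrument][band]

                wavelength = grp["wavelength"][()]
                transmission = grp["transmission"][()]
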
Example #14
File: ppc.py Project: grburgess/twopc
def compute_ppc(analysis: BayesianAnalysis,
                result: BayesianResults,
                n_sims: int,
                file_name: str,
                overwrite: bool = False,
                return_ppc: bool = False) -> Union["PPC", None]:
    """
    Compute a posterior predictive check from a 3ML DispersionLike
    Plugin. The resulting posterior data simulations are stored
    in an HDF5 file which can be read by the PPC class

    :param analysis: 3ML bayesian analysis object
    :param result: 3ML analysis result
    :param n_sims: the number of posterior simulations to create
    :param file_name: the filename to save to
    :param overwrite: whether to overwrite an existing file
    :param return_ppc: if True, the PPC object will be returned directly
    :returns: a PPC object if return_ppc is True, otherwise None
    :rtype:

    """

    update_logging_level("WARNING")

    p = Path(file_name)

    if p.exists() and (not overwrite):

        raise RuntimeError(f"{file_name} already exists!")

    with h5py.File(file_name, 'w', libver='latest') as database:

        # first we collect the real data and save it so that we will not have to
        # look it up in the future

        data_names = []

        database.attrs['n_sims'] = n_sims

        for data in analysis.data_list.values():

            data_names.append(data.name)
            grp = database.create_group(data.name)
            grp.attrs['exposure'] = data.exposure
            grp.create_dataset('ebounds',
                               data=data.response.ebounds,
                               compression='lzf')
            grp.create_dataset('obs_counts',
                               data=data.observed_counts,
                               compression='lzf')
            grp.create_dataset('bkg_counts',
                               data=data.background_counts,
                               compression='lzf')
            grp.create_dataset('mask', data=data.mask, compression='lzf')

        # select random draws from the posterior

        n_samples = len(result.samples.T)

        if n_samples < n_sims:

            print("too many sims")

            n_sims = n_samples

        choices = np.random.choice(len(result.samples.T),
                                   replace=False,
                                   size=n_sims)

        # for each posterior sample

        with silence_console_log(and_progress_bars=False):

            for j, choice in enumerate(tqdm(choices,
                                            desc="sampling posterior")):

                # get the parameters of the choice

                params = result.samples.T[choice]

                # set the analysis free parameters to the value of the posterior
                for i, (k, v) in enumerate(
                        analysis.likelihood_model.free_parameters.items()):
                    v.value = params[i]

                # create simulated data sets with these free parameters
                sim_dl = DataList(*[
                    data.get_simulated_dataset()
                    for data in analysis.data_list.values()
                ])

                # set the model of the simulated data to the model of the simulation
                for i, data in enumerate(sim_dl.values()):

                    # clone the model for safety's sake
                    # and set the model. For now we do nothing with this

                    data.set_model(clone_model(analysis.likelihood_model))

                    # store the PPC data in the file
                    grp = database[data_names[i]]
                    grp.create_dataset('ppc_counts_%d' % j,
                                       data=data.observed_counts,
                                       compression='lzf')
                    grp.create_dataset('ppc_background_counts_%d' % j,
                                       data=data.background_counts,
                                       compression='lzf')
                # sim_dls.append(sim_dl)
        if return_ppc:

            return PPC(file_name)
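
A hypothetical call, assuming `bayes_analysis` and `results` are an already fitted 3ML BayesianAnalysis / BayesianResults pair (not constructed here) and "my_ppc.h5" is a placeholder output path:

ppc = compute_ppc(
    bayes_analysis,
    results,
    n_sims=500,
    file_name="my_ppc.h5",
    overwrite=True,
    return_ppc=True,
)

# The simulations live in my_ppc.h5 and can also be re-read later via
# PPC("my_ppc.h5"), as the return statement above suggests.
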
Example #15
    def bin_by_significance(
        cls,
        arrival_times,
        background_getter,
        background_error_getter=None,
        sigma_level=10,
        min_counts=1,
        tstart=None,
        tstop=None,
    ):
        """

        Bin the data to a given significance level for a given background method and sigma
        method. If a background error function is given then it is assumed that the error distribution
        is Gaussian. Otherwise, the error distribution is assumed to be Poisson.

        :param background_getter: function of a start and stop time that returns background counts
        :param background_error_getter: function of a start and stop time that returns background count errors
        :param sigma_level: the sigma level of the intervals
        :param min_counts: the minimum counts per bin

        :return:
        """

        if tstart is None:

            tstart = arrival_times.min()

        else:

            tstart = float(tstart)

        if tstop is None:

            tstop = arrival_times.max()

        else:

            tstop = float(tstop)

        starts = []

        stops = []

        # Switching to a fast search
        # Idea inspired by Damien Begue

        # these factors change the time steps
        # in the fast search. should experiment
        if sigma_level > 25:

            increase_factor = 0.5
            decrease_factor = 0.5

        else:

            increase_factor = 0.25
            decrease_factor = 0.25

        current_start = arrival_times[0]

        # first we need to see if the interval provided has enough counts

        _, counts = TemporalBinner._select_events(arrival_times, current_start,
                                                  arrival_times[-1])

        # if it does not, the flag for the big loop never gets set
        end_all_search = not TemporalBinner._check_exceeds_sigma_interval(
            current_start,
            arrival_times[-1],
            counts,
            sigma_level,
            background_getter,
            background_error_getter,
        )

        # We will start the search at the mid point of the whole interval

        mid_point = 0.5 * (arrival_times[-1] + current_start)

        current_stop = mid_point

        # initialize the fast search flag

        end_fast_search = False

        # resolve once for functions used in the loop
        searchsorted = np.searchsorted

        # this is the main loop
        # as long as we have not reached the end of the interval
        # the loop will run

        if threeML_config.interface.progress_bars:

            pbar = tqdm(total=arrival_times.shape[0],
                        desc="Binning by significance")

        while not end_all_search:

            # start of the fast search
            # we reset the flag for the interval
            # having been decreased in the last pass
            decreased_interval = False

            while not end_fast_search:

                # we calculate the sigma of the current region
                _, counts = TemporalBinner._select_events(
                    arrival_times, current_start, current_stop)

                sigma_exceeded = TemporalBinner._check_exceeds_sigma_interval(
                    current_start,
                    current_stop,
                    counts,
                    sigma_level,
                    background_getter,
                    background_error_getter,
                )

                time_step = abs(current_stop - current_start)

                # if we do not exceed the sigma
                # we need to increase the time interval
                if not sigma_exceeded:

                    # however, if in the last pass we had to decrease
                    # the interval, it means we have found where
                    # we need to start the slow search
                    if decreased_interval:

                        # mark where we are in the list
                        start_idx = searchsorted(arrival_times, current_stop)

                        # end the fast search
                        end_fast_search = True

                    # otherwise we increase the interval
                    else:

                        # unless, we would increase it too far
                        if (current_stop + time_step *
                                increase_factor) >= arrival_times[-1]:

                            # mark where we are in the interval
                            start_idx = searchsorted(arrival_times,
                                                     current_stop)

                            # then we also want to go ahead and get out of the fast search
                            end_fast_search = True

                        else:

                            # increase the interval
                            current_stop += time_step * increase_factor

                # if we did exceed the sigma level we will need to step
                # back in time to find where it was NOT exceeded
                else:

                    # decrease the interval
                    current_stop -= time_step * decrease_factor

                    # inform the loop that we have been back stepping
                    decreased_interval = True

            # Now we are ready for the slow forward search
            # where we count up all the photons

            # we have already counted up the photons to this point
            total_counts = counts

            # start searching from where the fast search ended
            if threeML_config.interface.progress_bars:
                pbar.update(counts)

            for time in arrival_times[start_idx:]:

                total_counts += 1
                if threeML_config.interface.progress_bars:
                    pbar.update(1)
                if total_counts < min_counts:

                    continue

                else:

                    # first use the background function to know the number of background counts
                    bkg = background_getter(current_start, time)

                    sig = Significance(total_counts, bkg)

                    if background_error_getter is not None:

                        bkg_error = background_error_getter(
                            current_start, time)

                        sigma = sig.li_and_ma_equivalent_for_gaussian_background(
                            bkg_error)[0]

                    else:

                        sigma = sig.li_and_ma()[0]

                        # now test if we have enough sigma

                    if sigma >= sigma_level:

                        # if we succeeded we want to mark the time bins
                        stops.append(time)

                        starts.append(current_start)

                        # set up the next fast search
                        # by looking past this interval
                        current_start = time

                        current_stop = 0.5 * (arrival_times[-1] + time)

                        end_fast_search = False

                        # get out of the for loop
                        break

            # if we never exceeded the sigma level by the
            # end of the search, we never will
            if end_fast_search:

                # so lets kill the main search
                end_all_search = True

        if not starts:

            log.error(
                "The requested sigma level could not be achieved in the interval. Try decreasing it."
            )

        else:

            return cls.from_starts_and_stops(starts, stops)
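
A hypothetical call to the binner, assuming the enclosing class is TemporalBinner (it references TemporalBinner._select_events internally) and that the background is a constant 1 count/s, so the expected background counts are just the interval length:

import numpy as np

# Synthetic, sorted arrival times over 200 seconds
arrival_times = np.sort(np.random.default_rng(1).uniform(0.0, 200.0, size=20000))

bins = TemporalBinner.bin_by_significance(
    arrival_times,
    background_getter=lambda t1, t2: t2 - t1,  # expected background counts
    sigma_level=10,
    min_counts=5,
)
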
    def download(
        self,
        remote_filename,
        destination_path: str,
        new_filename=None,
        progress=True,
        compress=False,
    ):

        assert (remote_filename in self.files
                ), "File %s is not contained in this directory (%s)" % (
                    remote_filename,
                    self._request_result.url,
                )

        destination_path: Path = sanitize_filename(destination_path,
                                                   abspath=True)

        assert path_exists_and_is_directory(destination_path), (
            f"Provided destination {destination_path} does not exist or "
            "is not a directory")

        # If no filename is specified, use the same name that the file has on the remote server

        if new_filename is None:
            new_filename: str = remote_filename.split("/")[-1]

        # Get the fully qualified path for the remote and the local file

        remote_path: str = self._request_result.url + remote_filename
        local_path: Path = destination_path / new_filename

        # Ask the server for the file, but do not download it just yet
        # (stream=True will get the HTTP header but nothing else)
        # Use stream=True for two reasons:
        # * so that the file is not downloaded all in memory before being written to the disk
        # * so that we can report progress if requested

        this_request = requests.get(remote_path, stream=True)

        # Figure out the size of the file

        file_size = int(this_request.headers["Content-Length"])

        log.debug(f"downloading {remote_filename} of size {file_size}")

        # Now check if we really need to download this file

        if compress:
            # Add a .gz at the end of the file path

            log.debug(
                f"file {remote_filename} will be downloaded and compressed")

            local_path: Path = Path(f"{local_path}.gz")

        if file_existing_and_readable(local_path):

            local_size = os.path.getsize(local_path)

            if local_size == file_size or compress:
                # if the compressed file already exists
                # it will have a smaller size

                # No need to download it again

                log.info(f"file {remote_filename} is already downloaded!")

                return local_path

        if local_path.is_file():

            first_byte = os.path.getsize(local_path)

        else:

            first_byte = 0

        # Chunk size shouldn't be too small, otherwise we create a bottleneck in the download speed
        chunk_size = 1024 * 10

        # If the user wants to compress the file, use gzip, otherwise the normal opener
        if compress:

            import gzip

            opener = gzip.open

        else:

            opener = open

        if threeML_config["interface"]["progress_bars"]:

            # Set a title for the progress bar
            bar_title = "Downloading %s" % new_filename

            bar = tqdm(
                initial=first_byte,
                unit_scale=True,
                unit_divisor=1024,
                unit="B",
                total=file_size,
                desc=bar_title,
            )

            with opener(local_path, "wb") as f:

                for chunk in this_request.iter_content(chunk_size=chunk_size):

                    if chunk:  # filter out keep-alive new chunks

                        f.write(chunk)
                        bar.update(len(chunk))

            this_request.close()
            bar.close()

        else:

            with opener(local_path, "wb") as f:

                for chunk in this_request.iter_content(chunk_size=chunk_size):

                    if chunk:  # filter out keep-alive new chunks

                        f.write(chunk)

            this_request.close()

        return local_path
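
Reduced to its essentials, the streaming part of download() is the classic requests + tqdm pattern: stream the response and advance the bar by the size of each written chunk. A self-contained sketch (compression, resume and caching logic omitted):

import requests
from tqdm.auto import tqdm


def download_with_progress(url, local_path, chunk_size=1024 * 10):
    response = requests.get(url, stream=True)

    # Content-Length may be missing; default to 0 in that case
    total = int(response.headers.get("Content-Length", 0))

    bar = tqdm(
        total=total,
        initial=0,
        unit="B",
        unit_scale=True,
        unit_divisor=1024,
        desc=f"Downloading {local_path}",
    )

    with open(local_path, "wb") as f:

        for chunk in response.iter_content(chunk_size=chunk_size):

            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                bar.update(len(chunk))

    bar.close()
    response.close()

    return local_path
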
Example #17
def download_files_from_directory_ftp(ftp_url,
                                      destination_directory,
                                      filenames=None,
                                      namefilter=None):
    # Parse url
    tokens = urllib.parse.urlparse(ftp_url)
    serverAddress = tokens.netloc
    directory = tokens.path

    # if no filename has been specified, connect first to retrieve the list of files to download

    if filenames is None:

        # Connect to server and log in

        ftp = ftplib.FTP(serverAddress, "anonymous", "", "", timeout=60)

        try:

            ftp.login()

        except:
            # Maybe we are already logged in

            try:

                ftp.cwd("/")

            except:

                # nope! don't know what is happening
                raise

        # Move to origin directory

        ftp.cwd(directory)

        # Retrieve list of files

        filenames = []
        ftp.retrlines("NLST", filenames.append)

        # Close connection (will reopen later)

        ftp.close()

    # Download files with progress report

    downloaded_files = []

    for i, filename in enumerate(tqdm(filenames)):

        if namefilter is not None and filename.find(namefilter) < 0:

            # Filename does not match, do not download it
            continue

        else:

            local_filename = os.path.join(destination_directory, filename)

            urllib.request.urlretrieve(
                "ftp://%s/%s/%s" % (serverAddress, directory, filename),
                local_filename,
            )

            urllib.request.urlcleanup()

            downloaded_files.append(local_filename)

    return downloaded_files
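
A hypothetical call: fetch every file whose name contains ".fit" from an FTP directory into the current working directory (the URL is a placeholder):

files = download_files_from_directory_ftp(
    "ftp://ftp.example.org/pub/data/",
    destination_directory=".",
    namefilter=".fit",
)

print(f"downloaded {len(files)} files")
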