Example #1
    def prepare_data(self, data, tl=None):
        """Prepares the data for IceCube by pre-calculating the following
        experimental data fields:

        - sin_dec: float
            The sine of the declination coordinate.

        and monte-carlo data fields:

        - sin_true_dec: float
            The sine of the true declination coordinate.

        Parameters
        ----------
        data : DatasetData instance
            The DatasetData instance holding the data as numpy record ndarray.
        tl : TimeLord instance | None
            The TimeLord instance that should be used to time the data
            preparation.
        """
        # Execute all the data preparation functions for this dataset.
        super(I3Dataset, self).prepare_data(data, tl=tl)

        if (data.exp is not None):
            task = 'Appending IceCube-specific data fields to exp data.'
            with TaskTimer(tl, task):
                data.exp.append_field('sin_dec', np.sin(data.exp['dec']))

        # Append sin(dec) and sin(true_dec) to the MC data.
        task = 'Appending IceCube-specific data fields to MC data.'
        with TaskTimer(tl, task):
            data.mc.append_field('sin_dec', np.sin(data.mc['dec']))
            data.mc.append_field('sin_true_dec', np.sin(data.mc['true_dec']))
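
A minimal numpy sketch of the same pre-calculation (the toy record array and
its values are assumptions; the real code operates on a DataFieldRecordArray
instance):

    import numpy as np
    from numpy.lib import recfunctions as rfn

    # Toy experimental data: a structured array with a 'dec' field (radians).
    exp = np.array([(0.1,), (-0.7,)], dtype=[('dec', np.float64)])

    # Pre-compute sin(dec) once, mirroring the 'sin_dec' field that
    # prepare_data() appends above.
    exp = rfn.append_fields(exp, 'sin_dec', np.sin(exp['dec']), usemask=False)
    print(exp['sin_dec'])  # [ 0.09983342 -0.64421769]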
Example #2
    def select_events(self, events, retidxs=False, tl=None):
        """Selects the events within the declination band.

        Parameters
        ----------
        events : instance of DataFieldRecordArray
            The instance of DataFieldRecordArray that holds the event data.
            The following data fields must exist:

            - 'dec' : float
                The declination of the event.
        retidxs : bool
            Flag if the indices of the selected events should get returned
            as well, as a 1d ndarray.
            Default is False.
        tl : instance of TimeLord | None
            The optional instance of TimeLord that should be used to collect
            timing information about this method.

        Returns
        -------
        selected_events : instance of DataFieldRecordArray
            The instance of DataFieldRecordArray holding only the selected
            events.
        idxs : ndarray of ints
            The indices of the selected events, in case `retidxs` is set to
            True.
        """
        delta_angle = self._delta_angle
        src_arr = self._src_arr

        # Calculate the lower and upper declination bound around each source
        # and clip them to -90deg and +90deg, respectively.
        src_dec_minus = np.maximum(-np.pi / 2, src_arr['dec'] - delta_angle)
        src_dec_plus = np.minimum(src_arr['dec'] + delta_angle, np.pi / 2)

        # Determine the mask for the events which fall inside the declination
        # window.
        # mask_dec is a (N_sources,N_events)-shaped ndarray.
        with TaskTimer(tl, 'ESM-DecBand: Calculate mask_dec'):
            mask_dec = ((events['dec'] > src_dec_minus[:, np.newaxis]) &
                        (events['dec'] < src_dec_plus[:, np.newaxis]))

        # Determine the mask for the events that fall inside at least one
        # source declination band.
        # mask is a (N_events,)-shaped ndarray.
        with TaskTimer(tl, 'ESM-DecBand: Calculate mask.'):
            mask = np.any(mask_dec, axis=0)

        # Reduce the events according to the mask.
        with TaskTimer(tl, 'ESM-DecBand: Create selected_events.'):
            idxs = events.indices[mask]
            selected_events = events[idxs]

        if (retidxs):
            return (selected_events, idxs)
        return selected_events
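
The core of this selection is a single numpy broadcast. A minimal,
self-contained sketch with assumed toy values (angles in radians):

    import numpy as np

    # Assumed toy inputs: two sources and five events.
    src_dec = np.array([0.0, 0.5])
    evt_dec = np.linspace(-0.4, 0.9, 5)
    delta_angle = 0.2

    src_dec_minus = np.maximum(-np.pi / 2, src_dec - delta_angle)
    src_dec_plus = np.minimum(src_dec + delta_angle, np.pi / 2)

    # Broadcasting the (N_sources,)-shaped bounds against the
    # (N_events,)-shaped declinations yields a (N_sources,N_events)-shaped
    # mask; np.any keeps events inside at least one band.
    mask_dec = ((evt_dec > src_dec_minus[:, np.newaxis]) &
                (evt_dec < src_dec_plus[:, np.newaxis]))
    mask = np.any(mask_dec, axis=0)
    print(mask)  # [False  True False  True False]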
Example #3
    def select_events(self, events, retidxs=False, tl=None):
        """Selects the events whose psi value is smaller than the value of the
        predefined function.

        Parameters
        ----------
        events : instance of DataFieldRecordArray
            The instance of DataFieldRecordArray that holds the event data.
            The following data fields must exist:

            - <psi_name> : float
                The great-circle distance between the event and the source.
            - <*axis_name_list> : float
                The data fields named in ``axis_name_list`` that are required
                to evaluate the function ``func``.

        retidxs : bool
            Flag if the indices of the selected events should get returned
            as well, as a 1d ndarray.
            Default is False.
        tl : instance of TimeLord | None
            The optional instance of TimeLord that should be used to collect
            timing information about this method.

        Returns
        -------
        selected_events : instance of DataFieldRecordArray
            The instance of DataFieldRecordArray holding only the selected
            events.
        idxs : ndarray of ints
            The indices of the selected events, in case `retidxs` is set to
            True.
        """
        cls_name = classname(self)

        with TaskTimer(tl, '%s: Get psi values.' % (cls_name)):
            psi = events[self._psi_name]

        with TaskTimer(tl, '%s: Get axis data values.' % (cls_name)):
            func_args = [events[axis] for axis in self._axis_name_list]

        with TaskTimer(tl, '%s: Creating mask.' % (cls_name)):
            mask = psi < self._func(*func_args)

        with TaskTimer(tl, '%s: Create selected_events.' % (cls_name)):
            idxs = events.indices[mask]
            selected_events = events[idxs]

        if (retidxs):
            return (selected_events, idxs)
        return selected_events
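
A hedged toy version of this cut, with an assumed energy-dependent threshold
standing in for ``self._func`` (the real function and its axis names are
configured elsewhere):

    import numpy as np

    # Assumed threshold: the allowed opening angle shrinks with energy.
    def func(log_e):
        return np.radians(5.0) * 10**(-0.2 * (log_e - 3.0))

    psi = np.radians(np.array([1.0, 4.0, 8.0]))
    log_e = np.array([3.0, 4.0, 5.0])

    # Keep events whose psi value lies below the threshold, as in the mask
    # built by select_events() above.
    mask = psi < func(log_e)
    print(mask)  # [ True False False]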
Example #4
    def get_ratio(self, tdm, params=None, tl=None):
        """Calculates the PDF ratio for the given trial events.

        Parameters
        ----------
        tdm : instance of TrialDataManager
            The TrialDataManager instance holding the trial data events for
            which the PDF ratio values should be calculated.
        params : dict | None
            The dictionary holding the parameter names and values for which the
            probability ratio should get calculated.
            This can be ``None``, if the signal and background PDFs do not
            depend on any parameters.
        tl : TimeLord instance | None
            The optional TimeLord instance that should be used to measure
            timing information.

        Returns
        -------
        ratios : (N_events,)-shaped numpy ndarray
            The ndarray holding the probability ratio for each event (and each
            source). The dimensionality of the returned ndarray depends on the
            dimensionality of the probability ndarray returned by the
            ``get_prob`` method of the signal PDF object.
        """
        with TaskTimer(tl, 'Get sig prob.'):
            (sigprob, self._cache_siggrads) = self._sig_pdf.get_prob(tdm,
                                                                     params,
                                                                     tl=tl)
        with TaskTimer(tl, 'Get bkg prob.'):
            (bkgprob, self._cache_bkggrads) = self._bkg_pdf.get_prob(tdm,
                                                                     params,
                                                                     tl=tl)

        with TaskTimer(tl, 'Calc PDF ratios.'):
            # Select only the events where the background pdf is greater
            # than zero.
            m = (bkgprob > 0)

            ratios = np.full_like(sigprob, self._zero_bkg_ratio_value)
            ratios[m] = sigprob[m] / bkgprob[m]

        # Store the current state of parameter values and trial data, so that
        # the get_gradient method can verify the consistency of the signal and
        # background probabilities and gradients.
        self._cache_trial_data_state_id = tdm.trial_data_state_id
        self._cache_params_hash = make_params_hash(params)
        self._cache_sigprob = sigprob
        self._cache_bkgprob = bkgprob

        return ratios
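
The zero-background guard is the non-obvious step. A minimal sketch with
assumed toy probabilities:

    import numpy as np

    # Assumed toy values. Events with zero background probability receive the
    # configured fallback value instead of causing a division by zero.
    sigprob = np.array([0.2, 0.0, 0.5])
    bkgprob = np.array([0.1, 0.0, 0.0])
    zero_bkg_ratio_value = 1.0

    m = (bkgprob > 0)
    ratios = np.full_like(sigprob, zero_bkg_ratio_value)
    ratios[m] = sigprob[m] / bkgprob[m]
    print(ratios)  # [2. 1. 1.]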
Example #5
    def get_prob(self, tdm, fitparams=None, tl=None):
        """Calculates the spatial background probability on the sphere of each
        event.

        Parameters
        ----------
        tdm : instance of TrialDataManager
            The TrialDataManager instance holding the trial event data for which
            to calculate the PDF values. The following data fields must exist:

            - 'sin_dec' : float
                The sin(declination) value of the event.
        fitparams : None
            Unused interface parameter.
        tl : TimeLord instance | None
            The optional TimeLord instance that should be used to measure
            timing information.

        Returns
        -------
        prob : 1d ndarray
            The spherical background probability of each data event.
        grads : (0,)-shaped 1d ndarray
            The empty array of parameter gradients, since this PDF does not
            depend on any fit parameters.
        """
        with TaskTimer(tl, 'Evaluating bkg log-spline.'):
            log_spline_val = self._log_spline(tdm.get_data('sin_dec'))

        prob = 0.5 / np.pi * np.exp(log_spline_val)

        grads = np.array([], dtype=np.float64)

        return (prob, grads)
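
The construction of the log-spline itself is not shown here; a plausible
sketch with scipy (the toy data and the binning are assumptions) looks like
this, where the 1/(2*pi) factor accounts for the uniform right-ascension
dimension:

    import numpy as np
    from scipy.interpolate import InterpolatedUnivariateSpline

    # Assumed toy data: sin(dec) values of background-like events.
    sin_dec = np.random.default_rng(42).uniform(-1.0, 1.0, 10000)

    # Spline the logarithm of the normalized sin(dec) histogram.
    hist, edges = np.histogram(sin_dec, bins=20, density=True)
    centers = 0.5 * (edges[:-1] + edges[1:])
    log_spline = InterpolatedUnivariateSpline(centers, np.log(hist), k=2)

    # Evaluate as in get_prob() above.
    prob = 0.5 / np.pi * np.exp(log_spline(np.array([-0.5, 0.0, 0.5])))
    print(prob)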
Example #6
    def load_grl(self, efficiency_mode=None, tl=None):
        """Loads the good-run-list and returns a structured numpy ndarray with
        the following data fields:

            run : int
                The run number.
            start : float
                The MJD start time of the run.
            stop : float
                The MJD stop time of the run.
            livetime : float
                The livetime in days of the run.
            events : int
                The number of experimental events in the run.

        Parameters
        ----------
        efficiency_mode : str | None
            The efficiency mode the data should get loaded with. Possible values
            are:

                - 'memory':
                    The data will be loaded in a memory efficient way. This
                    will require more time, because all data records of a
                    file will be loaded sequentially.
                - 'time':
                    The data will be loaded in a time efficient way. This
                    will require more memory, because each data file gets
                    loaded in memory at once.

            The default value is ``'time'``. If set to ``None``, the default
            value will be used.
        tl : TimeLord instance | None
            The TimeLord instance to use to time the data loading procedure.

        Returns
        -------
        grl_data : instance of DataFieldRecordArray
            The DataFieldRecordArray instance holding the good-run-list
            information of the dataset.
        """
        with TaskTimer(tl, 'Loading grl data from disk.'):
            fileloader_grl = create_FileLoader(self.grl_abs_pathfilename_list)
            grl_data = fileloader_grl.load_data(
                efficiency_mode=efficiency_mode)
            grl_data.rename_fields(self._grl_field_name_renaming_dict)

        return grl_data
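
For reference, a toy structured array following the documented field layout
(the values are made up), together with the livetime sum that ``load_data``
relies on later:

    import numpy as np

    # Assumed toy good-run-list with the documented fields.
    grl = np.array(
        [(1, 58000.0, 58000.9, 0.9, 120),
         (2, 58001.0, 58001.8, 0.8, 101)],
        dtype=[('run', np.int64), ('start', np.float64),
               ('stop', np.float64), ('livetime', np.float64),
               ('events', np.int64)])

    # The total livetime in days of all good runs.
    print(np.sum(grl['livetime']))  # ~1.7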
Example #7
def load_pseudo_data(filename, tl=None):
    """Loads the pseudo data for a single trial from the given file name.

    Parameters
    ----------
    filename : str
        The name of the file that contains the pseudo data.
    tl : TimeLord | None
        The instance of TimeLord that should be used to time individual tasks.

    Returns
    -------
    mean_n_sig : float
        The mean number of signal events that was used to generate the pseudo
        data.
    n_sig : int
        The actual total number of signal events in the pseudo data.
    n_bkg_events_list : list of int
        The total number of background events for each data set of the
        pseudo data.
    n_sig_events_list : list of int
        The total number of signal events for each data set of the pseudo data.
    bkg_events_list : list of DataFieldRecordArray instances
        The list of DataFieldRecordArray instances containing the background
        pseudo data events for each data set.
    sig_events_list : list of DataFieldRecordArray instances or None
        The list of DataFieldRecordArray instances containing the signal
        pseudo data events for each data set. If a particular dataset has
        no signal events, the entry for that dataset can be None.
    """
    with TaskTimer(tl, 'Loading pseudo data from file.'):
        with open(filename, 'rb') as fp:
            trial_data = pickle.load(fp)

    return (trial_data['mean_n_sig'], trial_data['n_sig'],
            trial_data['n_bkg_events_list'], trial_data['n_sig_events_list'],
            trial_data['bkg_events_list'], trial_data['sig_events_list'])
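
A minimal round-trip sketch, assuming the function above is in scope and
using ``None`` placeholders where the real pseudo data would hold
DataFieldRecordArray instances:

    import pickle

    trial_data = dict(mean_n_sig=5.0, n_sig=4,
                      n_bkg_events_list=[100], n_sig_events_list=[4],
                      bkg_events_list=[None], sig_events_list=[None])

    with open('trial.pkl', 'wb') as fp:
        pickle.dump(trial_data, fp)

    # Unpack the six documented return values.
    (mean_n_sig, n_sig, n_bkg_events_list, n_sig_events_list,
     bkg_events_list, sig_events_list) = load_pseudo_data('trial.pkl')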
Example #8
    def get_prob(self, tdm, fitparams=None, tl=None):
        """Calculates the energy probability (in logE) of each event.

        Parameters
        ----------
        tdm : instance of TrialDataManager
            The TrialDataManager instance holding the data events for which
            the probability should be calculated. The following data fields
            must exist:

            - 'log_energy' : float
                The logarithm of the energy value of the event.
            - 'sin_dec' : float
                The sin(declination) value of the event.

        fitparams : None
            Unused interface parameter.
        tl : TimeLord instance | None
            The optional TimeLord instance that should be used to measure
            timing information.

        Returns
        -------
        prob : 1D (N_events,) shaped ndarray
            The array with the energy probability for each event.
        """
        get_data = tdm.get_data

        logE_binning = self.get_binning('log_energy')
        sinDec_binning = self.get_binning('sin_dec')

        logE_idx = np.digitize(
            get_data('log_energy'), logE_binning.binedges) - 1
        sinDec_idx = np.digitize(
            get_data('sin_dec'), sinDec_binning.binedges) - 1

        with TaskTimer(tl, 'Evaluating logE-sinDec histogram.'):
            prob = self._hist_logE_sinDec[(logE_idx, sinDec_idx)]
        return prob
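
The digitize-and-index lookup can be checked in isolation. A minimal sketch
with an assumed toy binning and histogram:

    import numpy as np

    # Assumed binning: np.digitize returns the index of the bin each value
    # falls into; subtracting 1 maps the first bin to index 0.
    logE_edges = np.linspace(1.0, 9.0, 9)      # 8 bins
    sinDec_edges = np.linspace(-1.0, 1.0, 5)   # 4 bins
    hist = np.random.default_rng(0).random((8, 4))

    logE = np.array([2.5, 7.1])
    sin_dec = np.array([-0.3, 0.6])

    i = np.digitize(logE, logE_edges) - 1
    j = np.digitize(sin_dec, sinDec_edges) - 1
    print(hist[(i, j)])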
Example #9
    def select_events(self, events, retidxs=False, tl=None):
        """Selects the events within the spatial box in right-ascention and
        declination.

        The solid angle dOmega = dRA * dSinDec = dRA * dDec * cos(dec) is a
        function of declination, i.e. for a constant dOmega the width of the
        right-ascension window has to change with declination.

        Parameters
        ----------
        events : instance of DataFieldRecordArray
            The instance of DataFieldRecordArray that holds the event data.
            The following data fields must exist:

            - 'ra' : float
                The right-ascension of the event.
            - 'dec' : float
                The declination of the event.
        retidxs : bool
            Flag if the indices of the selected events should get returned
            as well, as a 1d ndarray.
            Default is False.
        tl : instance of TimeLord | None
            The optional instance of TimeLord that should be used to collect
            timing information about this method.

        Returns
        -------
        selected_events : instance of DataFieldRecordArray
            The instance of DataFieldRecordArray holding only the selected
            events.
        idxs : ndarray of ints
            The indices of the selected events, in case `retidxs` is set to
            True.
        """
        delta_angle = self._delta_angle
        src_arr = self._src_arr

        # Get the minus and plus declination around the sources.
        src_dec_minus = np.maximum(-np.pi / 2, src_arr['dec'] - delta_angle)
        src_dec_plus = np.minimum(src_arr['dec'] + delta_angle, np.pi / 2)

        # Calculate the cosine factor for the largest declination distance from
        # the source. We use np.amin here because smaller cosine values are
        # larger angles.
        # cosfact is a (N_sources,)-shaped ndarray.
        cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0)

        # Calculate the half-width of the right-ascension window, which is a
        # function of declination. Capping it at 2*pi covers the full circle,
        # since the RA distance calculated below is at most pi.
        # dRA_half is a (N_sources,)-shaped ndarray.
        dRA_half = np.amin(
            [np.repeat(2 * np.pi, len(src_arr['ra'])),
             np.fabs(delta_angle / cosfact)],
            axis=0)

        # Calculate the right-ascension distance of the events w.r.t. the
        # source. We make sure to use the smaller distance on the circle, thus
        # the maximal distance is 180deg, i.e. pi.
        # ra_dist is a (N_sources,N_events)-shaped 2D ndarray.
        with TaskTimer(tl, 'ESM: Calculate ra_dist.'):
            ra_dist = np.fabs(
                np.mod(events['ra'] - src_arr['ra'][:, np.newaxis] + np.pi, 2 *
                       np.pi) - np.pi)

        # Determine the mask for the events which fall inside the
        # right-ascension window.
        # mask_ra is a (N_sources,N_events)-shaped ndarray.
        with TaskTimer(tl, 'ESM: Calculate mask_ra.'):
            mask_ra = ra_dist < dRA_half[:, np.newaxis]

        # Determine the mask for the events which fall inside the declination
        # window.
        # mask_dec is a (N_sources,N_events)-shaped ndarray.
        with TaskTimer(tl, 'ESM: Calculate mask_dec'):
            mask_dec = ((events['dec'] > src_dec_minus[:, np.newaxis]) &
                        (events['dec'] < src_dec_plus[:, np.newaxis]))

        # Determine the mask for the events which fall inside the
        # right-ascension and declination window.
        # mask_sky is a (N_sources,N_events)-shaped ndarray.
        with TaskTimer(tl, 'ESM: Calculate mask_sky.'):
            mask_sky = mask_ra & mask_dec

        # Determine the mask for the events that fall inside at least one
        # source sky window.
        # mask is a (N_events,)-shaped ndarray.
        with TaskTimer(tl, 'ESM: Calculate mask.'):
            mask = np.any(mask_sky, axis=0)

        # Reduce the events according to the mask.
        with TaskTimer(tl, 'ESM: Create selected_events.'):
            # Using an integer indices array for data selection is several
            # factors faster than using a boolean array.
            idxs = events.indices[mask]
            selected_events = events[idxs]

        if (retidxs):
            return (selected_events, idxs)
        return selected_events
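
The wrap-around right-ascension distance is easy to verify in isolation. A
toy sketch (all values assumed, in radians):

    import numpy as np

    # The modulo trick maps any signed RA difference onto [0, pi], so the
    # smaller of the two distances on the circle is always used.
    src_ra = np.array([0.1])
    evt_ra = np.array([6.2, 3.2, 0.3])

    ra_dist = np.fabs(
        np.mod(evt_ra - src_ra[:, np.newaxis] + np.pi, 2 * np.pi) - np.pi)
    print(ra_dist)  # [[0.1832 3.1    0.2   ]] (approximately)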
Example #10
    def generate_events(self,
                        rss,
                        dataset,
                        data,
                        mean=None,
                        poisson=True,
                        tl=None):
        """Generates a `mean` number of background events for the given dataset
        and its data.

        Parameters
        ----------
        rss : instance of RandomStateService
            The instance of RandomStateService that should be used to generate
            random numbers from.
        dataset : instance of Dataset
            The Dataset instance describing the dataset for which background
            events should get generated.
        data : instance of DatasetData
            The DatasetData instance holding the data of the dataset for which
            background events should get generated.
        mean : float | None
            The mean number of background events to generate.
            Can be `None`. In that case the mean number of background events is
            obtained through the `get_mean_func` function.
        poisson : bool
            If set to True (default), the actual number of generated background
            events will be drawn from a Poisson distribution with the given mean
            value of background events.
            If set to False, the argument ``mean`` specifies the actual number
            of generated background events.
        tl : instance of TimeLord | None
            The optional instance of TimeLord that should be used to collect
            timing information about this method.

        Returns
        -------
        n_bkg : int
            The number of generated background events for the data set.
        bkg_events : instance of DataFieldRecordArray
            The instance of DataFieldRecordArray holding the generated
            background events. The number of events can be less than `n_bkg`
            if an event selection method is used.
        """
        tracing_enabled = CFG['debugging']['enable_tracing']

        # Create aliases to avoid dot-lookup.
        self__pre_event_selection_method = self._pre_event_selection_method

        # Check if the data set has changed. In that case we need to get new
        # background probabilities for each monte-carlo event and a new mean
        # number of background events.
        data_id = id(data)
        if (self._cache_data_id != data_id):
            if (tracing_enabled):
                logger.debug(
                    f'DatasetData instance id of dataset "{dataset.name}" '
                    f'changed from {self._cache_data_id} to {data_id}')
            # Cache the current id of the data.
            self._cache_data_id = data_id

            # Create a copy of the MC data with all MC data fields removed,
            # except the specified MC data fields to keep for the
            # ``get_mean_func`` and ``get_event_prob_func`` functions.
            keep_field_names = list(
                set(CFG['dataset']['analysis_required_exp_field_names'] +
                    data.exp_field_names + self._keep_mc_data_field_names))
            data_mc = data.mc.copy(keep_fields=keep_field_names)

            if (self._get_mean_func is not None):
                with TaskTimer(tl, 'Calculate total MC background mean.'):
                    self._cache_mean = self._get_mean_func(
                        dataset, data, data_mc)

            with TaskTimer(tl,
                           'Calculate MC background event probability cache.'):
                self._cache_mc_event_bkg_prob = self._get_event_prob_func(
                    dataset, data, data_mc)

            if (self__pre_event_selection_method is not None):
                with TaskTimer(tl, 'Pre-select MC events.'):
                    (self._cache_mc_pre_selected,
                     mc_pre_selected_mask_idxs) = \
                        self__pre_event_selection_method.select_events(
                            data_mc, retidxs=True, tl=tl)
                self._cache_mc_event_bkg_prob_pre_selected = \
                    self._cache_mc_event_bkg_prob[mc_pre_selected_mask_idxs]
            else:
                self._cache_mc_pre_selected = data_mc

        if (mean is None):
            if (self._cache_mean is None):
                raise ValueError(
                    'No mean number of background events and no '
                    'get_mean_func were specified! One of the two must be '
                    'specified!')
            mean = self._cache_mean
        else:
            mean = float_cast(
                mean, 'The mean number of background events must '
                'be castable to type float!')

        # Draw the number of background events from a poisson distribution with
        # the given mean number of background events. This will be the number of
        # background events for this data set.
        n_bkg = (int(rss.random.poisson(mean)) if poisson
                 else int(np.round(mean, 0)))

        # Apply only event pre-selection before choosing events.
        data_mc_selected = self._cache_mc_pre_selected

        # Calculate the mean number of background events for the pre-selected
        # MC events.
        if (self__pre_event_selection_method is None):
            # No selection at all, use the total mean.
            mean_selected = mean
        else:
            with TaskTimer(tl, 'Calculate selected MC background mean.'):
                mean_selected = self._get_mean_func(dataset, data,
                                                    data_mc_selected)

        # Calculate the actual number of background events for the selected
        # events.
        p_binomial = mean_selected / mean
        with TaskTimer(tl, 'Get p array.'):
            if (self__pre_event_selection_method is None):
                p = self._cache_mc_event_bkg_prob
            else:
                # Pre-selection.
                p = self._cache_mc_event_bkg_prob_pre_selected / p_binomial
        n_bkg_selected = int(np.around(n_bkg * p_binomial, 0))

        # Draw the actual background events from the selected events of the
        # monte-carlo data set.
        with TaskTimer(tl, 'Draw MC background indices.'):
            bkg_event_indices = rss.random.choice(
                data_mc_selected.indices,
                size=n_bkg_selected,
                p=p,
                replace=(not self._unique_events))
        with TaskTimer(tl, 'Select MC background events from indices.'):
            bkg_events = data_mc_selected[bkg_event_indices]

        # Scramble the drawn MC events if requested.
        if (self._data_scrambler is not None):
            with TaskTimer(tl, 'Scramble MC background data.'):
                bkg_events = self._data_scrambler.scramble_data(rss,
                                                                bkg_events,
                                                                copy=False)

        # Remove MC specific data fields from the background events record
        # array. So the result contains only experimental data fields. The list
        # of experimental data fields is defined as the unique set of the
        # required experimental data fields defined by the data set, and the
        # actual experimental data fields (in case there are additional kept
        # data fields by the user).
        with TaskTimer(tl, 'Remove MC specific data fields from MC events.'):
            exp_field_names = list(
                set(CFG['dataset']['analysis_required_exp_field_names'] +
                    data.exp_field_names))
            bkg_events.tidy_up(exp_field_names)

        return (n_bkg, bkg_events)
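
Stripped of the caching and pre-selection machinery, the core draw is a
weighted sampling of MC event indices. A toy sketch using numpy's generator
in place of the RandomStateService (all values assumed):

    import numpy as np

    rng = np.random.default_rng(1)
    indices = np.arange(6)
    p = np.array([0.4, 0.2, 0.2, 0.1, 0.05, 0.05])

    # Poisson-fluctuated number of background events, then a weighted draw of
    # event indices according to the per-event background probabilities.
    n_bkg = rng.poisson(4.0)
    drawn = rng.choice(indices, size=n_bkg, p=p, replace=True)
    print(n_bkg, drawn)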
Example #11
def create_pseudo_data_file(ana,
                            rss,
                            filename,
                            mean_n_bkg_list=None,
                            mean_n_sig=0,
                            bkg_kwargs=None,
                            sig_kwargs=None,
                            tl=None):
    """Creates a pickle file that contains the pseudo data for a single trial.

    Parameters
    ----------
    ana : Analysis
        The Analysis instance that should be used to generate the pseudo data.
    rss : RandomStateService
        The RandomStateService instance to use for generating random numbers.
    filename : str
        The name of the file to which the generated pseudo data should get
        written.
    mean_n_bkg_list : list of float | None
        The mean number of background events that should be generated for
        each dataset. If set to None (the default), the background
        generation method needs to obtain this number itself.
    mean_n_sig : float
        The mean number of signal events that should be generated for the
        trial. The actual number of generated events will be drawn from a
        Poisson distribution with this given signal mean as mean.
    bkg_kwargs : dict | None
        Additional keyword arguments for the `generate_events` method of the
        background generation method class. A common keyword argument is
        `poisson`.
    sig_kwargs : dict | None
        Additional keyword arguments for the `generate_signal_events` method
        of the `SignalGenerator` class. A common keyword argument is
        `poisson`.
    tl : TimeLord | None
        The instance of TimeLord that should be used to time individual tasks.

    """
    (n_bkg_events_list, bkg_events_list) = ana.generate_background_events(
        rss=rss, mean_n_bkg_list=mean_n_bkg_list, bkg_kwargs=bkg_kwargs, tl=tl)

    (n_sig, n_sig_events_list,
     sig_events_list) = ana.generate_signal_events(rss=rss,
                                                   mean_n_sig=mean_n_sig,
                                                   sig_kwargs=sig_kwargs,
                                                   tl=tl)

    trial_data = dict(mean_n_bkg_list=mean_n_bkg_list,
                      mean_n_sig=mean_n_sig,
                      bkg_kwargs=bkg_kwargs,
                      sig_kwargs=sig_kwargs,
                      n_sig=n_sig,
                      n_bkg_events_list=n_bkg_events_list,
                      n_sig_events_list=n_sig_events_list,
                      bkg_events_list=bkg_events_list,
                      sig_events_list=sig_events_list)

    with TaskTimer(tl, 'Writing pseudo data to file.'):
        with open(filename, 'wb') as fp:
            pickle.dump(trial_data, fp)
Example #12
    def load_data(self,
                  keep_fields=None,
                  livetime=None,
                  dtc_dict=None,
                  dtc_except_fields=None,
                  efficiency_mode=None,
                  tl=None):
        """Loads the data, which is described by the dataset. If a good-run-list
        (GRL) is provided for this dataset, only experimental data will be
        selected which matches the GRL.

        Parameters
        ----------
        keep_fields : list of str | None
            The list of user-defined data fields that should get loaded and kept
            in addition to the analysis required data fields.
        livetime : float | None
            If not None, uses this livetime (in days) as livetime for the
            DatasetData instance, otherwise uses the live time from the Dataset
            instance or, if available, the livetime from the good-run-list
            (GRL).
        dtc_dict : dict | None
            This dictionary defines how data fields of specific data types
            should get converted into other data types. This can be used to
            reduce the memory consumption of the data. If set to None, no
            data type conversion is performed.
        dtc_except_fields : str | sequence of str | None
            The sequence of field names whose data type should not get
            converted.
        efficiency_mode : str | None
            The efficiency mode the data should get loaded with. Possible values
            are:

                - 'memory':
                    The data will be loaded in a memory efficient way. This
                    will require more time, because all data records of a
                    file will be loaded sequentially.
                - 'time':
                    The data will be loaded in a time efficient way. This
                    will require more memory, because each data file gets
                    loaded in memory at once.

            The default value is ``'time'``. If set to ``None``, the default
            value will be used.
        tl : TimeLord instance | None
            The TimeLord instance that should be used to time the data load
            operation.

        Returns
        -------
        data : instance of DatasetData
            A DatasetData instance holding the experimental and monte-carlo
            data of this data set.
        """
        # Load the good-run-list (GRL) data if it is provided for this dataset,
        # and calculate the livetime based on the GRL.
        data_grl = None
        lt = self.livetime
        if (len(self._grl_pathfilename_list) > 0):
            data_grl = self.load_grl(efficiency_mode=efficiency_mode, tl=tl)
            if ('livetime' not in data_grl.field_name_list):
                raise KeyError('The GRL file(s) "%s" has no data field named '
                               '"livetime"!' %
                               (','.join(self._grl_pathfilename_list)))
            lt = np.sum(data_grl['livetime'])

        # Override the livetime if there is a user defined livetime.
        if (livetime is not None):
            lt = livetime

        # Load all the defined data.
        data = I3DatasetData(
            super(I3Dataset,
                  self).load_data(keep_fields=keep_fields,
                                  livetime=lt,
                                  dtc_dict=dtc_dict,
                                  dtc_except_fields=dtc_except_fields,
                                  efficiency_mode=efficiency_mode,
                                  tl=tl), data_grl)

        # Select only the experimental data which fits the good-run-list for
        # this dataset.
        if (data_grl is not None):
            task = 'Select only the experimental data that matches the GRL '\
                'for dataset "%s".' % (self.name)
            with TaskTimer(tl, task):
                runs = np.unique(data_grl['run'])
                mask = np.isin(data.exp['run'], runs)
                data.exp = data.exp[mask]

        return data
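
The GRL matching at the end is a plain run-number membership test. A toy
sketch (values assumed):

    import numpy as np

    # Keep only experimental events whose run number appears in the GRL.
    exp_run = np.array([1, 2, 3, 2, 4])
    grl_run = np.array([1, 2])

    mask = np.isin(exp_run, np.unique(grl_run))
    print(mask)  # [ True  True False  True False]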