Example #1
import numpy as np
import pytest

# Import path is an assumption; the original snippet omits its imports.
from ctapipe.calib.camera.gainselection import ThresholdGainSelector

def test_threshold_gain_selector():
    selector = ThresholdGainSelector()
    print(selector)

    assert 'NectarCam' in selector.thresholds

    threshold = selector.thresholds['NectarCam']
    good_hg_value = 35
    good_lg_value = 50
    dummy_waveforms = np.ones((2, 1000, 30)) * good_lg_value
    dummy_waveforms[1:] = good_hg_value  # second channel carries the high-gain test value
    dummy_waveforms[0, 500:, 13:15] = threshold + 10  # push these samples over the threshold

    new_waveforms, gain_mask = selector.select_gains("NectarCam",
                                                     dummy_waveforms)
    assert gain_mask.shape == (1000,)
    assert new_waveforms.shape == (1000, 30)
    assert (new_waveforms[500:] == good_hg_value).all()
    assert (new_waveforms[:500] == good_lg_value).all()

    selector.select_by_sample = True

    new_waveforms, gain_mask = selector.select_gains("NectarCam",
                                                     dummy_waveforms)

    assert new_waveforms.shape == (1000, 30)
    assert (new_waveforms[500:, 13:15] == good_hg_value).all()
    assert (new_waveforms[500:, :13] == good_lg_value).all()
    assert (new_waveforms[500:, 15:] == good_lg_value).all()
    assert gain_mask.shape == new_waveforms.shape

    # test some failures:
    # Camera that doesn't have a threshold:
    with pytest.raises(KeyError):
        selector.select_gains("NonExistantCamera", dummy_waveforms)

    # 3-gain channel input:
    with pytest.raises(ValueError):
        selector.select_gains("NectarCam", np.ones((3, 1000, 30)))

    # 1-gain channel input:
    wf0 = np.ones((1, 1000, 1))
    wf1, gm = selector.select_gains("ASTRICam", wf0)
    assert wf1.shape == (1000,)
    assert gm.shape == (1000,)
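For quick reference outside of a test, the same selector API can be exercised directly. This is a minimal sketch assuming the `select_gains(cam_id, waveforms)` call and `select_by_sample` flag shown in the test above; the import path, camera name, and waveform values are placeholders:

import numpy as np
# Import path is an assumption; the snippets on this page omit their imports.
from ctapipe.calib.camera.gainselection import ThresholdGainSelector

selector = ThresholdGainSelector(select_by_sample=True)
selector.thresholds["NectarCam"] = 4094  # set explicitly, as Examples #2 and #3 do

# Two gain channels, 1000 pixels, 30 samples, mirroring the test above.
waveforms = np.full((2, 1000, 30), 50.0)
waveforms[0, :10, 5:8] = selector.thresholds["NectarCam"] + 10  # saturate a few samples

selected, gain_mask = selector.select_gains("NectarCam", waveforms)

# With select_by_sample=True the mask flags the individual (pixel, sample)
# entries where the other gain channel was substituted.
print(selected.shape, gain_mask.shape)  # (1000, 30) (1000, 30)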
Example #2
import numpy as np

# Import path is an assumption; the original snippet omits its imports.
from ctapipe.calib.camera.gainselection import ThresholdGainSelector

def gain_selection(waveform, signals, peakpos, cam_id, threshold):
    """
    Custom lst calibration.
    Update event.dl1.tel[telescope_id] with calibrated image and peakpos

    Parameters
    ----------
    waveform: array of waveforms of the events
    signals: array of calibrated pixel charges
    peakpos: array of pixel peak positions
    cam_id: str
    threshold: int threshold to change form high gain to low gain
    """
    # Previous per-pixel implementation, kept for reference:
    # combined = signals[0].copy()
    # peaks = peakpos[0].copy()
    # for pixel in range(0, combined.size):
    #     if np.any(waveform[0][pixel] >= threshold):
    #         combined[pixel] = signals[1][pixel]
    #         peaks[pixel] = peakpos[1][pixel]
    # Gain selection using the ctapipe ThresholdGainSelector
    gainsel = ThresholdGainSelector(select_by_sample=True)
    gainsel.thresholds[cam_id] = threshold

    waveform, gainmask = gainsel.select_gains(cam_id, waveform)
    # True for pixels where at least one sample switched to the other gain channel
    signalmask = gainmask.any(axis=1)

    combined = signals[0].copy()
    combined[signalmask] = signals[1][signalmask]
    peaks = peakpos[0].copy()
    peaks[signalmask] = peakpos[1][signalmask]

    return combined, peaks
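A short usage sketch for the function above with dummy arrays; the array shapes follow the function body (two gain channels), while the camera name, pixel count, sample count, and threshold value are illustrative placeholders:

import numpy as np

n_pixels, n_samples = 1855, 30  # illustrative sizes

waveform = np.random.normal(300.0, 20.0, size=(2, n_pixels, n_samples))
signals = np.random.normal(100.0, 10.0, size=(2, n_pixels))
peakpos = np.random.randint(0, n_samples, size=(2, n_pixels))

combined, peaks = gain_selection(waveform, signals, peakpos,
                                 cam_id="LSTCam", threshold=4094)
assert combined.shape == (n_pixels,)
assert peaks.shape == (n_pixels,)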
Example #3
class DL1DataWriter:
    """Writes data using event sources and DL1DataDumpers.

    Provides some options for controlling the output file sizes.
    """
    def __init__(self,
                 event_source_class=None,
                 event_source_settings=None,
                 data_dumper_class=CTAMLDataDumper,
                 data_dumper_settings=None,
                 calibration_settings=None,
                 preselection_cut_function=None,
                 write_mode='parallel',
                 output_file_size=10737418240,
                 events_per_file=None,
                 gain_thresholds=None,
                 save_mc_events=False):
        """Initialize a DL1DataWriter instance.

        Provides some options for controlling the output file sizes.

        Parameters
        ----------
        event_source_class : subclass of ctapipe.io.eventsource.EventSource
            A subclass of EventSource which will be used to load and yield
            events as DataContainers.
        event_source_settings : dict
            A dictionary of kwargs which will be passed into the constructor
            for the EventSource.
        data_dumper_class : subclass of dl1_data_writer.DL1DataDumper
            A subclass of DL1DataDumper which will be used to write events from
            the EventSource to output files.
        data_dumper_settings : dict
            A dictionary of kwargs which will be passed into the constructor
            for the DL1DataDumper.
        calibration_settings : dict
            A dictionary of kwargs which will be passed into the constructor
            for ctapipe.calib.camera.CameraCalibrator.
        preselection_cut_function : function
            A cut function used to determine which events in the input files
            to write to the output files. Takes a
            ctapipe.io.containers.DataContainer describing a single event and
            returns a boolean indicating if it passes the cut. If None, no cut
            will be applied.
        write_mode : str
            Whether to process the runs in parallel (one process per run)
            or serially. Valid options are 'serial' and 'parallel'.
        output_file_size : int
            Maximum size of each output file. If the total amount of input data
            requested for a given output file exceeds this size, the output
            will be split across multiple files.
        events_per_file : int
            Maximum number of events to write per output file. If the total
            number of input events requested for a given output file exceeds
            this number, the output will be split across multiple files.
        gain_thresholds : dict
            A dictionary mapping camera names to the waveform threshold above
            which the low-gain channel is selected. If None, a default of 4094
            is used for LSTCam, NectarCam, and ASTRICam.
        save_mc_events : bool
            Whether to save event data for all Monte Carlo showers, even for
            events which did not trigger the array (no images were saved).

        """
        self.event_source_class = event_source_class
        self.event_source_settings = (event_source_settings
                                      if event_source_settings else {})

        self.data_dumper_class = data_dumper_class
        self.data_dumper_settings = (data_dumper_settings
                                     if data_dumper_settings else {})
        self.data_dumper_settings['save_mc_events'] = save_mc_events

        self.preselection_cut_function = preselection_cut_function

        if write_mode in ['serial', 'parallel']:
            self.write_mode = write_mode
        else:
            raise ValueError("Invalid write_mode: {}. Valid options are "
                             "'serial' and 'parallel'.".format(write_mode))

        self.output_file_size = output_file_size
        self.events_per_file = events_per_file

        self.save_mc_events = save_mc_events

        if self.output_file_size:
            logger.info("Max output file size set at {} bytes. Note that "
                        "this may increase the number of output "
                        "files.".format(self.output_file_size))
        if self.events_per_file:
            logger.info("Max number of output events per file set at {}. Note "
                        "that this may increase the number of output "
                        "files.".format(self.events_per_file))

        if calibration_settings is None:
            self.calibration_settings = {
                'r1_product': 'HESSIOR1Calibrator',
                'extractor_product': 'NeighbourPeakIntegrator'
            }
        else:
            self.calibration_settings = calibration_settings

        self.calibrator = calib.camera.calibrator.CameraCalibrator(
            None, None, **self.calibration_settings)

        self.gain_selector = ThresholdGainSelector(select_by_sample=True)

        if gain_thresholds is None:
            self.gain_thresholds = {
                'LSTCam': 4094,
                'NectarCam': 4094,
                'ASTRICam': 4094,
            }
        else:
            self.gain_thresholds = gain_thresholds

    def process_data(self, run_list):
        """Process data from a list of runs.

        If the selected write mode is parallel, creates one process for
        each requested run and executes them all in parallel.

        If the selected write mode is serial, executes the runs one by one,
        writing each target sequentially.

        Parameters
        ----------
        run_list : list of dicts
            A list of dictionaries, each containing two keys, 'inputs' and
            'target'. 'inputs' points to a list of input filenames (str) which
            are to be loaded. 'target' points to an output filename (str)
            to which the data from the input files should be written.

        """
        if self.write_mode == 'parallel':
            num_processes = len(run_list)
            logger.info(
                "{} parallel processes requested.".format(num_processes))

            logger.info("Creating processes...")
            jobs = []
            for i in range(0, num_processes):
                process = multiprocessing.Process(target=self._process_data,
                                                  args=(run_list[i]['inputs'],
                                                        run_list[i]['target']))
                jobs.append(process)

            logger.info("Starting processes...")
            try:
                # Start all parallel processes
                for j in jobs:
                    j.start()

                # Wait for all processes to complete
                for j in jobs:
                    j.join()
            except KeyboardInterrupt:
                logger.error(
                    "Caught keyboard interrupt, killing all processes...")
                for j in jobs:
                    j.terminate()
        elif self.write_mode == 'serial':
            logger.info("Serial processing requested.")

            for run in run_list:
                logger.info("Starting run for target: {}...".format(
                    run['target']))
                self._process_data(run['inputs'], run['target'])

        logger.info("Done!")

    @staticmethod
    def _get_next_filename(output_filename, output_file_count):
        """Get the next filename in the sequence.

        Parameters
        ----------
        output_filename : str
            The filename of the previous output file generated.
        output_file_count : int
            Number to attach to the current output file.

        Returns
        -------
        str
            Next filename in the sequence

        """
        # Append a trailing digit to get next filename in sequence
        dirname = os.path.dirname(output_filename)
        output_filename, *extensions = os.path.basename(output_filename).split(
            '.')
        if re.search(r'_[0-9]+$', output_filename):
            output_filename = re.sub(r'_[0-9]+$', "_" + str(output_file_count),
                                     output_filename)
        else:
            output_filename = (output_filename + "_" + str(output_file_count))

        for ext in extensions:
            output_filename = output_filename + '.' + ext

        output_filename = os.path.join(dirname, output_filename)

        return output_filename

    def _process_data(self, file_list, output_filename):
        """Write a single output file given a list of input files.

        Parameters
        ----------
        file_list : list
            A list of input filenames (str) to read data from.
        output_filename : str
            Filename of the output file to write data to.

        """
        output_file_count = 1

        data_dumper = self.data_dumper_class(output_filename,
                                             **self.data_dumper_settings)

        for filename in file_list:
            if self.event_source_class:
                event_source = self.event_source_class(
                    filename, **self.event_source_settings)
            else:
                event_source = io.event_source(filename)

            # Write all file-level data if not present
            # Or compare to existing data if already in file
            example_event = next(event_source._generator())
            subarray = example_event.inst.subarray
            mcheader = example_event.mcheader
            data_dumper.prepare_file(filename, subarray, mcheader)

            # Write all events sequentially
            for event in event_source:
                self.calibrator.calibrate(event)
                self.combine_channels(event)
                if (self.preselection_cut_function is not None
                        and not self.preselection_cut_function(event)):
                    continue
                try:
                    data_dumper.dump_event(event)
                except IOError:
                    logger.error("Failed to write event from file "
                                 "{}, skipping...".format(filename))
                    break

                max_events_reached = (
                    (self.events_per_file is not None)
                    and (data_dumper.event_index - 1 >= self.events_per_file))

                max_size_reached = (
                    (self.output_file_size is not None) and (os.path.getsize(
                        data_dumper.output_filename) > self.output_file_size))

                if max_events_reached or max_size_reached:
                    # Reset event count and increment file count
                    output_file_count += 1

                    output_filename = self._get_next_filename(
                        output_filename, output_file_count)

                    # Create a new Data Dumper pointing at a new file
                    # and write file-level data
                    # Will flush + finalize + close file owned by
                    # previous data dumper
                    data_dumper = self.data_dumper_class(
                        output_filename, **self.data_dumper_settings)

                    # Write all file-level data if not present
                    # Or compare to existing data if already in file
                    example_event = next(event_source._generator())
                    subarray = example_event.inst.subarray
                    mcheader = example_event.mcheader
                    data_dumper.prepare_file(filename, subarray, mcheader)

            if self.save_mc_events:
                for mc_event in event_source.file_.iter_mc_events():
                    try:
                        data_dumper.dump_mc_event(
                            mc_event, event_source.file_.header['run'])
                    except IOError:
                        logger.error("Failed to write event from file "
                                     "{}, skipping...".format(filename))
                        break

                    # Check whether to create another file
                    max_events_reached = (
                        (self.events_per_file is not None) and
                        (data_dumper.event_index - 1 >= self.events_per_file))

                    max_size_reached = (
                        (self.output_file_size is not None)
                        and (os.path.getsize(data_dumper.output_filename) >
                             self.output_file_size))

                    if max_events_reached or max_size_reached:
                        # Reset event count and increment file count
                        output_file_count += 1

                        output_filename = self._get_next_filename(
                            output_filename, output_file_count)

                        # Create a new Data Dumper pointing at a new file
                        # and write file-level data
                        data_dumper = self.data_dumper_class(
                            output_filename, **self.data_dumper_settings)

                        # Write all file-level data if not present
                        # Or compare to existing data if already in file
                        example_event = next(event_source._generator())
                        subarray = example_event.inst.subarray
                        temp = io.DataContainer()
                        event_source.fill_mc_information(temp, mc_event)
                        mcheader = temp.mcheader
                        data_dumper.prepare_file(filename, subarray, mcheader)

    def gain_selection(self, waveform, image, peakpos, cam_id, threshold):
        """
        Based on the waveform and threshold, select the proper gain channel
        for each pixel. By default, channel 0 is kept; if a pixel is saturated
        (a waveform sample above the threshold), channel 1 is used instead.

        Parameters
        ----------
        waveform: ndarray
            Waveforms of the event, shape (2, n_pixels, n_samples)
        image: ndarray
            Calibrated pixel charges, shape (2, n_pixels)
        peakpos: ndarray
            Pixel peak positions, shape (2, n_pixels)
        cam_id: str
            Camera name used to look up the gain-selection threshold
        threshold: int
            Threshold above which to switch from high gain to low gain

        Returns
        -------
        combined_image, combined_peakpos: `(numpy.ndarray, numpy.ndarray)`
            combined_image.shape == (image.shape[1],)
        """

        assert image.shape[0] == 2

        self.gain_selector.thresholds[cam_id] = threshold

        waveform, gain_mask = self.gain_selector.select_gains(cam_id, waveform)
        # True for pixels where any sample exceeded the threshold (channel 1 selected)
        signal_mask = gain_mask.max(axis=1)

        combined_image = image[0].copy()
        combined_image[signal_mask] = image[1][signal_mask]
        combined_peakpos = peakpos[0].copy()
        combined_peakpos[signal_mask] = peakpos[1][signal_mask]

        return combined_image, combined_peakpos

    def combine_channels(self, event):
        """
        Combine the gain channels of the image and peakpos arrays in the
        event.dl1 containers. `event.dl1.tel[tel_id].image` and
        `event.dl1.tel[tel_id].peakpos` are replaced by their combined
        versions.

        Parameters
        ----------
        event: `ctapipe.io.containers.DataContainer`
        """
        for tel_id in event.r0.tels_with_data:
            cam_id = event.inst.subarray.tel[tel_id].camera.cam_id
            if cam_id in self.gain_thresholds:
                waveform = event.r0.tel[tel_id].waveform
                signals = event.dl1.tel[tel_id].image
                peakpos = event.dl1.tel[tel_id].peakpos

                combined_image, combined_peakpos = self.gain_selection(
                    waveform, signals, peakpos, cam_id,
                    self.gain_thresholds[cam_id])
                event.dl1.tel[tel_id].image = combined_image
                event.dl1.tel[tel_id].peakpos = combined_peakpos
            else:
                event.dl1.tel[tel_id].image = event.dl1.tel[tel_id].image[0]
                event.dl1.tel[tel_id].peakpos = event.dl1.tel[tel_id].peakpos[
                    0]
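
Finally, a hedged sketch of how the writer above might be driven. The run_list layout follows the process_data docstring; the file names and limits are placeholders, and the default EventSource and CTAMLDataDumper resolution is assumed to work for the given inputs:

# Hypothetical input/output names; the 'inputs' and 'target' keys follow the
# process_data docstring above.
run_list = [
    {'inputs': ['gamma_run1.simtel.gz', 'gamma_run2.simtel.gz'],
     'target': 'gamma_runs_1_2.h5'},
]

writer = DL1DataWriter(write_mode='serial',
                       events_per_file=5000,           # illustrative limit
                       output_file_size=10737418240)   # 10 GiB, the default
writer.process_data(run_list)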