예제 #1
0
    def is_valid(self,
                 gps_time,
                 delta_t=16,
                 dq_bits=(0, 1, 2, 3),
                 inj_bits=(0, 1, 2, 4)):
        """
        Check whether `gps_time` is a valid time to sample noise from.

        A time is accepted only if all of the following hold:

        1. It is more than `delta_t` seconds away from every merger
           listed in the event catalog.
        2. It is more than `delta_t` seconds away from both edges of
           the HDF file that contains it.
        3. Every entry of the H1 and L1 masks in the index range
           `[idx - delta_t, idx + delta_t)`, where
           `idx = self.gps2idx(gps_time)`, has all requested `dq_bits`
           and `inj_bits` set.

        .. seealso:: For more information about the `dq_bits` and
            `inj_bits`, check out the website of the GW Open Science
            Center, which explains these for the case of O1:

                https://www.gw-openscience.org/archive/dataset/O1

        Args:
            gps_time (int): The GPS time whose validity we are checking.
                NumPy integer scalars are accepted as well.
            delta_t (int): The number of seconds around `gps_time`
                which we also want to be valid (because the sample will
                be an interval). It is used directly as an index offset
                into the mask timeline, which presumably holds one
                entry per second (confirm against `gps2idx`).
            dq_bits (tuple): The Data Quality Bits which one would like
                to require (subset of `range(7)`).
                *For example:* `dq_bits=(0, 1, 2, 3)` means that the
                data quality needs to pass all tests up to `CAT3`.
            inj_bits (tuple): The Injection Bits which one would like
                to require (subset of `range(5)`).
                *For example:* `inj_bits=(0, 1, 2, 4)` means that only
                continuous wave (CW) injections are permitted; all
                recordings containing any other type of injection
                will be invalid for sampling.

        Returns:
            `True` if `gps_time` is valid, otherwise `False`.

        Raises:
            AssertionError: If one of the arguments fails the sanity
                checks at the top of the method.
        """

        # ---------------------------------------------------------------------
        # Perform some basic sanity checks
        # ---------------------------------------------------------------------

        # Also accept NumPy integers, which is what index arithmetic on
        # arrays typically produces
        assert isinstance(gps_time, (int, np.integer)), \
            'Received GPS time that is not an integer!'
        assert delta_t >= 0, \
            'Received an invalid value for delta_t!'
        assert set(dq_bits).issubset(set(range(7))), \
            'Invalid Data Quality bit specification passed to is_valid()!'
        assert set(inj_bits).issubset(set(range(5))), \
            'Invalid Injection bit specification passed to is_valid()!'

        # ---------------------------------------------------------------------
        # Check if given time is too close to a real event
        # ---------------------------------------------------------------------

        # Get GPS times of all mergers in the catalog
        catalog = Catalog()
        real_event_times = [catalog.mergers[name].time
                            for name in catalog.names]

        # Check if gps_time is too close to any of these times
        if any(abs(gps_time - event_time) <= delta_t
               for event_time in real_event_times):
            return False

        # ---------------------------------------------------------------------
        # Check if the given time is too close to the edge within its HDF file
        # ---------------------------------------------------------------------

        # Loop over all HDF files to find the one that contains the given
        # gps_time. Here, we do not distinguish between H1 and L1, because
        # we assume that the files for the detectors are aligned on a grid.
        for hdf_file in self.hdf_files:

            # Get the start and end time for the current HDF file
            start_time = hdf_file['start_time']
            end_time = start_time + hdf_file['duration']

            # A file covers the half-open interval [start_time, end_time).
            # The lower bound must be inclusive: otherwise a gps_time that
            # falls exactly on a file boundary matches no file at all and
            # silently skips the edge check below.
            if start_time <= gps_time < end_time:

                # Too close to either edge of its file: not a valid time
                if not start_time + delta_t < gps_time < end_time - delta_t:
                    return False

                # Far enough from the edges; no need to look at other files
                break

        # ---------------------------------------------------------------------
        # Select the environment around the specified time
        # ---------------------------------------------------------------------

        # Map time to indices (look the central index up only once)
        idx_center = self.gps2idx(gps_time)
        idx_start = idx_center - delta_t
        idx_end = idx_center + delta_t

        # Select the mask intervals
        environment = {
            key: self.timeline[key][idx_start:idx_end]
            for key in ('h1_inj_mask', 'l1_inj_mask',
                        'h1_dq_mask', 'l1_dq_mask')
        }

        # ---------------------------------------------------------------------
        # Data Quality and Injection Check
        # ---------------------------------------------------------------------

        # Combine the requested bit positions into one integer pattern each
        # (going through set() so that a repeated bit is not counted twice)
        required_dq = sum(1 << bit for bit in set(dq_bits))
        required_inj = sum(1 << bit for bit in set(inj_bits))

        # A mask word passes if and only if ALL required bits are set, i.e.
        # (word & required) == required. Comparing magnitudes instead
        # (word > required) is not equivalent: it rejects a word that equals
        # the required pattern and accepts words in which a required bit is
        # cleared but a higher bit is set. Using a scalar pattern also keeps
        # the check independent of the actual length of the selected slice.
        checks = (('h1_dq_mask', required_dq),
                  ('l1_dq_mask', required_dq),
                  ('h1_inj_mask', required_inj),
                  ('l1_inj_mask', required_inj))
        for key, required in checks:
            bits_set = np.bitwise_and(environment[key], required) == required
            if not np.all(bits_set):
                return False

        # If we have not returned False yet, the time must be valid!
        return True
예제 #2
0
    def __init__(self, run_dir, configs=None):
        '''
        Stores config files for pycbc_inference runs

        Parameters
        ----------

        run_dir : string
            Forwarded unchanged to the parent class initializer.
        configs : dict, optional
            Initial configuration sections. When omitted, a fresh empty
            dict is created for this instance. Missing 'prior',
            'injection' and 'event' sections are added as empty dicts
            before the corresponding defaults are filled in.


        Usage Notes
        -----------

        [1] Compatible with `ConfigWriter`.
        This class is easiest used with the writer it returns.

        [2] Arguments for `sampler.ini` and `inference.ini`
            are formatted in the initialization of this class

        Therefore, when configuring for Injections
        ------------------------------------------
        No special notes

        [3] Arguments for `data.ini` are not formatted in this class,
        but can be when writing it through its ConfigWriter.

        Therefore, when configuring for Events
        --------------------------------------
        Need the following named variables to be provided to the
        ConfigWriter's `write` function:

        gpstime       : int
        H1_frame_file : str
        H1_channel    : str
        L1_frame_file : str
        L1_channel    : str
        V1_frame_file : str
        V1_channel    : str
        sample_rate   : int (power of 2)

        '''
        # A mutable default argument is created once and shared by every
        # call. The sections added below would then leak from one instance
        # into the next, so build a fresh dict per instance instead.
        if configs is None:
            configs = {}
        super(InferenceConfigs, self).__init__(run_dir, configs)

        # Add prior configs
        if 'prior' not in self.configs:
            self.configs['prior'] = {}
        self.add_default_bbh_prior_config()
        self.add_bilby_prior_files_configs()

        # Add configs for injections
        if 'injection' not in self.configs:
            self.configs['injection'] = {}
        self.add_injection_configs()

        # Add event configs, one per event name listed in the pycbc catalog
        if 'event' not in self.configs:
            self.configs['event'] = {}
        from pycbc.catalog import Catalog
        self.event_names = Catalog().names
        for event_name in self.event_names:
            self.add_event_configs(event_name)

        # Initialize their config writers
        self.update_config_writers()
예제 #3
0
    def build_timeline(
        self,
        window: int = 32,
        dq_bits: Tuple[int, ...] = (0, 1, 2, 3),
        inj_bits: Tuple[int, ...] = (0, 1, 2, 4),
        chunk_size: int = 100000,
    ) -> np.ndarray:
        """Build (or fetch from the cache) a boolean validity timeline.

        Entry `i` of the result is `True` if, in every row of the array
        returned by `self.build_masks(dq_bits, inj_bits, as_array=True)`
        (presumably one row per detector), the `window` consecutive
        entries starting at index `i` are all `True`. The last
        `window - 1` entries, for which no full window fits, are
        `False`. Entries between `self.gps2idx(t - window / 2)` and
        `self.gps2idx(t + window / 2)` are also set to `False` for
        every catalog merger time `t` that lies strictly between
        `self.gps_start_time` and `self.gps_end_time`.

        Results are cached per `(window, dq_bits, inj_bits)` combination
        in `self._cache` / `self._masks`, so repeated calls with the
        same arguments return the stored array.

        For more information about the `dq_bits` and `inj_bits`, check
        out the website of the GW Open Science Center, which explains
        these for the case of O1:

            https://www.gw-openscience.org/archive/dataset/O1

        Args:
            window : int
                The number of consecutive timeline entries (seconds,
                according to the original documentation) that must all
                be valid, because the sample will be an interval.
            dq_bits : Tuple[int, ...]
                The Data Quality Bits which one would like to require.
                *For example:* `dq_bits=(0, 1, 2, 3)` means that the
                data quality needs to pass all tests up to `CAT3`.
            inj_bits : Tuple[int, ...]
                The Injection Bits which one would like to require.
                *For example:* `inj_bits=(0, 1, 2, 4)` means that only
                continuous wave (CW) injections are permitted; all
                recordings containing any other type of injection
                will be invalid for sampling.
            chunk_size : int
                Number of window start positions evaluated per step.
                Must be positive and smaller than
                `self.n_entries - window + 1`.

        Returns:
            A boolean array - `True` if the data is valid, otherwise `False`.

        Raises:
            AssertionError: If an argument fails validation or the cache
                holds more than one entry for the requested combination.
        """

        assert isinstance(window, int) and window >= 0, \
            'Received an invalid int for window!'
        assert set(dq_bits).issubset(set(range(7))), \
            'Invalid Data Quality bit specification!'
        assert set(inj_bits).issubset(set(range(5))), \
            'Invalid Injection bit specification!'

        # Look for an already computed timeline with identical settings
        match = self._cache[(self._cache['window'] == window)
                            & (self._cache['dq_bits'] == dq_bits)
                            & (self._cache['inj_bits'] == inj_bits)]

        assert len(match) in (0, 1), "Duplicated detected in timeline cache!"

        if len(match) == 1:
            return self._masks[match.index.item()]

        # build data quality masks for each detector (window independent)
        masks = self.build_masks(dq_bits, inj_bits, as_array=True)

        # Number of start positions at which a full window still fits
        n_windows = self.n_entries - window + 1
        assert 0 < chunk_size < n_windows, \
            "chunk_size must be smaller than length of timeline."
        n_chunks = int(np.ceil(n_windows / chunk_size))

        timeline = np.zeros((len(masks), n_windows), dtype=bool)

        # Index matrix of shape (chunk_size, window): row i holds the indices
        # i, i + 1, ..., i + window - 1 of the window starting at position i.
        # Built by broadcasting instead of stacking chunk_size small arrays.
        mask_buffer = (np.arange(chunk_size, dtype=np.int32)[:, np.newaxis]
                       + np.arange(window, dtype=np.int32)[np.newaxis, :])

        with tqdm(
                total=self.n_entries,
                desc='Processing timeline windows',
                disable=not self.verbose,
        ) as progress:
            # loop through generator that chunks timeline array
            chunker = chunk_counter(self.n_entries, n_chunks, chunk_size,
                                    window)
            for start, end in chunker:

                # A window is valid if all of its entries are valid; do this
                # for all (:) detectors at once (the last chunk may be
                # shorter, hence the slice of the index matrix)
                window_indices = mask_buffer[:end - start, :]
                timeline[:, start:end] = masks[:, window_indices].all(axis=2)

                progress.update(end - start)  # update tqdm iters
                progress.refresh()
                mask_buffer += chunk_size  # shift indices to the next chunk

            # append "deadzone" entries for completeness (positions at which
            # the remaining data is not long enough for a full window)
            dead_zone = np.zeros((masks.shape[0], max(window - 1, 0)),
                                 dtype=bool)
            # np.all down ifo dim
            timeline = np.concatenate([timeline, dead_zone],
                                      axis=1).all(axis=0)
            progress.update(window - 1)

        # Get GPS times of all mergers in the catalog and invalidate the
        # timeline around every one that lies inside the covered time span
        catalog = Catalog()
        half_window = window / 2
        for merger in catalog.mergers.values():
            event_time = merger.time
            if not self.gps_start_time < event_time < self.gps_end_time:
                continue

            # Use slice assignment per event rather than one stacked index
            # array: an index array is an empty float array when no event
            # qualifies (which cannot be used for indexing), fails for index
            # ranges of unequal length, and wraps around for negative
            # indices. Slices clip at the upper end on their own; clip the
            # lower end explicitly.
            first = max(self.gps2idx(event_time - half_window), 0)
            last = max(self.gps2idx(event_time + half_window), 0)
            timeline[first:last] = False

        # add timeline to cache; DataFrame.append() no longer exists in
        # pandas >= 2.0, pd.concat() is the replacement
        # NOTE(review): assumes self._cache is a pandas DataFrame - confirm
        import pandas as pd
        metadata = pd.DataFrame([{
            'window': window,
            'dq_bits': dq_bits,
            'inj_bits': inj_bits
        }])
        self._cache = pd.concat([self._cache, metadata], ignore_index=True)
        self._masks[self._cache.index[-1]] = timeline

        return timeline
    # Make sure the output directory exists
    output_dir = os.path.join('.', 'output')
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # Construct path to results file and open it in write mode to ensure
    # that it is empty
    results_file = os.path.join(output_dir, 'real_events.hdf')
    with h5py.File(results_file, 'w'):
        pass

    # -------------------------------------------------------------------------
    # Create an event catalog and loop over all events
    # -------------------------------------------------------------------------

    # Set up a new catalog
    catalog = Catalog()

    # Loop over the events it contains
    for event in sorted(catalog.names):

        print('Processing', event.upper())
        print(64 * '-')

        # Get the strain for detectors H1 and L1 (if necessary, this will
        # download the strain from GWOSC)
        strain = dict(
            H1=catalog[event].strain('H1'),
            L1=catalog[event].strain('L1'),
        )

        # ---------------------------------------------------------------------