Example #1
0
    def rms(self) -> float:
        """
        Compute the root-mean-square of this chunk's intensity.

        The work is delegated to `get_rms`, which receives the chunk's
        `intensity` attribute. Masked regions are presumably already
        excluded by `intensity` itself -- confirm against that accessor.

        Returns
        -------
        float
            The value of `get_rms(self.intensity)`.
        """
        chunk_rms = get_rms(self.intensity)
        return chunk_rms
Example #2
0
 def generate_report(self,
                     dv: float = 0.12,
                     vel_roi: float = 10.) -> Dict[str, str]:
     """
     Summarize the matched filter result as a small dictionary.

     Parameters
     ----------
     dv : float, optional
         Forwarded as the first argument of `self.is_centered`,
         by default 0.12
     vel_roi : float, optional
         Forwarded as the second argument of `self.is_centered`,
         by default 10.

     Returns
     -------
     dict
         Three entries: "peak_response" (first element of
         `self.peak_mf_intensity`, formatted to 3 decimals),
         "mf_baseline" (`get_rms` of `self.matched_filter`, formatted
         to 5 decimals) and "is_centered". Note that "is_centered" is
         a `bool`, not a string, despite the return annotation.
     """
     # the second element of the pair is not needed for the report
     peak_response, _ = self.peak_mf_intensity
     mf_baseline = get_rms(self.matched_filter)
     centered_flag = bool(self.is_centered(dv, vel_roi))
     report = {
         "peak_response": f"{peak_response:.3f}",
         "mf_baseline": f"{mf_baseline:.5f}",
         "is_centered": centered_flag
     }
     return report
Example #3
0
    def matched_filter(self) -> np.ndarray:
        """
        Cross-correlate the observed and simulated stacks, then normalize
        the response by its own RMS.

        `np.correlate` is evaluated in "same" mode with `self.intensity`
        as the first input and `self.sim_intensity` as the second. The
        result is divided in place by `get_rms` of itself, so the
        returned response is expressed in multiples of its RMS (i.e. SNR).

        Returns
        -------
        np.ndarray
            The RMS-normalized cross-correlation response.
        """
        response = np.correlate(self.intensity,
                                self.sim_intensity,
                                mode="same")
        # normalize by the noise level of the response itself
        noise_level = get_rms(response)
        response /= noise_level
        return response
Example #4
0
def velocity_stack_pipeline(
    spectrum: Spectrum,
    observation: Observation,
    vel_width: float = 40.0,
    resolution: float = 0.0014,
    dv: float = 0.12,
    vel_roi: float = 10.0,
    rms_sigma: float = 3.0,
    n_workers: int = 1,
) -> VelocityStack:
    """
    High-level function for performing a velocity stack.

    The simulated `spectrum` provides the expected flux per transition and
    determines which frequency centers are chunked and stacked: peaks are
    located in the simulated profile, and both the observation and the
    simulation are cut into chunks around those peak frequencies.

    The steps are:

    1. Find peaks in the simulated spectrum and use them as chunk centers.
    2. Generate observed and simulated chunks around each center.
    3. Set the mask of every chunk. Observed chunks use a bias of zero and
       are additionally given the peak frequencies that fall inside their
       window; simulated chunks use a bias of -10 and no frequencies.
    4. Weight every chunk pair by the summed simulated intensity relative
       to the strongest simulated peak, divided by the observed RMS squared.
    5. Stack both sets of chunks and normalize both stacks by the RMS of
       the observational stack.

    Parameters
    ----------
    spectrum : Spectrum
        Simulated spectrum; its `freq_profile` and `int_profile`
        attributes are used as the simulated frequency and intensity.
    observation : Observation
        Observation; `observation.spectrum.frequency` and
        `observation.spectrum.Tb` are used as the observed data.
    vel_width : float, optional
        Window size in velocity, by default +/-40.
    resolution : float, optional
        Resolution of the window, by default 0.0014 km/s
    dv : float, optional
        Nominal line width, by default 0.12
    vel_roi : float, optional
        Number of line widths to define the region of interest,
        by default 10.
    rms_sigma : float, optional
        Multiples of RMS to use as a threshold for interloper
        masking, by default 3.
    n_workers : int, optional
        Passed through to `generate_spectrum_chunks` (presumably the
        number of parallel workers used to build the chunks), by default 1.

    Returns
    -------
    VelocityStack
        Instance of the `VelocityStack` class, which
        wraps the results
    """
    obs_x, obs_y = observation.spectrum.frequency, observation.spectrum.Tb
    sim_x, sim_y = spectrum.freq_profile, spectrum.int_profile
    # find the peaks in the simulated spectrum to use as the frequency
    # center. This is used because it's more resilient to weird lineshapes.
    peak_indices = find_peaks(sim_x,
                              sim_y,
                              resolution,
                              min_sep=vel_roi * dv,
                              is_sim=True,
                              sigma=rms_sigma)
    centers = sim_x[peak_indices]
    obs_chunks = generate_spectrum_chunks(obs_x, obs_y, centers, vel_width,
                                          n_workers)
    sim_chunks = generate_spectrum_chunks(sim_x, sim_y, centers, vel_width,
                                          n_workers)
    # for each chunk, set the velocity mask to protect the intensity of each ROI
    for obs_chunk, sim_chunk in zip(obs_chunks, sim_chunks):
        # find which windows we should definitely mask in the observation
        # because we know there's something there
        coin_mask = np.asarray(
            [obs_chunk.frequency_in_window(freq) for freq in centers])
        coincidences = centers[coin_mask]
        # sometimes we don't have coincidences and that's okay; test for
        # emptiness directly rather than through the sum of the frequencies
        if coincidences.size == 0:
            coincidences = None
        obs_chunk.mask = (dv, vel_roi, rms_sigma, 0., coincidences)
        # the bias shifts the threshold for flux masking; for simulations
        # we impose a large negative offset to zero everything out
        sim_chunk.mask = (dv, vel_roi, rms_sigma, -10., None)
    # the simulated data is used to weight the stacking
    expected_intensities = sim_y[peak_indices]
    max_expected = expected_intensities.max()
    # for each chunk, we set the weight to be equal to the contribution of
    # this specific transition compared to the other transitions, divided
    # by the observational RMS squared
    for obs_chunk, sim_chunk in zip(obs_chunks, sim_chunks):
        obs_rms = obs_chunk.rms
        cum_sim = np.nansum(sim_chunk.intensity)
        weight = (cum_sim / max_expected) / (obs_rms**2.0)
        # both members of the pair share the same weight
        obs_chunk.weight = weight
        sim_chunk.weight = weight
    # perform the velocity stack for both observation and simulation; the
    # weights returned by the observational stack are reused for the simulation
    (obs_stack_x, obs_stack_y,
     rms_weights) = velocity_stack(obs_chunks, vel_width, resolution)
    (_, sim_stack_y, _) = velocity_stack(sim_chunks, vel_width, resolution,
                                         rms_weights)
    # normalize both stacks by the RMS of the observational velocity stack
    stack_rms = get_rms(obs_stack_y)
    obs_stack_y /= stack_rms
    sim_stack_y /= stack_rms
    return VelocityStack(obs_stack_x, obs_stack_y, sim_stack_y, obs_chunks,
                         sim_chunks)
Example #5
0
    def mask(self, parameters: Tuple[float]) -> None:
        """
        Sets the mask used for calculating intensities. The `parameters`
        argument is a five-tuple, containing the line width `dv`, the
        multiplier term `vel_roi` for determining how many channels to
        protect its intensity (`dv * vel_roi`), and the `rms_sigma` as
        the number of sigma away from the RMS to mask for intensity
        calculations, a `bias` value that can control the thresholding
        with intensity, and `freqs` as an array of frequency values that
        need to be manually blocked (or `None` when there are none).

        This is coded this way because setter methods can only take a
        single argument. The `Tuple[float]` annotation is kept for
        compatibility even though five values are expected.

        The intensity threshold is `get_rms(self._intensity) * rms_sigma
        + bias`; every channel whose intensity is greater than or equal to
        it is flagged. A negative `bias` therefore lowers the threshold
        and flags more channels.

        The frequency mask is combined with the intensity mask because
        it corresponds to regions where we know there are definitely
        coincidences due to the same molecule.
        The main use case here is using the knowledge of the simulation
        to blank off regions that will have flux but aren't large enough
        to be picked off in intensity. For prolate tops, these are K-ladders
        that are too weak to be seen, but are actually still there.

        The final mask, i.e. the one stored in `self._mask`, is True in the
        regions that are to be blanked (presumably set to NaN wherever the
        mask is applied -- that step is not part of this method). The
        region of interest, `|velocity| <= dv * vel_roi`, is always False.

        Parameters
        ----------
        parameters : Tuple[float]
            dv, vel_roi, rms_sigma, bias, and freqs. `freqs` may be `None`
            or any array-like of frequencies; scalars are treated as a
            single frequency.
        """
        # unpack the arguments
        dv, vel_roi, rms_sigma, bias, freqs = parameters
        half_width = dv * vel_roi
        # isolate the ROI; it is protected from masking no matter what
        roi_mask = np.logical_and(-half_width <= self.velocity,
                                  half_width >= self.velocity)
        threshold = (get_rms(self._intensity) * rms_sigma) + bias
        # start with nothing blocked and accumulate one window per frequency;
        # this keeps the mask correctly shaped even if `freqs` is empty
        freq_mask = np.zeros_like(roi_mask, dtype=bool)
        if freqs is not None:
            # convert frequencies into velocities relative to the chunk
            # center (`ckm` is presumably the speed of light in km/s)
            vels = (np.atleast_1d(freqs) - self.center) * ckm / self.center
            # mask regions corresponding to known interloping frequencies
            # from the same molecule
            for vel in vels:
                freq_mask |= np.logical_and(vel - half_width <= self.velocity,
                                            vel + half_width >= self.velocity)
        # combine the intensity and frequency masks
        blank_mask = (self._intensity >= threshold) | freq_mask
        # the ROI needs to be protected no matter what, so we set that
        # region to False always
        self._mask = blank_mask & ~roi_mask