def rms(self) -> float:
    """
    Compute the RMS of this chunk's intensity.

    The value is whatever `get_rms` returns for `self.intensity`.
    Masked regions are presumably already blanked in `self.intensity`
    and therefore excluded -- confirm against the `intensity` attribute.

    Returns
    -------
    float
        RMS of the chunk intensity.
    """
    chunk_intensity = self.intensity
    return get_rms(chunk_intensity)
def generate_report(self, dv: float = 0.12, vel_roi: float = 10.) -> Dict[str, str]:
    """
    Summarise the matched filter result as a small dictionary.

    Parameters
    ----------
    dv : float, optional
        Forwarded to `self.is_centered`, by default 0.12
    vel_roi : float, optional
        Forwarded to `self.is_centered`, by default 10.

    Returns
    -------
    Dict[str, str]
        Dictionary with the keys `peak_response` (matched filter peak,
        formatted to three decimals), `mf_baseline` (RMS of the matched
        filter, formatted to five decimals) and `is_centered`. Note that
        `is_centered` is stored as a `bool`, not as a string.
    """
    # the second element (presumably the velocity of the peak) is not
    # part of the report
    peak_value, _ = self.peak_mf_intensity
    filter_rms = get_rms(self.matched_filter)
    centered_flag = self.is_centered(dv, vel_roi)
    return {
        "peak_response": f"{peak_value:.3f}",
        "mf_baseline": f"{filter_rms:.5f}",
        "is_centered": bool(centered_flag),
    }
def matched_filter(self) -> np.ndarray:
    """
    Cross-correlate the observed and simulated velocity stacks.

    `self.intensity` is correlated with `self.sim_intensity` using
    `np.correlate` in "same" mode, and the result is divided by its own
    RMS (as computed by `get_rms`) so that the returned spectrum is
    expressed in units of SNR.

    Returns
    -------
    np.ndarray
        Matched filter NumPy 1D array
    """
    xcorr = np.correlate(self.intensity, self.sim_intensity, mode="same")
    # normalise in place by the noise level of the correlation itself
    noise_level = get_rms(xcorr)
    xcorr /= noise_level
    return xcorr
def velocity_stack_pipeline(
    spectrum: Spectrum,
    observation: Observation,
    vel_width: float = 40.0,
    resolution: float = 0.0014,
    dv: float = 0.12,
    vel_roi: float = 10.0,
    rms_sigma: float = 3.0,
    n_workers: int = 1,
) -> VelocityStack:
    """
    High-level function for performing a velocity stack.

    Takes a simulated `Spectrum`, which provides information on how much
    flux to expect per transition and determines which frequency centers
    to chunk and stack, together with the `Observation` to be stacked.

    The chunk generation is parallelized using `joblib` with threading
    and shared memory. This step is the most time exhaustive so far, as it
    requires looping over the detected peaks, which can be hundreds.

    Parameters
    ----------
    spectrum : Spectrum
        Instance of a `molsim.Spectrum` object, which is produced
        from a `Simulation`
    observation : Observation
        Instance of a `molsim.Observation` object
    vel_width : float, optional
        Window size in velocity, by default +/-40.
    resolution : float, optional
        Resolution of the window, by default 0.0014 km/s
    dv : float, optional
        Nominal line width, by default 0.12
    vel_roi : float, optional
        Number of line widths to define the region of interest,
        by default 10.
    rms_sigma : float, optional
        Multiples of RMS to use as a threshold for interloper masking,
        by default 3.
    n_workers : int, optional
        Forwarded to `generate_spectrum_chunks` for both the observed and
        the simulated chunk generation, by default 1

    Returns
    -------
    VelocityStack
        Instance of the `VelocityStack` class, which wraps the results
    """
    obs_x, obs_y = observation.spectrum.frequency, observation.spectrum.Tb
    sim_x, sim_y = spectrum.freq_profile, spectrum.int_profile
    # find the peaks in the simulated spectrum to use as the frequency
    # center. This is used because it's more resilient to weird lineshapes.
    peak_indices = find_peaks(
        sim_x,
        sim_y,
        resolution,
        min_sep=vel_roi * dv,
        is_sim=True,
        sigma=rms_sigma,
    )
    centers = sim_x[peak_indices]
    obs_chunks = generate_spectrum_chunks(obs_x, obs_y, centers, vel_width, n_workers)
    sim_chunks = generate_spectrum_chunks(sim_x, sim_y, centers, vel_width, n_workers)
    # for each chunk, set the velocity mask to protect the intensity of each ROI
    for obs_chunk, sim_chunk in zip(obs_chunks, sim_chunks):
        # observation: find which windows we should definitely mask because
        # we know there's something there
        coin_mask = np.asarray(
            [obs_chunk.frequency_in_window(freq) for freq in centers]
        )
        coincidences = centers[coin_mask]
        # sometimes we don't have coincidences and that's okay; test for
        # emptiness directly instead of summing the frequency values
        if coincidences.size == 0:
            coincidences = None
        obs_chunk.mask = (dv, vel_roi, rms_sigma, 0.0, coincidences)
        # simulation: the bias shifts the threshold for flux masking; for
        # simulations we impose a large negative offset to zero everything out
        sim_chunk.mask = (dv, vel_roi, rms_sigma, -10.0, None)
    # the simulated data is used to weight the stacking
    expected_intensities = sim_y[peak_indices]
    max_expected = expected_intensities.max()
    # for each chunk, we set the weight to be equal to the contribution of
    # this specific transition compared to the other transitions, divided
    # by the observational RMS squared
    for obs_chunk, sim_chunk in zip(obs_chunks, sim_chunks):
        obs_rms = obs_chunk.rms
        cum_sim = np.nansum(sim_chunk.intensity)
        weight = (cum_sim / max_expected) / (obs_rms**2.0)
        obs_chunk.weight = weight
        sim_chunk.weight = weight
    # perform the velocity stack for both observation and simulation; the
    # weights returned by the observational stack are passed on to the
    # simulated stack
    obs_stack_x, obs_stack_y, rms_weights = velocity_stack(
        obs_chunks, vel_width, resolution
    )
    _, sim_stack_y, _ = velocity_stack(
        sim_chunks, vel_width, resolution, rms_weights
    )
    # normalise both stacks by the RMS of the observational velocity stack
    stack_rms = get_rms(obs_stack_y)
    obs_stack_y /= stack_rms
    sim_stack_y /= stack_rms
    return VelocityStack(
        obs_stack_x, obs_stack_y, sim_stack_y, obs_chunks, sim_chunks
    )
def mask(self, parameters: Tuple[float]) -> None:
    """
    Sets the mask used for calculating intensities.

    The `parameters` argument is a five-tuple, containing the line
    width `dv`, the multiplier term `vel_roi` for determining how many
    channels to protect its intensity (`dv * vel_roi`), `rms_sigma` as the
    number of sigma away from the RMS to mask for intensity calculations,
    a `bias` value that can control the thresholding with intensity, and
    `freqs`, either `None` or a 1D array-like of frequency values that
    need to be manually blocked. This is coded this way because setter
    methods can only take a single argument.

    The intensity threshold is `get_rms(self._intensity) * rms_sigma + bias`;
    every channel at or above it is flagged. When the RMS term is zero the
    threshold reduces to `bias`, so a negative `bias` flags every
    channel with intensity at or above that value.

    The frequency mask is combined with the intensity mask because it
    corresponds to regions where we know there are definitely coincidences
    due to the same molecule. The main use case here is using the knowledge
    of the simulation to blank off regions that will have flux but aren't
    large enough to be picked off in intensity. For prolate tops, these
    are K-ladders that are too weak to be seen, but are actually still
    there.

    The final mask, i.e. the one that is set as an attribute, corresponds
    to regions (i.e. where the mask is True) that will be set to NaN.
    The region of interest around zero velocity is never masked.

    Parameters
    ----------
    parameters : Tuple[float]
        dv, vel_roi, rms_sigma, bias, and freqs
    """
    # unpack the arguments
    dv, vel_roi, rms_sigma, bias, freqs = parameters
    half_width = dv * vel_roi
    velocity = self.velocity
    # isolate the ROI centered on zero velocity; it is protected below
    roi_mask = np.logical_and(-half_width <= velocity, half_width >= velocity)
    threshold = (get_rms(self._intensity) * rms_sigma) + bias
    # freqs is either None, or a 1D collection of frequencies
    if freqs is not None:
        # coerce instead of asserting on the exact type: asserts vanish
        # under -O, and sequences / ndarray subclasses are equally valid
        freqs = np.asarray(freqs, dtype=float).ravel()
        # convert frequencies into equivalent velocity
        vels = (freqs - self.center) * ckm / self.center
        # mask regions corresponding to known interloping frequencies
        # from the same molecule; one row per frequency, collapsed with any
        offsets = vels.reshape(-1, 1)
        freq_mask = np.logical_and(
            offsets - half_width <= velocity,
            offsets + half_width >= velocity,
        ).any(axis=0)
    else:
        freq_mask = np.zeros_like(roi_mask, dtype=bool)
    # combine the intensity and frequency masks
    blank_mask = (self._intensity >= threshold) | freq_mask
    # the ROI needs to be protected no matter what, so we set that
    # region to False always
    self._mask = blank_mask & ~roi_mask