def test_find_nearby_pixels(self):
    """
    Check that find_nearby_pixels and find_nearby_pixels2 accumulate the same totals.

    Two synthetic spectra are binned with each method into separate Bins3D
    accumulators; the summed differences of flux, count and weights must be
    zero to 6 decimal places.
    """
    radius_quantity = (200. * (100. * u.km / (u.Mpc * u.s)) / cd.H0)  # type: u.Quantity
    radius = radius_quantity.value

    # two synthetic spectra; the redshift array doubles as the ivar.
    delta_t_file = NpSpectrumContainer(readonly=False, create_new=True, num_spectra=2, filename=None)
    redshift_arrays = (np.arange(1.95, 3.56, 0.002), np.arange(1.94, 3.4, 0.002))
    for spec_index, ar_z in enumerate(redshift_arrays):
        delta_t_file.set_wavelength(spec_index, ar_z)
        delta_t_file.set_flux(spec_index, np.sin(ar_z * 50))
        delta_t_file.set_ivar(spec_index, ar_z)

    pixel_pairs = calc_pixel_pairs.PixelPairs(cd, radius, radius, calc_pixel_pairs.accumulator_types.mean)
    qso_angle = 0.04

    bin_dims = np.array([NUM_BINS_X, NUM_BINS_Y, 1])
    lower_edges = [0, 0, pixel_pairs.min_distance]
    upper_edges = [pixel_pairs.max_parallel_separation,
                   pixel_pairs.max_transverse_separation,
                   pixel_pairs.max_distance]
    bin_ranges = np.array([lower_edges, upper_edges])

    # one accumulator per implementation under test
    pair_separation_bins_1 = bins_3d.Bins3D(dims=bin_dims, ranges=bin_ranges)
    pair_separation_bins_2 = bins_3d.Bins3D(dims=bin_dims, ranges=bin_ranges)

    pixel_pairs.find_nearby_pixels(accumulator=pair_separation_bins_1, qso_angle=qso_angle,
                                   spec1_index=0, spec2_index=1, delta_t_file=delta_t_file)
    pixel_pairs.find_nearby_pixels2(accumulator=pair_separation_bins_2, qso_angle=qso_angle,
                                    spec1_index=0, spec2_index=1, delta_t_file=delta_t_file)

    print(pair_separation_bins_1.ar_flux.sum(), pair_separation_bins_2.ar_flux.sum())
    print(pair_separation_bins_1.ar_count.sum(), pair_separation_bins_2.ar_count.sum())

    # compare the summed difference of every accumulated quantity
    for attr_name in ('ar_flux', 'ar_count', 'ar_weights'):
        difference = getattr(pair_separation_bins_1, attr_name) - getattr(pair_separation_bins_2, attr_name)
        self.assertAlmostEqual(difference.sum(), 0, 6)

    # NOTE(review): plot is hard-coded to True, so plt.show() runs on every
    # invocation of this test -- confirm this is intended for automated runs.
    plot = True
    if plot:
        # plt.set_cmap('gray')
        flux_per_bin = np.sum(pair_separation_bins_1.ar_flux, axis=2)
        weight_per_bin = np.sum(pair_separation_bins_1.ar_weights, axis=2)
        # bins with zero weight produce nan/inf; silence the warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            ar_est = flux_per_bin / weight_per_bin
        plt.imshow(ar_est, interpolation='nearest')
        plt.show()
class ISMTransmittanceAccumulator:
    """
    Gather ISM spectra chunks into the forest ISM spectrum file
    (the file named by settings.get_forest_ism_npy()).

    Meant to be used as a helper object driven by
    mpi_accumulate.accumulate_over_spectra.
    """

    def __init__(self, num_spectra):
        """
        Open the output container for num_spectra spectra and zero it.

        :param num_spectra: number of spectra slots in the output file.
        """
        self.num_spectra = num_spectra
        self.forest_ism_file = NpSpectrumContainer(
            False, num_spectra=self.num_spectra,
            filename=settings.get_forest_ism_npy(),
            max_wavelength_count=1000)
        # running count of spectra copied so far
        self.n = 0
        # initialize file
        self.forest_ism_file.zero()

    def accumulate(self, result_enum, ar_qso_indices_list, object_results):
        """
        Copy every spectrum of every chunk into its slot in the output file.

        :param result_enum: iterable of chunk arrays, each readable through
            NpSpectrumContainer.from_np_array.
        :param ar_qso_indices_list: iterable, parallel to result_enum, of the
            destination indices for the spectra of each chunk.
        :param object_results: unused.
        :return: the value of return_result().
        """
        # unused parameter:
        del object_results

        output_file = self.forest_ism_file
        for chunk_array, chunk_qso_indices in zip(result_enum, ar_qso_indices_list):
            chunk_spectra = NpSpectrumContainer.from_np_array(chunk_array, readonly=True)
            # note: no bound check of self.n against self.num_spectra is done here.
            for spectrum, qso_index in zip(NpSpectrumIterator(chunk_spectra), chunk_qso_indices):
                output_file.set_wavelength(qso_index, spectrum.get_wavelength())
                output_file.set_flux(qso_index, spectrum.get_flux())
                output_file.set_ivar(qso_index, spectrum.get_ivar())
                self.n += 1
            # progress report after each chunk
            l_print_no_barrier("n =", self.n)
        # final report once all chunks are consumed
        l_print_no_barrier("n =", self.n)
        return self.return_result()

    def return_result(self):
        """Return a (spectra copied so far, None) tuple."""
        return self.n, None

    def finalize(self):
        """Nothing to finalize."""
        pass
class DeltaTransmittanceAccumulator:
    """
    Gather delta transmittance chunks into a single memory mapped file
    (the file named by settings.get_delta_t_npy()).

    Meant to be used as a helper object driven by
    mpi_accumulate.accumulate_over_spectra.
    """

    def __init__(self, num_spectra):
        """
        Open the output container for num_spectra spectra and zero it.

        :param num_spectra: number of spectra slots in the output file.
        """
        self.num_spectra = num_spectra
        self.delta_t_file = NpSpectrumContainer(
            False, num_spectra=self.num_spectra,
            filename=settings.get_delta_t_npy(),
            max_wavelength_count=1000)
        # running count of spectra copied so far
        self.n = 0
        # initialize file
        self.delta_t_file.zero()

    def accumulate(self, result_enum, ar_qso_indices_list, object_results):
        """
        Copy every spectrum of every chunk into its slot in the output file.

        :param result_enum: iterable of chunk arrays, each readable through
            NpSpectrumContainer.from_np_array.
        :param ar_qso_indices_list: iterable, parallel to result_enum, of the
            destination indices for the spectra of each chunk.
        :param object_results: unused.
        :return: the value of return_result().
        """
        # unused parameter:
        del object_results

        output_file = self.delta_t_file
        for chunk_array, chunk_qso_indices in zip(result_enum, ar_qso_indices_list):
            chunk_spectra = NpSpectrumContainer.from_np_array(chunk_array, readonly=True)
            # note: no bound check of self.n against self.num_spectra is done here.
            for spectrum, qso_index in zip(NpSpectrumIterator(chunk_spectra), chunk_qso_indices):
                output_file.set_wavelength(qso_index, spectrum.get_wavelength())
                output_file.set_flux(qso_index, spectrum.get_flux())
                output_file.set_ivar(qso_index, spectrum.get_ivar())
                self.n += 1
            # progress report after each chunk
            l_print_no_barrier("n =", self.n)
        # final report once all chunks are consumed
        l_print_no_barrier("n =", self.n)
        return self.return_result()

    def return_result(self):
        """Return a (spectra copied so far, None) tuple."""
        return self.n, None

    def finalize(self):
        """Nothing to finalize."""
        pass
def delta_transmittance_chunk(qso_record_table):
    """
    Compute the delta transmittance of every QSO in one chunk of the record table.

    For each QSO with a good continuum fit, the forest transmittance is divided
    by the mean transmittance interpolated at the forest redshifts, 1 is
    subtracted, non-finite pixels are dropped, and forests spanning more than
    500 (in the units returned by cd.fast_comoving_distance) are detrended with
    a +-300 boxcar. Results are stored with redshift in the wavelength field.

    QSOs with a bad fit, with an empty forest, or with no finite pixel left
    keep their zero-initialized record.

    :param qso_record_table: table of QSO records for this chunk; must be
        non-empty, since its first row's 'index' is read.
    :return: tuple of (chunk container as a numpy array, None).
    """
    start_offset = qso_record_table[0]['index']
    spectra = read_spectrum_hdf5.SpectraWithMetadata(
        qso_record_table, settings.get_qso_spectra_hdf5())
    continuum_fit_file = ContinuumFitContainerFiles(False)

    num_spectra = len(qso_record_table)
    delta_t = NpSpectrumContainer(False, num_spectra=num_spectra)
    # warning: np.ndarray is not initialized by default. zeroing manually.
    delta_t.zero()

    m = mean_transmittance.MeanTransmittance.from_file(
        settings.get_mean_transmittance_npy())
    # m = median_transmittance.MedianTransmittance.from_file(settings.get_median_transmittance_npy())
    # for debugging with a small data set:
    # ignore values with less than 20 sample points
    ar_z_mean_transmittance, ar_mean_transmittance = m.get_weighted_mean_with_minimum_count(20)
    # ar_z_mean_transmittance, ar_mean_transmittance = m.get_weighted_median_with_minimum_count(20, weighted=True)
    remove_dla = RemoveDlaSimple()

    pixel_weight = pixel_weight_coefficients.PixelWeight(
        pixel_weight_coefficients.DEFAULT_WEIGHT_Z_RANGE)

    for n in range(num_spectra):
        qso_spec_obj = spectra.return_spectrum(n)
        index = qso_spec_obj.qso_rec.index

        if not continuum_fit_file.get_is_good_fit(index):
            local_delta_stats['bad_fit'] += 1
            l_print_no_barrier("skipped QSO (bad fit): ", qso_spec_obj.qso_rec)
            continue

        ar_fit_spectrum = continuum_fit_file.get_flux(index)
        # we assume the fit spectrum uses the same wavelengths.

        lya_forest_transmittance = qso_transmittance(
            qso_spec_obj, ar_fit_spectrum, local_delta_stats,
            downsample_factor=settings.get_forest_downsample_factor())
        ar_z = lya_forest_transmittance.ar_z

        # an empty forest leaves the (zeroed) record untouched.
        if ar_z.size:
            # prepare the mean transmittance for the z range of this QSO
            ar_mean_flux_for_z_range = np.asarray(
                np.interp(ar_z, ar_z_mean_transmittance, ar_mean_transmittance))

            # delta transmittance is the change in relative transmittance vs the mean
            # therefore, subtract 1.
            ar_delta_t = lya_forest_transmittance.ar_transmittance / ar_mean_flux_for_z_range - 1

            # finish the error estimation, and save it
            ar_delta_t_ivar = pixel_weight.eval(
                lya_forest_transmittance.ar_ivar,
                ar_mean_flux_for_z_range * lya_forest_transmittance.ar_fit,
                ar_z)

            # simple DLA removal (without using a catalog)
            if settings.get_enable_simple_dla_removal():
                # remove DLA regions by setting the ivar of nearby pixels to 0
                ar_dla_mask = remove_dla.get_mask(ar_delta_t)
                if np.any(ar_dla_mask):
                    l_print_no_barrier("DLA(s) removed from QSO: ", qso_spec_obj.qso_rec)
                ar_delta_t_ivar[ar_dla_mask] = 0

            # ignore nan or infinite values (in case m_mean has incomplete data because of a low sample size)
            # Note: using wavelength field to store redshift
            finite_mask = np.logical_and(np.isfinite(ar_delta_t), np.isfinite(ar_delta_t_ivar))
            finite_z = ar_z[finite_mask]

            # If no pixel survived the mask, indexing the first/last distance
            # below would raise IndexError and abort the whole chunk; treat the
            # forest as an empty record instead.
            if finite_z.size:
                finite_delta_t = ar_delta_t[finite_mask]
                finite_ivar = ar_delta_t_ivar[finite_mask]

                # detrend forests with large enough range in comoving coordinates:
                finite_distances = cd.fast_comoving_distance(finite_z)
                if finite_distances[-1] - finite_distances[0] > 500:
                    delta_t_boxcar = nu_boxcar(finite_distances, finite_delta_t,
                                               lambda c: c - 300, lambda c: c + 300,
                                               weights=finite_ivar)
                    finite_delta_t = finite_delta_t - delta_t_boxcar

                delta_t.set_wavelength(n, finite_z)
                delta_t.set_flux(n, finite_delta_t)
                delta_t.set_ivar(n, finite_ivar)

        # counts every QSO that passed the continuum fit check, empty or not.
        delta_transmittance_chunk.num_spec += 1

    l_print_no_barrier("finished chunk, num spectra:", delta_transmittance_chunk.num_spec,
                       " offset: ", start_offset)
    return delta_t.as_np_array(), None
def ism_transmittance_chunk(qso_record_table):
    """
    Build ISM "delta" spectra for one chunk of the QSO record table.

    For each QSO, the redshifts and ivar of its existing delta transmittance
    record are read, an ISM spectrum is picked according to the QSO's g-band
    extinction, resampled onto the forest wavelengths and rescaled by the ratio
    of the QSO extinction to the extinction level of the chosen bin.
    Pixels with a non-finite result or zero ivar are dropped.

    :param qso_record_table: table of QSO records for this chunk; must be
        non-empty, since its first row's 'index' is read.
    :return: tuple of (chunk container as a numpy array, None).
    """
    start_offset = qso_record_table[0]['index']
    delta_transmittance_file = NpSpectrumContainer(
        readonly=True, filename=settings.get_delta_t_npy(), max_wavelength_count=1000)

    num_spectra = len(qso_record_table)
    ism_delta_t = NpSpectrumContainer(False, num_spectra=num_spectra)
    # warning: np.ndarray is not initialized by default. zeroing manually.
    ism_delta_t.zero()

    # limit maximum bin number because higher extinction bins are not reliable
    # NOTE(review): max() never yields fewer bins than ar_extinction_levels has,
    # so no limit is actually applied; min()/clipping may have been intended -- confirm.
    max_extinction_bin = max(20, ar_extinction_levels.size)
    ar_bin_numbers = np.arange(max_extinction_bin)

    ism_scale_factor = 1.
    num_processed = 0
    for i in range(num_spectra):
        qso_rec = QSORecord.from_row(qso_record_table[i])
        index = qso_rec.index

        # read original delta transmittance (wavelength field holds redshift)
        ar_redshift = delta_transmittance_file.get_wavelength(index)
        ar_ivar = delta_transmittance_file.get_ivar(index)

        ar_wavelength = (ar_redshift + 1) * lya_center  # type: np.ndarray

        # choose the extinction bin nearest to this QSO's extinction;
        # a non-finite extinction falls back to bin 0.
        if not np.isfinite(qso_rec.extinction_g):
            extinction_bin = 0
        else:
            fractional_bin = np.interp(qso_rec.extinction_g, ar_extinction_levels, ar_bin_numbers)
            extinction_bin = int(np.round(fractional_bin))

        l_print_no_barrier("extinction_bin = ", extinction_bin)

        # resample the bin's ISM spectrum; wavelengths outside its range become nan.
        ism_wavelengths, ism_values = extinction_spectra_list[extinction_bin][0:2]
        ar_ism_resampled = np.interp(ar_wavelength, ism_wavelengths, ism_values,
                                     left=np.nan, right=np.nan)
        extinction = ar_extinction_levels[extinction_bin]
        # rescale according to QSO extinction
        l_print_no_barrier(qso_rec.extinction_g, extinction)
        ar_ism_delta = ar_ism_resampled - 1
        ar_flux_new = ar_ism_delta * ism_scale_factor * qso_rec.extinction_g / extinction

        # keep pixels with a finite value and a non-zero ivar
        mask = np.logical_and(np.isfinite(ar_flux_new), ar_ivar)

        ism_delta_t.set_wavelength(i, ar_redshift[mask])
        ism_delta_t.set_flux(i, ar_flux_new[mask])
        # use original ivar because we are not correcting an existing spectrum
        ism_delta_t.set_ivar(i, ar_ivar[mask])

        num_processed += 1

    l_print_no_barrier("chunk n =", num_processed, "offset =", start_offset)
    return ism_delta_t.as_np_array(), None
class ContinuumFitContainer(object):
    """
    Continuum fit spectra paired with a per-spectrum metadata table.

    Spectra live in an NpSpectrumContainer (self.np_spectrum); metadata lives
    in a table (self.continuum_fit_metadata) with the columns
    'index', 'is_good_fit', 'goodness_of_fit' and 'snr'.
    """

    def __init__(self, num_spectra=-1):
        """
        Create a zeroed spectrum container and a metadata table,
        both sized by num_spectra.
        """
        self.num_spectra = num_spectra
        self.np_spectrum = NpSpectrumContainer(readonly=False, num_spectra=num_spectra)

        # (column name, dtype) in table order
        column_specs = (('index', 'i8'),
                        ('is_good_fit', 'b'),
                        ('goodness_of_fit', 'f8'),
                        ('snr', 'f8'))
        self.continuum_fit_metadata = table.Table()
        self.continuum_fit_metadata.add_columns(
            [table.Column(name=column_name, dtype=column_dtype, unit=None, length=num_spectra)
             for column_name, column_dtype in column_specs])

        # initialize array
        self.np_spectrum.zero()

    # --- spectrum access (delegated to the spectrum container) ---

    def get_wavelength(self, n):
        """Return the wavelength array of spectrum n."""
        return self.np_spectrum.get_wavelength(n)

    def get_flux(self, n):
        """Return the flux array of spectrum n."""
        return self.np_spectrum.get_flux(n)

    def set_wavelength(self, n, data):
        """Store data as the wavelength array of spectrum n."""
        self.np_spectrum.set_wavelength(n, data)

    def set_flux(self, n, data):
        """Store data as the flux array of spectrum n."""
        self.np_spectrum.set_flux(n, data)

    # --- metadata access ---

    def set_metadata(self, n, is_good_fit, goodness_of_fit, snr):
        """Write metadata row n; the 'index' field is set to n itself."""
        row_values = [n, is_good_fit, goodness_of_fit, snr]
        self.continuum_fit_metadata[n] = row_values

    def copy_metadata(self, n, metadata):
        """Overwrite metadata row n with an existing metadata row."""
        self.continuum_fit_metadata[n] = metadata

    def get_metadata(self, n):
        """Return metadata row n."""
        return self.continuum_fit_metadata[n]

    def get_is_good_fit(self, n):
        """Return the 'is_good_fit' field of metadata row n."""
        row = self.get_metadata(n)
        return row['is_good_fit']

    def get_goodness_of_fit(self, n):
        """Return the 'goodness_of_fit' field of metadata row n."""
        row = self.get_metadata(n)
        return row['goodness_of_fit']

    def get_snr(self, n):
        """Return the 'snr' field of metadata row n."""
        row = self.get_metadata(n)
        return row['snr']

    # --- conversion ---

    @classmethod
    def from_np_array_and_object(cls, np_array, obj):
        """
        Build an instance around an existing spectrum array and metadata object.

        A fresh instance is constructed first and its members are then swapped
        for the supplied data; obj must be of exactly the same type as the
        freshly created metadata table.
        """
        # TODO: consider refactoring.
        existing_spectra = NpSpectrumContainer.from_np_array(np_array, readonly=True)
        instance = cls(num_spectra=existing_spectra.num_spectra)
        # replace spectrum container with existing data
        instance.np_spectrum = existing_spectra
        # replace metadata with existing metadata object
        assert type(instance.continuum_fit_metadata) == type(obj)
        instance.continuum_fit_metadata = obj
        return instance

    def as_object(self):
        """Return the metadata table."""
        return self.continuum_fit_metadata

    def as_np_array(self):
        """Return the spectrum container's data as a numpy array."""
        return self.np_spectrum.as_np_array()