def accumulate(self, result_enum, qso_record_table, object_results):
    del qso_record_table, object_results
    for ar_m_med in result_enum:
        l_print_no_barrier("--- mean accumulate ----")
        m = mean_transmittance.MeanTransmittance.from_np_array(ar_m_med[0:4])
        self.m.merge(m)
        med = median_transmittance.MedianTransmittance.from_np_array(ar_m_med[4:])
        self.med.merge(med)
    return self.return_result()

def calc_delta_transmittance():
    comm.Barrier()
    accumulate_over_spectra(delta_transmittance_chunk, DeltaTransmittanceAccumulator)
    l_print_no_barrier(pprint.pformat(local_delta_stats))
    comm.Barrier()
    stats_list = comm.gather(local_delta_stats)
    if comm.rank == 0:
        total_stats = sum(stats_list, Counter())
        r_print(pprint.pformat(total_stats))

def accumulate_over_spectra(func, accumulator):
    qso_record_table = table.Table(np.load(settings.get_qso_metadata_npy()))  # type: table.Table
    qso_record_count = len(qso_record_table)

    chunk_sizes, chunk_offsets = mpi_helper.get_chunks(qso_record_count, comm.size)

    local_start_index = chunk_offsets[comm.rank]
    local_size = chunk_sizes[comm.rank]
    local_end_index = local_start_index + local_size
    if comm.rank == 0:
        global_acc = accumulator(qso_record_count)

    local_qso_record_table = itertools.islice(
        qso_record_table, int(local_start_index), int(local_end_index))  # type: Iterable[table.Row]
    l_print_no_barrier("-----", qso_record_count, local_start_index, local_end_index, local_size)
    slice_size = settings.get_file_chunk_size()
    qso_chunks_iterable = enumerate(split_seq(slice_size, local_qso_record_table))
    for slice_number, qso_record_table_chunk in qso_chunks_iterable:
        local_result = func(qso_record_table_chunk)
        # all large data is stored in an array as the first tuple element.
        ar_local_result = local_result[0]
        # generic objects (slower) can be stored as the second tuple element.
        object_local_result = local_result[1]
        assert isinstance(ar_local_result, np.ndarray)
        ar_all_results = np.zeros(shape=(comm.size,) + tuple(ar_local_result.shape))
        comm.Gatherv(ar_local_result, ar_all_results, root=0)
        ar_qso_indices = np.zeros(shape=(comm.size, slice_size), dtype=int)
        # noinspection PyTypeChecker
        comm.Gatherv(np.array([x['index'] for x in qso_record_table_chunk]), ar_qso_indices)

        # metadata, or anything else that is small but may have complex data types, is transferred as objects:
        object_all_results = comm.gather(object_local_result)

        # "reduce" the results
        if comm.rank == 0:
            global_acc.accumulate(ar_all_results, ar_qso_indices, object_all_results)
            global_acc.finalize()

        l_print_no_barrier("------------------------------")

    if comm.rank == 0:
        return global_acc.return_result()
    else:
        return None, None

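# The following is a minimal standalone sketch (not part of the pipeline) of the gather pattern used by
# accumulate_over_spectra above: each rank contributes a fixed-size numpy block via Gatherv, while small,
# arbitrarily-typed per-rank metadata travels through the generic (pickle-based) gather, and only rank 0
# "reduces" the combined results. It assumes mpi4py and numpy; all names here are illustrative only.
from mpi4py import MPI
import numpy as np


def demo_gather_pattern():
    comm = MPI.COMM_WORLD
    # each rank produces a block with the same known shape (4 values here)
    local_block = np.full(4, comm.rank, dtype=np.float64)
    # rank 0 allocates one slot per rank; with no counts given, the receive buffer is
    # split evenly across ranks (the same convention relied on above)
    all_blocks = np.zeros(shape=(comm.size, 4), dtype=np.float64)
    comm.Gatherv(local_block, all_blocks, root=0)
    # small per-rank metadata (any picklable object) goes through the object-based gather
    all_metadata = comm.gather({'rank': comm.rank, 'count': local_block.size})
    if comm.rank == 0:
        # only rank 0 sees the combined arrays and metadata
        print(all_blocks.sum(axis=1), all_metadata)


if __name__ == '__main__':
    demo_gather_pattern()
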
def profile_main():
    continuum_fit_container = accumulate_over_spectra(do_continuum_fit_chunk, ContinuumAccumulator)

    l_print_no_barrier(pprint.pformat(local_stats))

    stats_list = comm.gather(local_stats)
    if comm.rank == 0:
        continuum_fit_metadata = continuum_fit_container.continuum_fit_metadata
        total_stats = sum(stats_list, Counter())
        r_print(pprint.pformat(total_stats))

        delta_f_snr_bins_helper = physics_functions.delta_f_snr_bins.DeltaFSNRBins()
        snr_stats = delta_f_snr_bins_helper.get_empty_histogram_array()
        for row in continuum_fit_metadata:
            snr = row['snr']
            goodness_of_fit = row['goodness_of_fit']
            # noinspection PyTypeChecker
            bin_x = delta_f_snr_bins_helper.snr_to_bin(snr)
            bin_y = delta_f_snr_bins_helper.delta_f_to_bin(goodness_of_fit)
            snr_stats[2, bin_x, bin_y] += 1

        # keep only the best fits (power law fit of the 0.9 quantile)
        power_law_fit_result, _snr_bins, _masked_snr_bins, _y_quantile = \
            continuum_goodness_of_fit.calc_fit_power_law(snr_stats[2])
        r_print('Continuum fit SNR selection Power-law: {0}'.format(
            continuum_goodness_of_fit.power_law_to_string(power_law_fit_result)))

        max_delta_f_per_snr = continuum_goodness_of_fit.get_max_delta_f_per_snr_func(power_law_fit_result)

        for row in continuum_fit_metadata:
            snr = row['snr']
            goodness_of_fit = row['goodness_of_fit']
            is_good_fit_result = (fit_pca.is_good_fit(snr, goodness_of_fit) and
                                  goodness_of_fit < max_delta_f_per_snr(snr))
            # update the QSO fit table with the final fit status
            row['is_good_fit'] = is_good_fit_result
            # noinspection PyTypeChecker
            bin_x = delta_f_snr_bins_helper.snr_to_bin(snr)
            bin_y = delta_f_snr_bins_helper.delta_f_to_bin(goodness_of_fit)
            snr_stats[1 if is_good_fit_result else 0, bin_x, bin_y] += 1

        # save the fit statistics
        np.save(settings.get_fit_snr_stats(), snr_stats)

        # save the fit metadata table
        continuum_fit_container.save()

def accumulate(self, result_enum, ar_qso_indices_list, object_results):
    del object_results
    for ar_delta_t, ar_qso_indices in zip(result_enum, ar_qso_indices_list):
        delta_t = NpSpectrumContainer.from_np_array(ar_delta_t, readonly=True)
        for j, n in zip(NpSpectrumIterator(delta_t), ar_qso_indices):
            # if self.n >= self.num_spectra:
            #     break
            self.delta_t_file.set_wavelength(n, j.get_wavelength())
            self.delta_t_file.set_flux(n, j.get_flux())
            self.delta_t_file.set_ivar(n, j.get_ivar())
            self.n += 1
        l_print_no_barrier("n =", self.n)
    l_print_no_barrier("n =", self.n)
    return self.return_result()

def calc_mean_transmittance():
    m, med = accumulate_over_spectra(mean_transmittance_chunk, MeanTransmittanceAccumulator)
    l_print_no_barrier("-------- END MEAN TRANSMITTANCE -------------")
    l_print_no_barrier(pprint.pformat(local_mean_stats))
    comm.Barrier()
    stats_list = comm.gather(local_mean_stats)
    if comm.rank == 0:
        total_stats = sum(stats_list, Counter())
        r_print(pprint.pformat(total_stats))
        # decide whether to save mean/median results based on common settings:
        if settings.get_enable_weighted_mean_estimator():
            m.save(settings.get_mean_transmittance_npy())
        if settings.get_enable_weighted_median_estimator():
            med.save(settings.get_median_transmittance_npy())

def calc_median_spectrum(galaxy_record_table, histogram_output_npz, group_parameters):
    num_spectra = len(galaxy_record_table)
    # allocate a very big array:
    spectra = np.zeros(shape=(num_spectra, spec_size))
    spectrum_iterator = enum_spectra(qso_record_table=galaxy_record_table, pre_sort=False,
                                     and_mask=np.uint32(0), or_mask=np.uint32(0))
    for n, spectrum in enumerate(spectrum_iterator):  # type: (int, QSOData)
        ar_flux = np.interp(ar_wavelength, spectrum.ar_wavelength, spectrum.ar_flux,
                            left=np.nan, right=np.nan)
        ar_ivar = np.interp(ar_wavelength, spectrum.ar_wavelength, spectrum.ar_ivar,
                            left=np.nan, right=np.nan)
        ar_trend = savgol_filter(ar_flux, detrend_window, polyorder=2)
        # de-trend the spectrum
        ar_flux /= ar_trend
        # noinspection PyArgumentList
        mask = np.logical_and.reduce((np.isfinite(ar_flux), ar_ivar > 0, ar_trend > 0.5))
        ar_flux[~mask] = np.nan
        spectra[n] = ar_flux

    l_print_no_barrier('Starting Median Calculation')
    # calculate the median of the entire array
    ar_median = np.nanmedian(spectra, axis=0)
    l_print_no_barrier('Saving: {}'.format(histogram_output_npz))
    save(output_file=histogram_output_npz, ar_median=ar_median, group_parameters=group_parameters)

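# A small self-contained illustration (not used by the pipeline) of the detrend-then-stack step in
# calc_median_spectrum above: each spectrum is divided by a Savitzky-Golay smoothed version of itself,
# rejected pixels become NaN, and the stack is reduced with a NaN-aware median. It assumes numpy and
# scipy; the window length and array sizes below are illustrative only.
import numpy as np
from scipy.signal import savgol_filter


def detrended_median_stack(spectra, window_length=101, polyorder=2):
    detrended = np.full_like(spectra, np.nan)
    for i, flux in enumerate(spectra):
        # smooth trend of the individual spectrum
        trend = savgol_filter(flux, window_length, polyorder=polyorder)
        with np.errstate(invalid='ignore', divide='ignore'):
            ratio = flux / trend
        # keep only finite pixels with a meaningful trend value
        ratio[~(np.isfinite(ratio) & (trend > 0.5))] = np.nan
        detrended[i] = ratio
    # median over all spectra, ignoring NaN pixels
    return np.nanmedian(detrended, axis=0)


if __name__ == '__main__':
    rng = np.random.default_rng(1)
    demo_spectra = 1.0 + 0.1 * rng.normal(size=(50, 1000))
    print(detrended_median_stack(demo_spectra)[:5])
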
def accumulate(self, result_enum, ar_qso_indices_list, object_all_results):
    for ar_continua, ar_qso_indices, object_result in zip(
            result_enum, ar_qso_indices_list, object_all_results):
        continua = ContinuumFitContainer.from_np_array_and_object(ar_continua, object_result)
        # array based mpi gather returns zeros at the end of the global array.
        # use the fact that the object based gather returns the correct number of elements:
        num_spectra = len(object_result)
        for n in range(num_spectra):
            index = ar_qso_indices[n]
            self.continuum_fit_container.set_wavelength(index, continua.get_wavelength(n))
            self.continuum_fit_container.set_flux(index, continua.get_flux(n))
            # TODO: refactor
            self.continuum_fit_container.copy_metadata(index, continua.get_metadata(n))
            self.n += 1
        l_print_no_barrier("n =", self.n)
    l_print_no_barrier("n =", self.n)

def mean_transmittance_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    spectra = read_spectrum_hdf5.SpectraWithMetadata(qso_record_table, settings.get_qso_spectra_hdf5())
    continuum_fit_file = ContinuumFitContainerFiles(False)

    m = mean_transmittance.MeanTransmittance(np.arange(*z_range))
    med = median_transmittance.MedianTransmittance(np.arange(*z_range))
    for n in range(len(qso_record_table)):
        qso_spec_obj = spectra.return_spectrum(n)
        index = qso_spec_obj.qso_rec.index

        ar_fit_spectrum = continuum_fit_file.get_flux(index)
        if not continuum_fit_file.get_is_good_fit(index):
            local_mean_stats['bad_fit'] += 1
            l_print_no_barrier("skipped QSO (bad fit): ", qso_spec_obj.qso_rec)
            continue

        lya_forest_transmittance_binned = qso_transmittance_binned(qso_spec_obj, ar_fit_spectrum,
                                                                   local_mean_stats)
        if lya_forest_transmittance_binned.ar_transmittance.size:
            # save mean and/or median according to common settings:
            if settings.get_enable_weighted_mean_estimator():
                m.add_flux_pre_binned(lya_forest_transmittance_binned.ar_transmittance,
                                      lya_forest_transmittance_binned.ar_mask,
                                      lya_forest_transmittance_binned.ar_ivar)
            if settings.get_enable_weighted_median_estimator():
                med.add_flux_pre_binned(lya_forest_transmittance_binned.ar_transmittance,
                                        lya_forest_transmittance_binned.ar_mask,
                                        lya_forest_transmittance_binned.ar_ivar)
            mean_transmittance_chunk.num_spec += 1

    l_print_no_barrier("finished chunk, num spectra:", mean_transmittance_chunk.num_spec,
                       " offset: ", start_offset)
    return np.vstack((m.as_np_array(), med.as_np_array())), None

def ism_transmittance_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    # spectra = read_spectrum_hdf5.SpectraWithMetadata(qso_record_table, settings.get_qso_spectra_hdf5())
    # continuum_fit_file = NpSpectrumContainer(True, filename=settings.get_continuum_fit_npy())
    delta_transmittance_file = NpSpectrumContainer(readonly=True,
                                                   filename=settings.get_delta_t_npy(),
                                                   max_wavelength_count=1000)

    num_spectra = len(qso_record_table)
    ism_delta_t = NpSpectrumContainer(False, num_spectra=num_spectra)
    # warning: np.ndarray is not initialized by default. zeroing manually.
    ism_delta_t.zero()
    n = 0
    for i in range(len(qso_record_table)):
        qso_rec = QSORecord.from_row(qso_record_table[i])
        index = qso_rec.index

        # read original delta transmittance
        ar_redshift = delta_transmittance_file.get_wavelength(index)
        # ar_flux = delta_transmittance_file.get_flux(index)
        ar_ivar = delta_transmittance_file.get_ivar(index)

        # get correction to ISM
        # ar_flux_new, ar_ivar_new, is_corrected = pre_process_spectrum.mw_lines.apply_correction(
        #     ar_wavelength, np.ones_like(ar_flux), ar_ivar, qso_rec.ra, qso_rec.dec)

        ar_wavelength = (ar_redshift + 1) * lya_center  # type: np.ndarray
        # limit maximum bin number because higher extinction bins are not reliable
        max_extinction_bin = max(20, ar_extinction_levels.size)

        if np.isfinite(qso_rec.extinction_g):
            extinction_bin = int(np.round(np.interp(qso_rec.extinction_g, ar_extinction_levels,
                                                    np.arange(max_extinction_bin))))
        else:
            extinction_bin = 0

        l_print_no_barrier("extinction_bin = ", extinction_bin)
        ar_ism_resampled = np.interp(ar_wavelength,
                                     extinction_spectra_list[extinction_bin][0],
                                     extinction_spectra_list[extinction_bin][1],
                                     left=np.nan, right=np.nan)
        extinction = ar_extinction_levels[extinction_bin]
        # rescale according to QSO extinction
        l_print_no_barrier(qso_rec.extinction_g, extinction)
        ism_scale_factor = 1.
        ar_flux_new = (ar_ism_resampled - 1) * ism_scale_factor * qso_rec.extinction_g / extinction

        mask = np.logical_and(np.isfinite(ar_flux_new), ar_ivar)

        ism_delta_t.set_wavelength(i, ar_redshift[mask])
        # use reciprocal to get absorption spectrum, then subtract 1 to get the delta
        ism_delta_t.set_flux(i, ar_flux_new[mask])
        # ism_delta_t.set_flux(i, np.ones_like(ar_flux) * qso_rec.extinction_g)
        # use original ivar because we are not correcting an existing spectrum
        ism_delta_t.set_ivar(i, ar_ivar[mask])

        n += 1

    l_print_no_barrier("chunk n =", n, "offset =", start_offset)
    return ism_delta_t.as_np_array(), None

def do_continuum_fit_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    spectra = read_spectrum_hdf5.SpectraWithMetadata(qso_record_table, settings.get_qso_spectra_hdf5())
    num_spectra = len(qso_record_table)
    continuum_chunk = ContinuumFitContainer(num_spectra)

    # DISABLED FOR NOW
    # use_existing_mean_transmittance = os.path.exists(settings.get_median_transmittance_npy()) and os.path.exists(
    #     settings.get_mean_delta_t_npy())
    use_existing_mean_transmittance = False

    median_flux_correction_func = None
    if use_existing_mean_transmittance:
        # m = mean_transmittance.MeanTransmittance.from_file(settings.get_mean_transmittance_npy())
        med = median_transmittance.MedianTransmittance.from_file(settings.get_median_transmittance_npy())
        # for debugging with a small data set:
        # ignore values with less than 20 sample points
        # ar_z_mean_flux, ar_mean_flux = m.get_weighted_mean_with_minimum_count(20)
        ar_z_mean_flux, ar_mean_flux = med.get_weighted_median_with_minimum_count(20)

        def median_flux_func(ar_z):
            return np.interp(ar_z, ar_z_mean_flux, ar_mean_flux)

        ar_z_mean_correction, ar_mean_correction = get_weighted_mean_from_file()

        def median_flux_correction_func(ar_z):
            return median_flux_func(ar_z) * (1 - np.interp(ar_z, ar_z_mean_correction, ar_mean_correction))

    for n in range(len(qso_record_table)):
        current_qso_data = spectra.return_spectrum(n)

        pre_processed_qso_data, result_string = pre_process_spectrum.apply(current_qso_data)
        if result_string != 'processed':
            # error during pre-processing. log statistics of error causes.
            local_stats[result_string] += 1
            continue

        ar_wavelength = pre_processed_qso_data.ar_wavelength
        ar_flux = pre_processed_qso_data.ar_flux
        ar_ivar = pre_processed_qso_data.ar_ivar
        qso_rec = pre_processed_qso_data.qso_rec
        # set z after pre-processing, because BAL QSOs have a visually inspected redshift.
        z = qso_rec.z
        assert ar_flux.size == ar_ivar.size

        if not ar_ivar.sum() > 0 or not np.any(np.isfinite(ar_flux)):
            # no useful data
            local_stats['empty'] += 1
            continue

        fit_result = fit_pca.fit(ar_wavelength / (1 + z), ar_flux, ar_ivar, z,
                                 boundary_value=np.nan,
                                 mean_flux_constraint_func=median_flux_correction_func)

        if not fit_result.is_good_fit:
            local_stats['bad_fit'] += 1
            l_print_no_barrier("bad fit QSO: ", qso_rec)

        continuum_chunk.set_wavelength(n, ar_wavelength)
        continuum_chunk.set_flux(n, fit_result.spectrum)
        # TODO: find a way to estimate error, or create a file without ivar values.

        continuum_chunk.set_metadata(n, fit_result.is_good_fit, fit_result.goodness_of_fit, fit_result.snr)

        local_stats['accepted'] += 1

    l_print_no_barrier("offset =", start_offset)
    return continuum_chunk.as_np_array(), continuum_chunk.as_object()

def delta_transmittance_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    spectra = read_spectrum_hdf5.SpectraWithMetadata(qso_record_table, settings.get_qso_spectra_hdf5())
    continuum_fit_file = ContinuumFitContainerFiles(False)

    num_spectra = len(qso_record_table)
    delta_t = NpSpectrumContainer(False, num_spectra=num_spectra)
    # warning: np.ndarray is not initialized by default. zeroing manually.
    delta_t.zero()

    m = mean_transmittance.MeanTransmittance.from_file(settings.get_mean_transmittance_npy())
    # m = median_transmittance.MedianTransmittance.from_file(settings.get_median_transmittance_npy())
    # for debugging with a small data set:
    # ignore values with less than 20 sample points
    ar_z_mean_transmittance, ar_mean_transmittance = m.get_weighted_mean_with_minimum_count(20)
    # ar_z_mean_transmittance, ar_mean_transmittance = m.get_weighted_median_with_minimum_count(20, weighted=True)
    remove_dla = RemoveDlaSimple()

    pixel_weight = pixel_weight_coefficients.PixelWeight(pixel_weight_coefficients.DEFAULT_WEIGHT_Z_RANGE)
    for n in range(len(qso_record_table)):
        qso_spec_obj = spectra.return_spectrum(n)
        index = qso_spec_obj.qso_rec.index

        if not continuum_fit_file.get_is_good_fit(index):
            local_delta_stats['bad_fit'] += 1
            l_print_no_barrier("skipped QSO (bad fit): ", qso_spec_obj.qso_rec)
            continue

        ar_fit_spectrum = continuum_fit_file.get_flux(index)
        # we assume the fit spectrum uses the same wavelengths.

        lya_forest_transmittance = qso_transmittance(
            qso_spec_obj, ar_fit_spectrum, local_delta_stats,
            downsample_factor=settings.get_forest_downsample_factor())
        ar_z = lya_forest_transmittance.ar_z
        if ar_z.size:
            # prepare the mean transmittance for the z range of this QSO
            ar_mean_flux_for_z_range = np.asarray(
                np.interp(ar_z, ar_z_mean_transmittance, ar_mean_transmittance))

            # delta transmittance is the change in relative transmittance vs the mean
            # therefore, subtract 1.
            ar_delta_t = lya_forest_transmittance.ar_transmittance / ar_mean_flux_for_z_range - 1

            # finish the error estimation, and save it
            ar_delta_t_ivar = pixel_weight.eval(lya_forest_transmittance.ar_ivar,
                                                ar_mean_flux_for_z_range * lya_forest_transmittance.ar_fit,
                                                ar_z)

            # simple DLA removal (without using a catalog)
            if settings.get_enable_simple_dla_removal():
                # remove DLA regions by setting the ivar of nearby pixels to 0
                ar_dla_mask = remove_dla.get_mask(ar_delta_t)
                if np.any(ar_dla_mask):
                    l_print_no_barrier("DLA(s) removed from QSO: ", qso_spec_obj.qso_rec)
                ar_delta_t_ivar[ar_dla_mask] = 0

            # ignore nan or infinite values (in case m_mean has incomplete data because of a low sample size)
            # Note: using wavelength field to store redshift
            finite_mask = np.logical_and(np.isfinite(ar_delta_t), np.isfinite(ar_delta_t_ivar))
            finite_z = ar_z[finite_mask]
            finite_delta_t = ar_delta_t[finite_mask]
            finite_ivar = ar_delta_t_ivar[finite_mask]

            # detrend forests with large enough range in comoving coordinates:
            finite_distances = cd.fast_comoving_distance(finite_z)
            if finite_distances[-1] - finite_distances[0] > 500:
                delta_t_boxcar = nu_boxcar(finite_distances, finite_delta_t,
                                           lambda c: c - 300, lambda c: c + 300,
                                           weights=finite_ivar)
                finite_delta_t = finite_delta_t - delta_t_boxcar

            delta_t.set_wavelength(n, finite_z)
            delta_t.set_flux(n, finite_delta_t)
            delta_t.set_ivar(n, finite_ivar)
        else:
            # empty record
            pass
        delta_transmittance_chunk.num_spec += 1

    l_print_no_barrier("finished chunk, num spectra:", delta_transmittance_chunk.num_spec,
                       " offset: ", start_offset)
    return delta_t.as_np_array(), None

def qso_transmittance(qso_spec_obj, ar_fit_spectrum, stats, downsample_factor=1):
    """
    :type qso_spec_obj: QSOData
    :type ar_fit_spectrum: np.ndarray
    :type stats: Counter
    :type downsample_factor: int
    :return:
    """
    empty_result = LyaForestTransmittance(np.array([]), np.array([]), np.array([]), np.array([]))

    pre_processed_qso_data, result_string = pre_process_spectrum.apply(qso_spec_obj)

    # set z after pre-processing, because BAL QSOs have a visually inspected redshift.
    qso_rec = qso_spec_obj.qso_rec
    z = qso_rec.z

    if result_string != 'processed':
        # error during pre-processing. log statistics of error causes.
        stats[result_string] += 1
        return empty_result

    ar_wavelength = pre_processed_qso_data.ar_wavelength
    ar_flux = pre_processed_qso_data.ar_flux
    ar_ivar = pre_processed_qso_data.ar_ivar
    assert ar_flux.size == ar_ivar.size

    if not ar_fit_spectrum.size:
        stats['empty_fit'] += 1
        l_print_no_barrier("skipped QSO (empty fit): ", qso_rec)
        return empty_result

    assert ar_flux.size == ar_fit_spectrum.size

    if not ar_ivar.sum() > 0 or not np.any(np.isfinite(ar_flux)):
        # no useful data
        stats['empty'] += 1
        return empty_result

    if downsample_factor != 1:
        # downsample the continuum (don't replace ar_wavelength and ar_ivar yet)
        _, ar_fit_spectrum, _ = downsample_spectrum(ar_wavelength, ar_fit_spectrum, ar_ivar,
                                                    downsample_factor)
        # downsample the spectrum
        ar_wavelength, ar_flux, ar_ivar = downsample_spectrum(ar_wavelength, ar_flux, ar_ivar,
                                                              downsample_factor)

    # transmission is only meaningful in the Lyman-alpha range, and also requires a valid fit for that wavelength.
    # use the same range as in 1404.1801 (2014)
    forest_mask = np.logical_and(ar_wavelength > 1040 * (1 + z), ar_wavelength < 1200 * (1 + z))
    fit_mask = ~np.isnan(ar_fit_spectrum)
    # since at high redshift the sample size becomes smaller,
    # discard all forest pixels that have a redshift greater/less than a globally defined value
    min_redshift = settings.get_min_forest_redshift()
    max_redshift = settings.get_max_forest_redshift()
    ar_redshift = ar_wavelength / lya_center - 1

    redshift_mask = (min_redshift < ar_redshift) & (ar_redshift < max_redshift)
    redshift_mask &= get_line_masks(ar_redshift)

    ivar_mask = ar_ivar > 0

    # combine all the different masks
    effective_mask = forest_mask & fit_mask & redshift_mask & ivar_mask
    ar_wavelength_masked = np.asarray(ar_wavelength[effective_mask])
    ar_fit_spectrum_masked = ar_fit_spectrum[effective_mask]

    # make sure we have any pixels left before calling ar_fit_spectrum_masked.min()
    if ar_wavelength_masked.size < (150 / downsample_factor):
        stats['low_count'] += 1
        l_print_no_barrier("skipped QSO (low pixel count): ", qso_rec)
        return empty_result

    fit_min_value = ar_fit_spectrum_masked.min()
    if fit_min_value < min_continuum_threshold:
        stats['low_continuum'] += 1
        l_print_no_barrier("skipped QSO (low continuum) :", qso_rec)
        return empty_result

    stats['accepted'] += 1
    l_print_no_barrier("accepted QSO", qso_rec)

    # suppress divide by zero: NaNs can be introduced by the downsample_spectrum method
    with np.errstate(divide='ignore'):
        ar_rel_transmittance = ar_flux / ar_fit_spectrum
    ar_rel_transmittance_masked = ar_rel_transmittance[effective_mask]
    ar_z_masked = ar_wavelength_masked / lya_center - 1
    assert ar_z_masked.size == ar_rel_transmittance_masked.size
    assert not np.isnan(ar_rel_transmittance_masked.sum())

    # calculate the weight of each point as in delta_t (without the mean transmittance part)
    ar_pipeline_ivar_masked = ar_ivar[effective_mask] * np.square(ar_fit_spectrum_masked)

    # optional: remove the weighted average of each forest
    # rel_transmittance_weighted_mean = np.average(ar_rel_transmittance_masked,
    #                                              weights=ar_pipeline_ivar_masked)
    # ar_rel_transmittance -= rel_transmittance_weighted_mean

    l_print_no_barrier("mean transmittance for QSO:",
                       (ar_flux[effective_mask] / ar_fit_spectrum_masked).mean())

    return LyaForestTransmittance(ar_z_masked, ar_rel_transmittance_masked, ar_pipeline_ivar_masked,
                                  ar_fit_spectrum_masked)

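# A small illustrative check (not used by the pipeline) of the weight propagation applied in
# qso_transmittance above: for a relative transmittance T = F / C with the continuum C treated as exact,
# var(T) = var(F) / C**2, so ivar(T) = ivar(F) * C**2, which is what
# ar_pipeline_ivar_masked = ar_ivar[effective_mask] * np.square(ar_fit_spectrum_masked) computes.
# Assumes only numpy; all names below are illustrative.
import numpy as np


def propagate_transmittance_ivar(ar_flux_ivar, ar_continuum):
    # inverse variance of T = F / C, treating the continuum as exact
    return ar_flux_ivar * np.square(ar_continuum)


if __name__ == '__main__':
    rng = np.random.default_rng(0)
    continuum = np.array([2.0, 4.0])
    flux_sigma = np.array([0.1, 0.2])
    ivar_t = propagate_transmittance_ivar(1.0 / flux_sigma ** 2, continuum)
    # empirical check with Monte-Carlo draws of the flux
    draws = rng.normal(loc=continuum, scale=flux_sigma, size=(200000, 2)) / continuum
    print(ivar_t, 1.0 / draws.var(axis=0))  # the two should roughly agree
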
def profile_main():
    galaxy_metadata_file_npy = settings.get_galaxy_metadata_npy()
    histogram_output_npz = settings.get_ism_real_median_npz()
    galaxy_record_table = table.Table(np.load(galaxy_metadata_file_npy))
    num_extinction_bins = settings.get_num_extinction_bins()
    extinction_field_name = settings.get_extinction_source()

    # group results into extinction bins with roughly equal number of spectra.
    galaxy_record_table.sort([extinction_field_name])
    # remove objects with unknown extinction
    galaxy_record_table = galaxy_record_table[np.where(
        np.isfinite(galaxy_record_table[extinction_field_name]))]

    # if comm.size > num_extinction_bins:
    #     raise Exception('too many MPI nodes')

    # split the work into 'jobs' for each MPI node.
    # a job is defined as a single extinction bin.
    # the index of every extinction bin is its job number.
    job_sizes, job_offsets = get_chunks(num_extinction_bins, comm.size)
    job_start = job_offsets[comm.rank]
    job_end = job_start + job_sizes[comm.rank]

    chunk_sizes, chunk_offsets = get_chunks(len(galaxy_record_table), num_extinction_bins)
    for i in range(job_start, job_end):
        extinction_bin_start = chunk_offsets[i]
        extinction_bin_end = extinction_bin_start + chunk_sizes[i]

        extinction_bin_record_table = galaxy_record_table[extinction_bin_start:extinction_bin_end]

        # this should be done before the plate sort
        group_parameters = {
            'extinction_bin_number': i,
            'extinction_minimum': extinction_bin_record_table[extinction_field_name][0],
            'extinction_maximum': extinction_bin_record_table[extinction_field_name][-1],
            'extinction_mean': np.mean(extinction_bin_record_table[extinction_field_name]),
            'extinction_median': np.median(extinction_bin_record_table[extinction_field_name]),
        }

        # sort by plate to avoid constant switching of fits files (which are per plate).
        extinction_bin_record_table.sort(['plate', 'mjd', 'fiberID'])

        base_filename, file_extension = splitext(histogram_output_npz)
        output_filename = '{}_{:02d}{}'.format(base_filename, i, file_extension)
        l_print_no_barrier('Starting extinction bin {}'.format(i))

        calc_median_spectrum(extinction_bin_record_table, output_filename,
                             group_parameters=group_parameters)

        l_print_no_barrier('Finished extinction bin {}'.format(i))

    for _ in barrier_sleep(comm, use_yield=True):
        l_print_no_barrier("waiting")
        pass

def calc_ism_transmittance():
    comm.Barrier()
    accumulate_over_spectra(ism_transmittance_chunk, ISMTransmittanceAccumulator)
    l_print_no_barrier(pprint.pformat(local_stats))

def add_pairs_in_sub_chunk(self, delta_t_file, local_pair_angles, pairs, pixel_pairs):
    local_pair_separation_bins = \
        pixel_pairs.add_qso_pairs_to_bins(pairs, local_pair_angles, delta_t_file)

    mpi_helper.l_print('local pair count:', local_pair_separation_bins.get_pair_count())
    local_pair_separation_bins_array = local_pair_separation_bins.get_data_as_array()
    local_pair_separation_bins_metadata = local_pair_separation_bins.get_metadata()
    local_array_shape = local_pair_separation_bins_array.shape
    array_block_size = np.prod(local_array_shape[1:])

    comm.Barrier()
    mpi_helper.r_print("BEGIN GATHER")
    mpi_helper.l_print_no_barrier('local array shape:', local_array_shape)
    array_counts = comm.allgather(local_array_shape[0])

    pair_separation_bins_array = None
    array_endings = np.cumsum(array_counts)
    array_displacements = array_endings - np.array(array_counts)
    if comm.rank == 0:
        mpi_helper.r_print('array count:', array_counts)
        root_array_shape = (np.sum(array_counts),) + local_array_shape[1:]
        mpi_helper.r_print('root array shape:', root_array_shape)
        pair_separation_bins_array = np.ones(shape=root_array_shape, dtype=np.float64)

    send_buf = [local_pair_separation_bins_array,
                local_array_shape[0] * array_block_size]
    receive_buf = [pair_separation_bins_array,
                   np.multiply(array_counts, array_block_size),
                   np.multiply(array_displacements, array_block_size),
                   MPI.DOUBLE]

    # mpi_helper.l_print(send_buf)

    comm.Gatherv(sendbuf=send_buf, recvbuf=receive_buf)
    list_pair_separation_bins_metadata = comm.gather(local_pair_separation_bins_metadata)
    comm.Barrier()
    mpi_helper.r_print("END_GATHER")

    if comm.rank == 0:
        # mpi_helper.r_print(receive_buf[0][0][0:10])
        list_pair_separation_bins = [
            type(local_pair_separation_bins).load_from(
                pair_separation_bins_array[array_displacements[rank]:array_endings[rank]], metadata)
            for rank, metadata in enumerate(list_pair_separation_bins_metadata)]

        # initialize bins only if this is the first time we get here
        if not self.pair_separation_bins:
            self.pair_separation_bins = local_pair_separation_bins.init_as(local_pair_separation_bins)

        # add new results to existing bins
        if list_pair_separation_bins:
            for i in list_pair_separation_bins:
                for g in i.dict_bins_3d_data.keys():
                    mpi_helper.l_print_no_barrier(np.sum(i.dict_bins_3d_data[g].ar_count))
            self.pair_separation_bins = reduce(lambda x, y: x + y, list_pair_separation_bins,
                                               self.pair_separation_bins)

            mpi_helper.r_print('total number of pixel pairs in bins:',
                               self.pair_separation_bins.get_pair_count())
            self.pair_separation_bins.flush()
            pixel_pairs.significant_qso_pairs.save(settings.get_significant_qso_pairs_npy())
        else:
            print('no results received.')

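# A minimal standalone sketch (not part of the class above) of the variable-length Gatherv used by
# add_pairs_in_sub_chunk: each rank sends a different number of fixed-size rows, and rank 0 reassembles
# them from per-rank counts and displacements expressed in scalar elements, exactly the arithmetic done
# with array_counts, array_displacements and array_block_size above. Assumes mpi4py and numpy; the
# function name and sizes are illustrative only.
from mpi4py import MPI
import numpy as np


def demo_variable_gatherv(row_size=3):
    comm = MPI.COMM_WORLD
    # each rank contributes a different number of rows (rank + 1 here)
    local_rows = np.full((comm.rank + 1, row_size), comm.rank, dtype=np.float64)

    # exchange row counts first, then convert to element counts and displacements
    row_counts = comm.allgather(local_rows.shape[0])
    element_counts = np.multiply(row_counts, row_size)
    element_displacements = np.concatenate(([0], np.cumsum(element_counts)[:-1]))

    recv_array = None
    if comm.rank == 0:
        recv_array = np.empty((sum(row_counts), row_size), dtype=np.float64)

    send_buf = [local_rows, local_rows.size]
    recv_buf = [recv_array, element_counts, element_displacements, MPI.DOUBLE]
    comm.Gatherv(sendbuf=send_buf, recvbuf=recv_buf, root=0)

    if comm.rank == 0:
        # rows from rank r occupy the slice starting at that rank's row displacement
        row_displacements = np.concatenate(([0], np.cumsum(row_counts)[:-1]))
        for rank, (start, count) in enumerate(zip(row_displacements, row_counts)):
            print('rank', rank, 'rows:', recv_array[start:start + count])


if __name__ == '__main__':
    demo_variable_gatherv()
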
def add_pairs_in_sub_chunk(self, local_pair_angles, pairs):
    local_angular_separation_bins = \
        calc_angular_separation(pairs, local_pair_angles, self.ar_extinction, self.extinction_mean)

    mpi_helper.l_print('local pair count:', local_angular_separation_bins[1].sum())
    local_pair_separation_bins_array = local_angular_separation_bins
    local_pair_separation_bins_metadata = None
    local_array_shape = local_pair_separation_bins_array.shape
    array_block_size = np.prod(local_array_shape[1:])

    comm.Barrier()
    mpi_helper.r_print("BEGIN GATHER")
    mpi_helper.l_print_no_barrier('local array shape:', local_array_shape)
    array_counts = comm.allgather(local_array_shape[0])

    pair_separation_bins_array = None
    array_endings = np.cumsum(array_counts)
    array_displacements = array_endings - np.array(array_counts)
    if comm.rank == 0:
        mpi_helper.r_print('array count:', array_counts)
        root_array_shape = (np.sum(array_counts),) + local_array_shape[1:]
        mpi_helper.r_print('root array shape:', root_array_shape)
        pair_separation_bins_array = np.ones(shape=root_array_shape, dtype=np.float64)

    send_buf = [local_pair_separation_bins_array,
                local_array_shape[0] * array_block_size]
    receive_buf = [pair_separation_bins_array,
                   np.multiply(array_counts, array_block_size),
                   np.multiply(array_displacements, array_block_size),
                   MPI.DOUBLE]

    # mpi_helper.l_print(send_buf)

    comm.Gatherv(sendbuf=send_buf, recvbuf=receive_buf)
    list_pair_separation_bins_metadata = comm.gather(local_pair_separation_bins_metadata)
    comm.Barrier()
    mpi_helper.r_print("END_GATHER")

    if comm.rank == 0:
        # mpi_helper.r_print(receive_buf[0][0][0:10])
        list_pair_separation_bins = [
            pair_separation_bins_array[array_displacements[rank]:array_endings[rank]]
            for rank, metadata in enumerate(list_pair_separation_bins_metadata)]

        # initialize bins only if this is the first time we get here
        # for now use a function level static variable
        if self.angular_separation_bins is None:
            self.angular_separation_bins = np.zeros_like(local_angular_separation_bins)

        # add new results to existing bins
        if list_pair_separation_bins:
            self.angular_separation_bins = reduce(lambda x, y: x + y, list_pair_separation_bins,
                                                  self.angular_separation_bins)

            mpi_helper.r_print('total number of pixel pairs in bins:',
                               self.angular_separation_bins[1].sum())
            np.save("../../data/extinction_correlation.npy", self.angular_separation_bins)
            # pixel_pairs.significant_qso_pairs.save(settings.get_significant_qso_pairs_npy())
        else:
            print('no results received.')