def accumulate(self, result_enum, ar_qso_indices_list, object_results):
    del object_results
    for ar_delta_t, ar_qso_indices in zip(result_enum, ar_qso_indices_list):
        delta_t = NpSpectrumContainer.from_np_array(ar_delta_t, readonly=True)
        for j, n in zip(NpSpectrumIterator(delta_t), ar_qso_indices):
            # if self.n >= self.num_spectra:
            #     break
            self.delta_t_file.set_wavelength(n, j.get_wavelength())
            self.delta_t_file.set_flux(n, j.get_flux())
            self.delta_t_file.set_ivar(n, j.get_ivar())
            self.n += 1
        l_print_no_barrier("n =", self.n)
    l_print_no_barrier("n =", self.n)
    return self.return_result()
def get_bundles(start, end, size):
    """
    Split a range into bundles. Each bundle is a tuple with an offset and a size.
    :type start: int
    :type end: int
    :type size: int
    :rtype: tuple(int, int)
    """
    offsets = range(start, end, size)
    sizes = [size] * len(offsets)
    sizes[-1] = end - offsets[-1]
    return zip(offsets, sizes)
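# Illustrative usage of get_bundles (the values below are chosen for the example only):
# splitting the range [0, 10) into bundles of size 4 yields (offset, size) pairs that
# exactly cover the range, with the last bundle truncated.
# >>> list(get_bundles(0, 10, 4))
# [(0, 4), (4, 4), (8, 2)]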
def accumulate(self, result_enum, ar_qso_indices_list, object_all_results):
    for ar_continua, ar_qso_indices, object_result in zip(
            result_enum, ar_qso_indices_list, object_all_results):
        continua = ContinuumFitContainer.from_np_array_and_object(ar_continua, object_result)
        # array based mpi gather returns zeros at the end of the global array.
        # use the fact that the object based gather returns the correct number of elements:
        num_spectra = len(object_result)
        for n in range(num_spectra):
            index = ar_qso_indices[n]
            self.continuum_fit_container.set_wavelength(index, continua.get_wavelength(n))
            self.continuum_fit_container.set_flux(index, continua.get_flux(n))
            # TODO: refactor
            self.continuum_fit_container.copy_metadata(index, continua.get_metadata(n))
            self.n += 1
        l_print_no_barrier("n =", self.n)
    l_print_no_barrier("n =", self.n)
def profile_main():
    # initialize data sources
    qso_record_table = table.Table(np.load(settings.get_qso_metadata_npy()))
    if settings.get_ism_only_mode():
        delta_t_filename = settings.get_forest_ism_npy()
    else:
        delta_t_filename = settings.get_delta_t_npy()

    delta_t_file = NpSpectrumContainer(True,
                                       num_spectra=len(qso_record_table),
                                       filename=delta_t_filename,
                                       max_wavelength_count=1000)

    # prepare data for quicker access
    qso_record_list = [QSORecord.from_row(i) for i in qso_record_table]
    ar_ra = np.array([i.ra for i in qso_record_list])
    ar_dec = np.array([i.dec for i in qso_record_list])
    ar_z = np.array([i.z for i in qso_record_list])
    ar_distance = cd.fast_comoving_distance(ar_z)
    mpi_helper.r_print('QSO table size:', len(ar_distance))

    # TODO: find a more precise value instead of z=1.9
    # set maximum QSO angular separation to 200 Mpc/h (in co-moving coordinates).
    # the article assumes h is measured in units of 100 km/s/Mpc.
    radius_quantity = (200. * (100. * u.km / (u.Mpc * u.s)) / cd.H0)  # type: u.Quantity
    max_transverse_separation = radius_quantity.value
    max_parallel_separation = radius_quantity.value
    max_angular_separation = max_transverse_separation / (
        cd.comoving_distance(1.9) / u.radian)
    mpi_helper.r_print('maximum separation of QSOs:',
                       Angle(max_angular_separation).to_string(unit=u.degree))

    # print(ar_list)
    coord_set = coord.SkyCoord(ra=ar_ra * u.degree, dec=ar_dec * u.degree,
                               distance=ar_distance * u.Mpc)

    data_state = None
    computation_state = None

    # either initialize new state variables or load existing ones to resume
    if settings.get_resume():
        if comm.rank == 0:
            # resume an existing state
            data_state = pickle.load(
                open(settings.get_restartable_data_state_p(), 'rb'))  # type: DataState
            computation_state = pickle.load(
                open(settings.get_restartable_computation_state_p(), 'rb'))  # type: ComputationState
    else:
        if comm.rank == 0:
            # initialize a new state.
            # create a random permutation of the coordinate set
            # (this is done to balance the load on the nodes)
            new_coord_permutation = np.random.permutation(len(coord_set))
            # data_state should hold everything required to reproduce the exact same computation,
            # so that it is possible to restart it from the last completed bundle.
            # NOTE: currently there is no plan to check for consistency on load.
            # changing the input data before restarting will produce undefined results.
            data_state = DataState(mpi_comm_size=comm.size,
                                   coord_permutation=new_coord_permutation,
                                   max_angular_separation=max_angular_separation)
            computation_state = ComputationState(bundle_index=0, sub_chunk_index=0)
            pickle.dump(data_state,
                        open(settings.get_restartable_data_state_p(), 'wb'))

    # send state to all nodes:
    data_state = comm.bcast(data_state)
    computation_state = comm.bcast(computation_state)  # type: ComputationState

    if max_angular_separation != data_state.max_angular_separation:
        raise Exception(
            "Cannot resume, angular separation has changed ({}->{})".format(
                data_state.max_angular_separation, max_angular_separation))
    if comm.size != data_state.mpi_comm_size:
        raise Exception("Cannot resume, MPI COMM size must be {}".format(
            data_state.mpi_comm_size))

    coord_permutation = data_state.coord_permutation
    first_sub_chunk_index = computation_state.sub_chunk_index

    # find all QSO pairs
    chunk_sizes, chunk_offsets = mpi_helper.get_chunks(len(coord_set), comm.size)

    local_start_index = chunk_offsets[comm.rank]
    local_end_index = local_start_index + chunk_sizes[comm.rank]

    if settings.get_enable_weighted_median_estimator():
        accumulator_type = calc_pixel_pairs.accumulator_types.histogram
        assert not settings.get_enable_weighted_mean_estimator(), \
            "Median and mean estimators are mutually exclusive."
        assert not settings.get_enable_estimator_subsamples(), \
            "Subsamples not supported for histogram."
    elif settings.get_enable_weighted_mean_estimator():
        if settings.get_enable_estimator_subsamples():
            accumulator_type = calc_pixel_pairs.accumulator_types.mean_subsample
        else:
            accumulator_type = calc_pixel_pairs.accumulator_types.mean
    else:
        assert False, "Either median or mean estimators must be specified."

    pixel_pairs_object = calc_pixel_pairs.PixelPairs(
        cd, max_transverse_separation, max_parallel_separation,
        accumulator_type=accumulator_type)

    # divide the work into sub chunks
    # Warning: the number of sub chunks must be identical for all nodes because gather is called after each sub chunk.
    # NOTE: we no longer divide by comm.size to make sub chunk size independent of number of nodes,
    #       because pairs are generated in bundles, instead of once at the beginning.
    num_sub_chunks_per_node = settings.get_mpi_num_sub_chunks()

    sub_chunk_helper = SubChunkHelper(pixel_pairs_object, settings.get_resume())
    for bundle_index, local_qso_pair_angles, local_qso_pairs in generate_pairs(
            ar_dec, ar_ra, coord_permutation, coord_set,
            local_end_index, local_start_index, max_angular_separation,
            bundle_start_index=computation_state.bundle_index):

        pixel_pair_sub_chunks = mpi_helper.get_chunks(local_qso_pairs.shape[0],
                                                      num_sub_chunks_per_node)
        sub_chunk_iterator = islice(
            enumerate(zip(pixel_pair_sub_chunks[0], pixel_pair_sub_chunks[1])),
            first_sub_chunk_index, None)

        # if resuming from a previous run, use the value in first_sub_chunk_index only once:
        first_sub_chunk_index = 0

        for sub_chunk_index, (i, j) in sub_chunk_iterator:
            # save computation state to allow restarting
            if comm.rank == 0:
                save_computation_state(bundle_index=bundle_index,
                                       sub_chunk_index=sub_chunk_index)

            sub_chunk_start = j
            sub_chunk_end = j + i

            mpi_helper.l_print("sub_chunk: size", i, ", starting at", j, ",",
                               sub_chunk_index, "out of", len(pixel_pair_sub_chunks[0]))
            sub_chunk_helper.add_pairs_in_sub_chunk(
                delta_t_file, local_qso_pair_angles,
                local_qso_pairs[sub_chunk_start:sub_chunk_end],
                pixel_pairs_object)

    # done.
    # update computation state one last time, with a very large bundle index
    if comm.rank == 0:
        save_computation_state(bundle_index=sys.maxsize, sub_chunk_index=sys.maxsize)
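# The save_computation_state helper used above is defined elsewhere. Based on the resume
# logic in profile_main, which unpickles a ComputationState from
# settings.get_restartable_computation_state_p(), a minimal sketch could look like the
# commented version below. This is an assumption, not the actual implementation:
#
# def save_computation_state(bundle_index, sub_chunk_index):
#     computation_state = ComputationState(bundle_index=bundle_index,
#                                          sub_chunk_index=sub_chunk_index)
#     with open(settings.get_restartable_computation_state_p(), 'wb') as f:
#         pickle.dump(computation_state, f)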
def profile_main():
    # x = coord.SkyCoord(ra=10.68458*u.deg, dec=41.26917*u.deg, frame='icrs')
    # min_distance = cd.comoving_distance_transverse(2.1, **fidcosmo)
    # print('minimum distance', min_distance, 'Mpc/rad')

    # initialize data sources
    qso_record_table = table.Table(np.load(settings.get_qso_metadata_npy()))

    # prepare data for quicker access
    qso_record_list = [QSORecord.from_row(i) for i in qso_record_table]
    ar_ra = np.array([i.ra for i in qso_record_list])
    ar_dec = np.array([i.dec for i in qso_record_list])
    ar_z = np.array([i.z for i in qso_record_list])
    ar_extinction = np.array([i.extinction_g for i in qso_record_list])
    ar_distance = cd.fast_comoving_distance(ar_z)
    mpi_helper.r_print('QSO table size:', len(ar_distance))

    # TODO: find a more precise value instead of z=1.9
    # set maximum QSO angular separation to 200 Mpc/h (in co-moving coordinates).
    # the article assumes h is measured in units of 100 km/s/Mpc.
    radius_quantity = (200. * (100. * u.km / (u.Mpc * u.s)) / cd.H0)  # type: u.Quantity
    radius = radius_quantity.value
    max_angular_separation = radius / (cd.comoving_distance(1.9) / u.radian)
    mpi_helper.r_print('maximum separation of QSOs:',
                       Angle(max_angular_separation).to_string(unit=u.degree))

    # print(ar_list)
    coord_set = coord.SkyCoord(ra=ar_ra * u.degree, dec=ar_dec * u.degree,
                               distance=ar_distance * u.Mpc)
    # print(coord_set)

    # find all QSO pairs
    chunk_sizes, chunk_offsets = mpi_helper.get_chunks(len(coord_set), comm.size)

    local_start_index = chunk_offsets[comm.rank]
    local_end_index = local_start_index + chunk_sizes[comm.rank]
    mpi_helper.l_print('matching objects in range:', local_start_index, 'to', local_end_index)

    # each node matches a range of objects against the full list.
    count = matching.search_around_sky(
        coord_set[local_start_index:local_end_index], coord_set,
        max_angular_separation)

    # search_around_sky returns indices into the input lists.
    # each node should add its offset to get the QSO index in the original list (only for count[0]);
    # count[1] already contains the unmodified index into the full list of QSOs.
    # the third vector holds the separation angles, so we keep a reference to it as well.
    local_qso_index_1 = count[0] + local_start_index
    local_qso_index_2 = count[1]

    # find the mean ra,dec for each pair
    local_qso_ra_pairs = np.vstack((ar_ra[local_qso_index_1], ar_ra[local_qso_index_2]))
    local_qso_dec_pairs = np.vstack((ar_dec[local_qso_index_1], ar_dec[local_qso_index_2]))

    # we can safely assume that the separations are small enough that the mean suffers no
    # catastrophic cancellation, so checking the unit radius value is not required
    # (see the illustrative sketch after this function).
    local_pair_means_ra, local_pair_means_dec, _ = find_spherical_mean_deg(
        local_qso_ra_pairs, local_qso_dec_pairs, axis=0)

    sky_groups = SkyGroups(nside=settings.get_healpix_nside())
    group_id = sky_groups.get_group_ids(local_pair_means_ra, local_pair_means_dec)

    local_qso_pairs_with_unity = np.vstack((local_qso_index_1, local_qso_index_2,
                                            group_id, np.arange(count[0].size)))

    local_qso_pair_angles = count[2].to(u.rad).value
    mpi_helper.l_print('number of QSO pairs (including identity pairs):', count[0].size)
    mpi_helper.l_print('angle vector size:', local_qso_pair_angles.size)

    # remove pairs of the same QSO.
    # local_qso_pairs = local_qso_pairs_with_unity.T[local_qso_pairs_with_unity[1] != local_qso_pairs_with_unity[0]]

    # remove pairs of the same QSO, which may appear under different [plate,mjd,fiber];
    # assume that QSOs within roughly 10 arc-seconds (5e-5 rad) are the same object.
    local_qso_pairs = local_qso_pairs_with_unity.T[local_qso_pair_angles > 5e-5]

    mpi_helper.l_print('total number of redundant objects removed:',
                       local_qso_pairs_with_unity.shape[1] - local_qso_pairs.shape[0] -
                       chunk_sizes[comm.rank])

    # l_print(pairs)
    mpi_helper.l_print('number of QSO pairs:', local_qso_pairs.shape[0])
    # l_print('angle vector:', x[2])

    # divide the work into sub chunks
    # Warning: the number of sub chunks must be identical for all nodes because gather is called after each sub chunk.
    # divide by comm.size to make sub chunk size independent of number of nodes.
    num_sub_chunks_per_node = settings.get_mpi_num_sub_chunks() // comm.size

    pixel_pair_sub_chunks = mpi_helper.get_chunks(local_qso_pairs.shape[0],
                                                  num_sub_chunks_per_node)
    sub_chunk_helper = SubChunkHelper(ar_extinction)
    for i, j, k in zip(pixel_pair_sub_chunks[0], pixel_pair_sub_chunks[1],
                       itertools.count()):
        sub_chunk_start = j
        sub_chunk_end = j + i
        mpi_helper.l_print("sub_chunk: size", i, ", starting at", j, ",",
                           k, "out of", len(pixel_pair_sub_chunks[0]))
        sub_chunk_helper.add_pairs_in_sub_chunk(
            local_qso_pair_angles,
            local_qso_pairs[sub_chunk_start:sub_chunk_end])
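# The spherical mean referenced above (find_spherical_mean_deg) is defined elsewhere.
# A rough, assumed sketch of the technique it names: convert each (ra, dec) pair to
# Cartesian unit vectors, average them, and convert the mean vector back to angles.
# The norm of the mean vector (the third return value) stays close to 1 for small
# separations, which is why the calling code does not bother checking it. The actual
# implementation may differ:
#
# def find_spherical_mean_deg(ar_ra_deg, ar_dec_deg, axis=0):
#     ra, dec = np.radians(ar_ra_deg), np.radians(ar_dec_deg)
#     x = np.cos(dec) * np.cos(ra)
#     y = np.cos(dec) * np.sin(ra)
#     z = np.sin(dec)
#     mean_x = x.mean(axis=axis)
#     mean_y = y.mean(axis=axis)
#     mean_z = z.mean(axis=axis)
#     r = np.sqrt(mean_x ** 2 + mean_y ** 2 + mean_z ** 2)
#     mean_ra = np.degrees(np.arctan2(mean_y, mean_x)) % 360.
#     mean_dec = np.degrees(np.arcsin(mean_z / r))
#     return mean_ra, mean_dec, r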