def profile_main():
    galaxy_metadata_file_npy = settings.get_galaxy_metadata_npy()
    histogram_output_npz = settings.get_ism_histogram_npz()
    galaxy_record_table = table.Table(np.load(galaxy_metadata_file_npy))
    num_extinction_bins = settings.get_num_extinction_bins()
    extinction_field_name = settings.get_extinction_source()
    ism_object_classes = settings.get_ism_object_classes()
    galaxy_table_mask = np.array(
        [i in ism_object_classes for i in galaxy_record_table['class']])
    galaxy_record_table = galaxy_record_table[galaxy_table_mask]

    # group results into extinction bins with a roughly equal number of spectra.
    galaxy_record_table.sort([extinction_field_name])

    # remove objects with unknown extinction
    galaxy_record_table = galaxy_record_table[np.where(
        np.isfinite(galaxy_record_table[extinction_field_name]))]

    chunk_sizes, chunk_offsets = get_chunks(len(galaxy_record_table),
                                            num_extinction_bins)

    for i in range(num_extinction_bins):
        extinction_bin_start = chunk_offsets[i]
        extinction_bin_end = extinction_bin_start + chunk_sizes[i]

        extinction_bin_record_table = galaxy_record_table[
            extinction_bin_start:extinction_bin_end]

        # compute group statistics before the plate sort, while the table is
        # still sorted by extinction.
        group_parameters = {
            'extinction_bin_number': i,
            'extinction_minimum': extinction_bin_record_table[extinction_field_name][0],
            'extinction_maximum': extinction_bin_record_table[extinction_field_name][-1],
            'extinction_average': np.mean(extinction_bin_record_table[extinction_field_name]),
            'extinction_median': np.median(extinction_bin_record_table[extinction_field_name]),
        }

        # sort by plate to avoid constantly switching FITS files (which are per plate).
        extinction_bin_record_table.sort(['plate', 'mjd', 'fiberID'])

        base_filename, file_extension = splitext(histogram_output_npz)
        histogram_output_filename = '{}_{:02d}{}'.format(
            base_filename, i, file_extension)

        r_print('Starting extinction bin {}'.format(i))
        calc_median_spectrum(extinction_bin_record_table,
                             histogram_output_filename,
                             group_parameters=group_parameters)
        r_print('Finished extinction bin {}'.format(i))
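
# A minimal sketch of the (chunk_sizes, chunk_offsets) contract that get_chunks is
# used with above. The project's own implementation lives in mpi_helper and may
# differ; the hypothetical helper below only documents the convention that chunk i
# covers rows [chunk_offsets[i], chunk_offsets[i] + chunk_sizes[i]).
import numpy as np


def get_chunks_sketch(num_items, num_chunks):
    """Split num_items into num_chunks nearly equal parts.

    Returns (chunk_sizes, chunk_offsets) so that chunk i covers
    items[chunk_offsets[i]:chunk_offsets[i] + chunk_sizes[i]].
    """
    base_size, remainder = divmod(num_items, num_chunks)
    # the first `remainder` chunks get one extra item
    chunk_sizes = np.full(num_chunks, base_size, dtype=int)
    chunk_sizes[:remainder] += 1
    chunk_offsets = np.concatenate(([0], np.cumsum(chunk_sizes)[:-1]))
    return chunk_sizes, chunk_offsets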
def accumulate_over_spectra(func, accumulator):
    qso_record_table = table.Table(np.load(
        settings.get_qso_metadata_npy()))  # type: table.Table
    qso_record_count = len(qso_record_table)

    chunk_sizes, chunk_offsets = mpi_helper.get_chunks(qso_record_count,
                                                       comm.size)

    local_start_index = chunk_offsets[comm.rank]
    local_size = chunk_sizes[comm.rank]
    local_end_index = local_start_index + local_size

    if comm.rank == 0:
        global_acc = accumulator(qso_record_count)

    local_qso_record_table = itertools.islice(
        qso_record_table, int(local_start_index),
        int(local_end_index))  # type: Iterable[table.Row]
    l_print_no_barrier("-----", qso_record_count, local_start_index,
                       local_end_index, local_size)
    slice_size = settings.get_file_chunk_size()
    qso_chunks_iterable = enumerate(
        split_seq(slice_size, local_qso_record_table))
    for slice_number, qso_record_table_chunk in qso_chunks_iterable:
        local_result = func(qso_record_table_chunk)
        # all large data is stored in an array as the first tuple element.
        ar_local_result = local_result[0]
        # generic objects (slower) can be stored in the second tuple element.
        object_local_result = local_result[1]
        assert isinstance(ar_local_result, np.ndarray)
        ar_all_results = np.zeros(shape=(comm.size, ) +
                                  tuple(ar_local_result.shape))
        comm.Gatherv(ar_local_result, ar_all_results, root=0)
        ar_qso_indices = np.zeros(shape=(comm.size, slice_size), dtype=int)
        # noinspection PyTypeChecker
        comm.Gatherv(np.array([x['index'] for x in qso_record_table_chunk]),
                     ar_qso_indices)

        # metadata, or anything else that is small but may have complex data
        # types, is transferred as objects:
        object_all_results = comm.gather(object_local_result)

        # "reduce" results
        if comm.rank == 0:
            global_acc.accumulate(ar_all_results, ar_qso_indices,
                                  object_all_results)
            global_acc.finalize()

    l_print_no_barrier("------------------------------")
    if comm.rank == 0:
        return global_acc.return_result()
    else:
        return None, None
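
# A minimal, self-contained sketch of the gather pattern used in
# accumulate_over_spectra, assuming mpi4py (which the comm.Gatherv/comm.gather
# calls above suggest). It uses Gather instead of Gatherv for brevity: every rank
# contributes a fixed-shape numpy array, rank 0 receives the (comm.size, ...)
# stacked result, and small Python objects travel through the lowercase,
# pickle-based gather. All names below are illustrative only.
from mpi4py import MPI

import numpy as np


def gather_sketch():
    comm = MPI.COMM_WORLD
    # every rank must contribute the same shape for the buffer-based gather
    ar_local = np.full(4, comm.rank, dtype=float)
    ar_all = np.zeros((comm.size, 4), dtype=float) if comm.rank == 0 else None
    comm.Gather(ar_local, ar_all, root=0)

    # arbitrary (small) Python objects go through the pickling gather
    metadata_all = comm.gather({'rank': comm.rank})

    if comm.rank == 0:
        print(ar_all)
        print(metadata_all)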
def profile_main():
    # initialize data sources
    qso_record_table = table.Table(np.load(settings.get_qso_metadata_npy()))
    if settings.get_ism_only_mode():
        delta_t_filename = settings.get_forest_ism_npy()
    else:
        delta_t_filename = settings.get_delta_t_npy()

    delta_t_file = NpSpectrumContainer(True,
                                       num_spectra=len(qso_record_table),
                                       filename=delta_t_filename,
                                       max_wavelength_count=1000)

    # prepare data for quicker access
    qso_record_list = [QSORecord.from_row(i) for i in qso_record_table]
    ar_ra = np.array([i.ra for i in qso_record_list])
    ar_dec = np.array([i.dec for i in qso_record_list])
    ar_z = np.array([i.z for i in qso_record_list])
    ar_distance = cd.fast_comoving_distance(ar_z)
    mpi_helper.r_print('QSO table size:', len(ar_distance))

    # TODO: find a more precise value instead of z=1.9
    # set maximum QSO angular separation to 200 Mpc/h (in co-moving coordinates)
    # the article assumes h is measured in units of 100 km/s/Mpc
    radius_quantity = (200. * (100. * u.km / (u.Mpc * u.s)) /
                       cd.H0)  # type: u.Quantity
    max_transverse_separation = radius_quantity.value
    max_parallel_separation = radius_quantity.value
    max_angular_separation = max_transverse_separation / (
        cd.comoving_distance(1.9) / u.radian)
    mpi_helper.r_print('maximum separation of QSOs:',
                       Angle(max_angular_separation).to_string(unit=u.degree))

    # print(ar_list)
    coord_set = coord.SkyCoord(ra=ar_ra * u.degree,
                               dec=ar_dec * u.degree,
                               distance=ar_distance * u.Mpc)

    data_state = None
    computation_state = None

    # either initialize variables or load them to resume
    if settings.get_resume():
        if comm.rank == 0:
            # resume an existing state
            data_state = pickle.load(
                open(settings.get_restartable_data_state_p(),
                     'rb'))  # type: DataState
            computation_state = pickle.load(
                open(settings.get_restartable_computation_state_p(),
                     'rb'))  # type: ComputationState
    else:
        if comm.rank == 0:
            # initialize a new state

            # create a random permutation of the coordinate set
            # (this is done to balance the load on the nodes)
            new_coord_permutation = np.random.permutation(len(coord_set))
            # data_state should hold everything required to reproduce the exact same computation,
            # so that it is possible to restart it from the last completed bundle.
            # NOTE: currently there is no plan to check for consistency on load.
            # changing the input data before restarting will produce undefined results.
            data_state = DataState(
                mpi_comm_size=comm.size,
                coord_permutation=new_coord_permutation,
                max_angular_separation=max_angular_separation)
            computation_state = ComputationState(bundle_index=0,
                                                 sub_chunk_index=0)

            pickle.dump(data_state,
                        open(settings.get_restartable_data_state_p(), 'wb'))

    # send state to all nodes:
    data_state = comm.bcast(data_state)
    computation_state = comm.bcast(computation_state)  # type: ComputationState

    if max_angular_separation != data_state.max_angular_separation:
        raise Exception(
            "Cannot resume, angular separation has changed ({}->{})".format(
                data_state.max_angular_separation, max_angular_separation))
    if comm.size != data_state.mpi_comm_size:
        raise Exception("Cannot resume, MPI COMM size must be {}".format(
            data_state.mpi_comm_size))

    coord_permutation = data_state.coord_permutation
    first_sub_chunk_index = computation_state.sub_chunk_index

    # find all QSO pairs
    chunk_sizes, chunk_offsets = mpi_helper.get_chunks(len(coord_set),
                                                       comm.size)

    local_start_index = chunk_offsets[comm.rank]
    local_end_index = local_start_index + chunk_sizes[comm.rank]

    if settings.get_enable_weighted_median_estimator():
        accumulator_type = calc_pixel_pairs.accumulator_types.histogram
        assert not settings.get_enable_weighted_mean_estimator(), \
            "Median and mean estimators are mutually exclusive."
        assert not settings.get_enable_estimator_subsamples(), \
            "Subsamples not supported for histogram."
    elif settings.get_enable_weighted_mean_estimator():
        if settings.get_enable_estimator_subsamples():
            accumulator_type = calc_pixel_pairs.accumulator_types.mean_subsample
        else:
            accumulator_type = calc_pixel_pairs.accumulator_types.mean
    else:
        assert False, "Either median or mean estimators must be specified."

    pixel_pairs_object = calc_pixel_pairs.PixelPairs(
        cd,
        max_transverse_separation,
        max_parallel_separation,
        accumulator_type=accumulator_type)

    # divide the work into sub chunks
    # Warning: the number of sub chunks must be identical for all nodes,
    # because gather is called after each sub chunk.
    # NOTE: we no longer divide by comm.size to make the sub chunk size independent
    # of the number of nodes, because pairs are generated in bundles instead of
    # once at the beginning.
    num_sub_chunks_per_node = settings.get_mpi_num_sub_chunks()

    sub_chunk_helper = SubChunkHelper(pixel_pairs_object,
                                      settings.get_resume())
    for bundle_index, local_qso_pair_angles, local_qso_pairs in generate_pairs(
            ar_dec, ar_ra, coord_permutation, coord_set, local_end_index,
            local_start_index, max_angular_separation,
            bundle_start_index=computation_state.bundle_index):

        pixel_pair_sub_chunks = mpi_helper.get_chunks(
            local_qso_pairs.shape[0], num_sub_chunks_per_node)
        sub_chunk_iterator = islice(
            enumerate(zip(pixel_pair_sub_chunks[0],
                          pixel_pair_sub_chunks[1])), first_sub_chunk_index,
            None)

        # if resuming from a previous run, use the value in first_sub_chunk_index only once:
        first_sub_chunk_index = 0

        for sub_chunk_index, (i, j) in sub_chunk_iterator:
            # save computation state to allow restarting
            if comm.rank == 0:
                save_computation_state(bundle_index=bundle_index,
                                       sub_chunk_index=sub_chunk_index)

            sub_chunk_start = j
            sub_chunk_end = j + i

            mpi_helper.l_print("sub_chunk: size", i, ", starting at", j, ",",
                               sub_chunk_index, "out of",
                               len(pixel_pair_sub_chunks[0]))
            sub_chunk_helper.add_pairs_in_sub_chunk(
                delta_t_file, local_qso_pair_angles,
                local_qso_pairs[sub_chunk_start:sub_chunk_end],
                pixel_pairs_object)

    # done. update the computation state one last time, with a very large bundle index.
    if comm.rank == 0:
        save_computation_state(bundle_index=sys.maxsize,
                               sub_chunk_index=sys.maxsize)
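
# A minimal sketch of the maximum-angular-separation cut used above, written
# against astropy.cosmology instead of this project's `cd` module (an assumption
# made only for illustration; Planck15 stands in for whatever cosmology cd uses).
# The logic is the same: express 200 Mpc/h in Mpc for the adopted H0, then divide
# by the comoving distance at the lowest redshift of interest (z = 1.9) to obtain
# an angle on the sky.
import astropy.units as u
from astropy.coordinates import Angle
from astropy.cosmology import Planck15


def max_angular_separation_sketch(z_min=1.9):
    # 200 Mpc/h in Mpc, with h = H0 / (100 km/s/Mpc)
    radius = (200. / Planck15.h) * u.Mpc
    # comoving distance per unit angle at z_min
    distance_per_radian = Planck15.comoving_distance(z_min) / u.radian
    return Angle((radius / distance_per_radian).to(u.rad))


# e.g. print(max_angular_separation_sketch().to_string(unit=u.degree))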
def profile_main():
    # x = coord.SkyCoord(ra=10.68458*u.deg, dec=41.26917*u.deg, frame='icrs')
    # min_distance = cd.comoving_distance_transverse(2.1, **fidcosmo)
    # print('minimum distance', min_distance, 'Mpc/rad')

    # initialize data sources
    qso_record_table = table.Table(np.load(settings.get_qso_metadata_npy()))

    # prepare data for quicker access
    qso_record_list = [QSORecord.from_row(i) for i in qso_record_table]
    ar_ra = np.array([i.ra for i in qso_record_list])
    ar_dec = np.array([i.dec for i in qso_record_list])
    ar_z = np.array([i.z for i in qso_record_list])
    ar_extinction = np.array([i.extinction_g for i in qso_record_list])
    ar_distance = cd.fast_comoving_distance(ar_z)
    mpi_helper.r_print('QSO table size:', len(ar_distance))

    # TODO: find a more precise value instead of z=1.9
    # set maximum QSO angular separation to 200 Mpc/h (in co-moving coordinates)
    # the article assumes h is measured in units of 100 km/s/Mpc
    radius_quantity = (200. * (100. * u.km / (u.Mpc * u.s)) /
                       cd.H0)  # type: u.Quantity
    radius = radius_quantity.value
    max_angular_separation = radius / (cd.comoving_distance(1.9) / u.radian)
    mpi_helper.r_print('maximum separation of QSOs:',
                       Angle(max_angular_separation).to_string(unit=u.degree))

    # print(ar_list)
    coord_set = coord.SkyCoord(ra=ar_ra * u.degree,
                               dec=ar_dec * u.degree,
                               distance=ar_distance * u.Mpc)
    # print(coord_set)

    # find all QSO pairs
    chunk_sizes, chunk_offsets = mpi_helper.get_chunks(len(coord_set),
                                                       comm.size)

    local_start_index = chunk_offsets[comm.rank]
    local_end_index = local_start_index + chunk_sizes[comm.rank]

    mpi_helper.l_print('matching objects in range:', local_start_index, 'to',
                       local_end_index)
    # each node matches a range of objects against the full list.
    count = matching.search_around_sky(
        coord_set[local_start_index:local_end_index], coord_set,
        max_angular_separation)

    # search_around_sky returns indices into the input lists.
    # each node should add its offset to get the QSO index in the original list
    # (only for count[0]; count[1] already contains the unmodified index into the
    # full list of QSOs).
    # a running index is also stored so we can keep a reference to the angles vector.
    local_qso_index_1 = count[0] + local_start_index
    local_qso_index_2 = count[1]

    # find the mean ra, dec for each pair
    local_qso_ra_pairs = np.vstack(
        (ar_ra[local_qso_index_1], ar_ra[local_qso_index_2]))
    local_qso_dec_pairs = np.vstack(
        (ar_dec[local_qso_index_1], ar_dec[local_qso_index_2]))

    # we can safely assume that the separations are small enough to avoid
    # catastrophic cancellation in the mean, so checking the unit-radius value
    # is not required.
    local_pair_means_ra, local_pair_means_dec, _ = find_spherical_mean_deg(
        local_qso_ra_pairs, local_qso_dec_pairs, axis=0)

    sky_groups = SkyGroups(nside=settings.get_healpix_nside())
    group_id = sky_groups.get_group_ids(local_pair_means_ra,
                                        local_pair_means_dec)

    local_qso_pairs_with_unity = np.vstack(
        (local_qso_index_1, local_qso_index_2, group_id,
         np.arange(count[0].size)))

    local_qso_pair_angles = count[2].to(u.rad).value
    mpi_helper.l_print('number of QSO pairs (including identity pairs):',
                       count[0].size)
    mpi_helper.l_print('angle vector size:', local_qso_pair_angles.size)

    # remove pairs of the same QSO.
    # local_qso_pairs = local_qso_pairs_with_unity.T[local_qso_pairs_with_unity[1] != local_qso_pairs_with_unity[0]]

    # remove pairs of the same QSO, which may have different [plate, mjd, fiber].
    # assume that QSOs within roughly 10 arc-seconds (5e-5 rad) are the same object.
    local_qso_pairs = local_qso_pairs_with_unity.T[
        local_qso_pair_angles > 5e-5]

    mpi_helper.l_print(
        'total number of redundant objects removed:',
        local_qso_pairs_with_unity.shape[1] - local_qso_pairs.shape[0] -
        chunk_sizes[comm.rank])

    # l_print(pairs)
    mpi_helper.l_print('number of QSO pairs:', local_qso_pairs.shape[0])
    # l_print('angle vector:', x[2])

    # divide the work into sub chunks
    # Warning: the number of sub chunks must be identical for all nodes,
    # because gather is called after each sub chunk.
    # divide by comm.size to make the sub chunk size independent of the number of nodes.
    num_sub_chunks_per_node = settings.get_mpi_num_sub_chunks() // comm.size

    pixel_pair_sub_chunks = mpi_helper.get_chunks(local_qso_pairs.shape[0],
                                                  num_sub_chunks_per_node)
    sub_chunk_helper = SubChunkHelper(ar_extinction)
    for i, j, k in zip(pixel_pair_sub_chunks[0], pixel_pair_sub_chunks[1],
                       itertools.count()):
        sub_chunk_start = j
        sub_chunk_end = j + i

        mpi_helper.l_print("sub_chunk: size", i, ", starting at", j, ",", k,
                           "out of", len(pixel_pair_sub_chunks[0]))
        sub_chunk_helper.add_pairs_in_sub_chunk(
            local_qso_pair_angles,
            local_qso_pairs[sub_chunk_start:sub_chunk_end])
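
# A minimal sketch of the per-pair spherical mean assumed above.
# find_spherical_mean_deg itself lives elsewhere in this code base; the
# hypothetical helper below only illustrates the usual unit-vector averaging
# approach and the (ra, dec, norm) return convention suggested by its usage.
import numpy as np


def spherical_mean_deg_sketch(ar_ra_deg, ar_dec_deg, axis=0):
    """Average points on the sphere by averaging their unit vectors.

    ar_ra_deg, ar_dec_deg: arrays of RA/Dec in degrees, with the points to be
    averaged laid out along `axis` (e.g. shape (2, N) for N pairs, axis=0).
    Returns (mean_ra_deg, mean_dec_deg, mean_vector_norm).
    """
    ra = np.radians(ar_ra_deg)
    dec = np.radians(ar_dec_deg)
    # unit vectors on the sphere
    x = np.cos(dec) * np.cos(ra)
    y = np.cos(dec) * np.sin(ra)
    z = np.sin(dec)
    # average the Cartesian components of each pair
    mx, my, mz = (np.mean(c, axis=axis) for c in (x, y, z))
    norm = np.sqrt(mx ** 2 + my ** 2 + mz ** 2)
    mean_ra = np.degrees(np.arctan2(my, mx)) % 360.
    mean_dec = np.degrees(np.arcsin(mz / norm))
    return mean_ra, mean_dec, norm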
def profile_main():
    galaxy_metadata_file_npy = settings.get_galaxy_metadata_npy()
    histogram_output_npz = settings.get_ism_real_median_npz()
    galaxy_record_table = table.Table(np.load(galaxy_metadata_file_npy))
    num_extinction_bins = settings.get_num_extinction_bins()
    extinction_field_name = settings.get_extinction_source()

    # group results into extinction bins with a roughly equal number of spectra.
    galaxy_record_table.sort([extinction_field_name])

    # remove objects with unknown extinction
    galaxy_record_table = galaxy_record_table[np.where(
        np.isfinite(galaxy_record_table[extinction_field_name]))]

    # if comm.size > num_extinction_bins:
    #     raise Exception('too many MPI nodes')

    # split the work into 'jobs' for each MPI node.
    # a job is defined as a single extinction bin.
    # the index of every extinction bin is its job number.
    job_sizes, job_offsets = get_chunks(num_extinction_bins, comm.size)
    job_start = job_offsets[comm.rank]
    job_end = job_start + job_sizes[comm.rank]

    chunk_sizes, chunk_offsets = get_chunks(len(galaxy_record_table),
                                            num_extinction_bins)

    for i in range(job_start, job_end):
        extinction_bin_start = chunk_offsets[i]
        extinction_bin_end = extinction_bin_start + chunk_sizes[i]

        extinction_bin_record_table = galaxy_record_table[
            extinction_bin_start:extinction_bin_end]

        # compute group statistics before the plate sort, while the table is
        # still sorted by extinction.
        group_parameters = {
            'extinction_bin_number': i,
            'extinction_minimum': extinction_bin_record_table[extinction_field_name][0],
            'extinction_maximum': extinction_bin_record_table[extinction_field_name][-1],
            'extinction_mean': np.mean(extinction_bin_record_table[extinction_field_name]),
            'extinction_median': np.median(extinction_bin_record_table[extinction_field_name]),
        }

        # sort by plate to avoid constantly switching FITS files (which are per plate).
        extinction_bin_record_table.sort(['plate', 'mjd', 'fiberID'])

        base_filename, file_extension = splitext(histogram_output_npz)
        output_filename = '{}_{:02d}{}'.format(base_filename, i,
                                               file_extension)

        l_print_no_barrier('Starting extinction bin {}'.format(i))
        calc_median_spectrum(extinction_bin_record_table,
                             output_filename,
                             group_parameters=group_parameters)
        l_print_no_barrier('Finished extinction bin {}'.format(i))

    for _ in barrier_sleep(comm, use_yield=True):
        l_print_no_barrier("waiting")
def calc_median_spectrum(galaxy_record_table, histogram_output_npz,
                         group_parameters):
    histogram = np.zeros(shape=(num_bins, spec_size))
    global_histogram = np.zeros(shape=(num_bins, spec_size))

    chunk_sizes, chunk_offsets = get_chunks(len(galaxy_record_table),
                                            comm.size)

    local_start_index = chunk_offsets[comm.rank]
    local_end_index = local_start_index + chunk_sizes[comm.rank]

    update_gather_mask = get_update_mask(num_update_gather,
                                         chunk_sizes[comm.rank])

    spectrum_iterator = enum_spectra(
        qso_record_table=galaxy_record_table[local_start_index:local_end_index],
        pre_sort=False,
        and_mask=np.uint32(0),
        or_mask=np.uint32(0))
    for n, spectrum in enumerate(spectrum_iterator):  # type: (int, QSOData)
        # resample the spectrum onto the common wavelength grid
        ar_flux = np.interp(ar_wavelength,
                            spectrum.ar_wavelength,
                            spectrum.ar_flux,
                            left=np.nan,
                            right=np.nan)
        ar_ivar = np.interp(ar_wavelength,
                            spectrum.ar_wavelength,
                            spectrum.ar_ivar,
                            left=np.nan,
                            right=np.nan)

        ar_trend = savgol_filter(ar_flux, detrend_window, polyorder=2)

        # de-trend the spectrum
        ar_flux /= ar_trend

        # map each de-trended flux value to a histogram bin index
        ar_flux_int = np.empty(shape=spec_size, dtype=int)
        ar_flux_int[:] = ((ar_flux - flux_min) * num_bins /
                          flux_range).astype(int)

        # clip out-of-range values into the first and last bins
        ar_flux_int[ar_flux_int >= num_bins] = num_bins - 1
        ar_flux_int[ar_flux_int < 0] = 0

        # noinspection PyArgumentList
        mask = np.logical_and.reduce(
            (np.isfinite(ar_flux), ar_ivar > 0, ar_trend > 2.))

        x = ar_flux_int[mask]
        y = np.arange(spec_size)[mask]
        # c = np.ones_like(y)
        c = ar_trend[mask]
        histogram[x, y] += c

        if update_gather_mask[n]:
            reduce_and_save(output_file=histogram_output_npz,
                            global_histogram=global_histogram,
                            histogram=histogram,
                            group_parameters=group_parameters)
            # l_print_no_barrier(n)

    list_n = comm.gather(n)
    if comm.rank == 0:
        r_print(sum(list_n))
    r_print('------------')
    reduce_and_save(output_file=histogram_output_npz,
                    global_histogram=global_histogram,
                    histogram=histogram,
                    group_parameters=group_parameters)
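
# A minimal sketch of how a median spectrum can be recovered from the per-pixel
# flux histogram accumulated above. The project's reduce_and_save handles the MPI
# reduction and saving; the bin-center reconstruction below is an assumption made
# for illustration, reusing the same flux_min/flux_range/num_bins binning that
# calc_median_spectrum applies when filling the histogram.
import numpy as np


def median_from_histogram_sketch(histogram, flux_min, flux_range, num_bins):
    """histogram has shape (num_bins, spec_size): rows are flux bins, columns
    are wavelength pixels. Returns the (approximate) weighted median flux per
    wavelength pixel."""
    cumulative = np.cumsum(histogram, axis=0)
    total = cumulative[-1]
    # first flux bin whose cumulative weight reaches half of the total weight
    median_bin = np.argmax(cumulative >= total / 2., axis=0)
    # convert the bin index back to a flux value at the bin center
    bin_width = flux_range / num_bins
    median_flux = flux_min + (median_bin + 0.5) * bin_width
    # pixels with no data have no meaningful median
    median_flux[total == 0] = np.nan
    return median_flux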