Example 1
    def accumulate(self, result_enum, qso_record_table, object_results):
        del qso_record_table, object_results
        for ar_m_med in result_enum:
            l_print_no_barrier("--- mean accumulate ----")
            m = mean_transmittance.MeanTransmittance.from_np_array(
                ar_m_med[0:4])
            self.m.merge(m)
            med = median_transmittance.MedianTransmittance.from_np_array(
                ar_m_med[4:])
            self.med.merge(med)
        return self.return_result()
Example 2
def calc_delta_transmittance():
    comm.Barrier()
    accumulate_over_spectra(delta_transmittance_chunk,
                            DeltaTransmittanceAccumulator)
    l_print_no_barrier(pprint.pformat(local_delta_stats))
    comm.Barrier()

    stats_list = comm.gather(local_delta_stats)
    if comm.rank == 0:
        total_stats = sum(stats_list, Counter())
        r_print(pprint.pformat(total_stats))
Example 3
def accumulate_over_spectra(func, accumulator):
    qso_record_table = table.Table(np.load(
        settings.get_qso_metadata_npy()))  # type: table.Table
    qso_record_count = len(qso_record_table)

    chunk_sizes, chunk_offsets = mpi_helper.get_chunks(qso_record_count,
                                                       comm.size)

    local_start_index = chunk_offsets[comm.rank]
    local_size = chunk_sizes[comm.rank]
    local_end_index = local_start_index + local_size
    if comm.rank == 0:
        global_acc = accumulator(qso_record_count)

    local_qso_record_table = itertools.islice(
        qso_record_table, int(local_start_index),
        int(local_end_index))  # type: Iterable[table.Row]
    l_print_no_barrier("-----", qso_record_count, local_start_index,
                       local_end_index, local_size)
    slice_size = settings.get_file_chunk_size()
    qso_chunks_iterable = enumerate(
        split_seq(slice_size, local_qso_record_table))
    for slice_number, qso_record_table_chunk in qso_chunks_iterable:
        local_result = func(qso_record_table_chunk)
        # all large data is stored in an array as the first tuple element.
        ar_local_result = local_result[0]
        # generic objects (slower) can be stored as the second tuple element.
        object_local_result = local_result[1]

        assert isinstance(ar_local_result, np.ndarray)
        ar_all_results = np.zeros(shape=(comm.size, ) +
                                  tuple(ar_local_result.shape))
        comm.Gatherv(ar_local_result, ar_all_results, root=0)
        ar_qso_indices = np.zeros(shape=(comm.size, slice_size), dtype=int)
        # noinspection PyTypeChecker
        comm.Gatherv(np.array([x['index'] for x in qso_record_table_chunk]),
                     ar_qso_indices)

        # metadata, or anything else that is small but may have complex data types, is transferred as objects:
        object_all_results = comm.gather(object_local_result)

        # "reduce" results
        if comm.rank == 0:
            global_acc.accumulate(ar_all_results, ar_qso_indices,
                                  object_all_results)
            global_acc.finalize()

    l_print_no_barrier("------------------------------")
    if comm.rank == 0:
        return global_acc.return_result()
    else:
        return None, None
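
All of the chunk functions gathered by accumulate_over_spectra follow the convention described in the comments above: the first tuple element is a fixed-shape NumPy array collected with the fast Gatherv, and the second is an arbitrary Python object collected with the slower, pickle-based gather. Below is a minimal standalone sketch of that pattern, assuming mpi4py and a launch such as mpirun -n 4; all names here are illustrative, not part of the original module.

# Minimal sketch of the array-plus-object gather convention (assumes mpi4py).
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD

# every rank produces a fixed-shape numeric array plus a small, complex-typed object
ar_local_result = np.full(4, float(comm.rank))
object_local_result = {'rank': comm.rank, 'num_items': 4}

# fast path: one contiguous slot per rank in the receive buffer
ar_all_results = np.zeros(shape=(comm.size, ) + ar_local_result.shape)
comm.Gatherv(ar_local_result, ar_all_results, root=0)

# slow path: generic objects are pickled and gathered
object_all_results = comm.gather(object_local_result)

if comm.rank == 0:
    print(ar_all_results)
    print(object_all_results)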
Example 4
def profile_main():
    continuum_fit_container = accumulate_over_spectra(do_continuum_fit_chunk,
                                                      ContinuumAccumulator)
    l_print_no_barrier(pprint.pformat(local_stats))

    stats_list = comm.gather(local_stats)
    if comm.rank == 0:
        continuum_fit_metadata = continuum_fit_container.continuum_fit_metadata
        total_stats = sum(stats_list, Counter())
        r_print(pprint.pformat(total_stats))

        delta_f_snr_bins_helper = physics_functions.delta_f_snr_bins.DeltaFSNRBins()
        snr_stats = delta_f_snr_bins_helper.get_empty_histogram_array()
        for row in continuum_fit_metadata:
            snr = row['snr']
            goodness_of_fit = row['goodness_of_fit']
            # noinspection PyTypeChecker
            bin_x = delta_f_snr_bins_helper.snr_to_bin(snr)
            bin_y = delta_f_snr_bins_helper.delta_f_to_bin(goodness_of_fit)
            snr_stats[2, bin_x, bin_y] += 1

        # keep only the best fits (power law fit of the 0.9 quantile)
        power_law_fit_result, _snr_bins, _masked_snr_bins, _y_quantile = \
            continuum_goodness_of_fit.calc_fit_power_law(snr_stats[2])
        r_print('Continuum fit SNR selection Power-law: {0}'.format(
            continuum_goodness_of_fit.power_law_to_string(
                power_law_fit_result)))
        max_delta_f_per_snr = continuum_goodness_of_fit.get_max_delta_f_per_snr_func(
            power_law_fit_result)

        for row in continuum_fit_metadata:
            snr = row['snr']
            goodness_of_fit = row['goodness_of_fit']
            is_good_fit_result = (fit_pca.is_good_fit(snr, goodness_of_fit) and
                                  goodness_of_fit < max_delta_f_per_snr(snr))

            # update the QSO fit table with the final fit status
            row['is_good_fit'] = is_good_fit_result
            # noinspection PyTypeChecker
            bin_x = delta_f_snr_bins_helper.snr_to_bin(snr)
            bin_y = delta_f_snr_bins_helper.delta_f_to_bin(goodness_of_fit)
            snr_stats[1 if is_good_fit_result else 0, bin_x, bin_y] += 1

        # save the fit statistics
        np.save(settings.get_fit_snr_stats(), snr_stats)
        # save the fit metadata table
        continuum_fit_container.save()
Example 5
    def accumulate(self, result_enum, ar_qso_indices_list, object_results):
        del object_results
        for ar_delta_t, ar_qso_indices in zip(
                result_enum, ar_qso_indices_list):
            delta_t = NpSpectrumContainer.from_np_array(
                ar_delta_t, readonly=True)
            for j, n in zip(NpSpectrumIterator(delta_t), ar_qso_indices):
                # if self.n >= self.num_spectra:
                # break
                self.delta_t_file.set_wavelength(n, j.get_wavelength())
                self.delta_t_file.set_flux(n, j.get_flux())
                self.delta_t_file.set_ivar(n, j.get_ivar())
                self.n += 1
            l_print_no_barrier("n =", self.n)
        l_print_no_barrier("n =", self.n)
        return self.return_result()
Example 6
def calc_mean_transmittance():
    m, med = accumulate_over_spectra(mean_transmittance_chunk,
                                     MeanTransmittanceAccumulator)
    l_print_no_barrier("-------- END MEAN TRANSMITTANCE -------------")
    l_print_no_barrier(pprint.pformat(local_mean_stats))
    comm.Barrier()

    stats_list = comm.gather(local_mean_stats)
    if comm.rank == 0:
        total_stats = sum(stats_list, Counter())
        r_print(pprint.pformat(total_stats))
        # decide whether to save mean/median results based on common settings:
        if settings.get_enable_weighted_mean_estimator():
            m.save(settings.get_mean_transmittance_npy())
        if settings.get_enable_weighted_median_estimator():
            med.save(settings.get_median_transmittance_npy())
Example 7
def calc_median_spectrum(galaxy_record_table, histogram_output_npz,
                         group_parameters):
    num_spectra = len(galaxy_record_table)
    # allocate a very big array:
    spectra = np.zeros(shape=(num_spectra, spec_size))

    spectrum_iterator = enum_spectra(qso_record_table=galaxy_record_table,
                                     pre_sort=False,
                                     and_mask=np.uint32(0),
                                     or_mask=np.uint32(0))
    for n, spectrum in enumerate(spectrum_iterator):  # type: int,QSOData
        ar_flux = np.interp(ar_wavelength,
                            spectrum.ar_wavelength,
                            spectrum.ar_flux,
                            left=np.nan,
                            right=np.nan)
        ar_ivar = np.interp(ar_wavelength,
                            spectrum.ar_wavelength,
                            spectrum.ar_ivar,
                            left=np.nan,
                            right=np.nan)

        ar_trend = savgol_filter(ar_flux, detrend_window, polyorder=2)

        # de-trend the spectrum
        ar_flux /= ar_trend

        # noinspection PyArgumentList
        mask = np.logical_and.reduce(
            (np.isfinite(ar_flux), ar_ivar > 0, ar_trend > 0.5))

        ar_flux[~mask] = np.nan

        spectra[n] = ar_flux

    l_print_no_barrier('Starting Median Calculation')
    # calculate the median of the entire array
    ar_median = np.nanmedian(spectra, axis=0)

    l_print_no_barrier('Saving: {}'.format(histogram_output_npz))
    save(output_file=histogram_output_npz,
         ar_median=ar_median,
         group_parameters=group_parameters)
Example 8
    def accumulate(self, result_enum, ar_qso_indices_list, object_all_results):
        for ar_continua, ar_qso_indices, object_result in zip(
                result_enum, ar_qso_indices_list, object_all_results):

            continua = ContinuumFitContainer.from_np_array_and_object(
                ar_continua, object_result)
            # array based mpi gather returns zeros at the end of the global array.
            # use the fact that the object based gather returns the correct number of elements:
            num_spectra = len(object_result)
            for n in range(num_spectra):
                index = ar_qso_indices[n]
                self.continuum_fit_container.set_wavelength(
                    index, continua.get_wavelength(n))
                self.continuum_fit_container.set_flux(index,
                                                      continua.get_flux(n))
                # TODO: refactor
                self.continuum_fit_container.copy_metadata(
                    index, continua.get_metadata(n))
                self.n += 1
            l_print_no_barrier("n =", self.n)
        l_print_no_barrier("n =", self.n)
Example 9
def mean_transmittance_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    spectra = read_spectrum_hdf5.SpectraWithMetadata(
        qso_record_table, settings.get_qso_spectra_hdf5())
    continuum_fit_file = ContinuumFitContainerFiles(False)

    m = mean_transmittance.MeanTransmittance(np.arange(*z_range))
    med = median_transmittance.MedianTransmittance(np.arange(*z_range))
    for n in range(len(qso_record_table)):
        qso_spec_obj = spectra.return_spectrum(n)
        index = qso_spec_obj.qso_rec.index
        ar_fit_spectrum = continuum_fit_file.get_flux(index)
        if not continuum_fit_file.get_is_good_fit(index):
            local_mean_stats['bad_fit'] += 1
            l_print_no_barrier("skipped QSO (bad fit): ", qso_spec_obj.qso_rec)
            continue

        lya_forest_transmittance_binned = qso_transmittance_binned(
            qso_spec_obj, ar_fit_spectrum, local_mean_stats)
        if lya_forest_transmittance_binned.ar_transmittance.size:
            # save mean and/or median according to common settings:
            if settings.get_enable_weighted_mean_estimator():
                m.add_flux_pre_binned(
                    lya_forest_transmittance_binned.ar_transmittance,
                    lya_forest_transmittance_binned.ar_mask,
                    lya_forest_transmittance_binned.ar_ivar)
            if settings.get_enable_weighted_median_estimator():
                med.add_flux_pre_binned(
                    lya_forest_transmittance_binned.ar_transmittance,
                    lya_forest_transmittance_binned.ar_mask,
                    lya_forest_transmittance_binned.ar_ivar)
            mean_transmittance_chunk.num_spec += 1

    l_print_no_barrier("finished chunk, num spectra:",
                       mean_transmittance_chunk.num_spec, " offset: ",
                       start_offset)
    return np.vstack((m.as_np_array(), med.as_np_array())), None
Example 10
def ism_transmittance_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    # spectra = read_spectrum_hdf5.SpectraWithMetadata(qso_record_table, settings.get_qso_spectra_hdf5())
    # continuum_fit_file = NpSpectrumContainer(True, filename=settings.get_continuum_fit_npy())
    delta_transmittance_file = NpSpectrumContainer(
        readonly=True,
        filename=settings.get_delta_t_npy(),
        max_wavelength_count=1000)

    num_spectra = len(qso_record_table)
    ism_delta_t = NpSpectrumContainer(False, num_spectra=num_spectra)
    # warning: np.ndarray is not initialized by default. zeroing manually.
    ism_delta_t.zero()
    n = 0
    for i in range(len(qso_record_table)):
        qso_rec = QSORecord.from_row(qso_record_table[i])
        index = qso_rec.index

        # read original delta transmittance
        ar_redshift = delta_transmittance_file.get_wavelength(index)
        # ar_flux = delta_transmittance_file.get_flux(index)
        ar_ivar = delta_transmittance_file.get_ivar(index)

        # get correction to ISM
        # ar_flux_new, ar_ivar_new, is_corrected = pre_process_spectrum.mw_lines.apply_correction(
        #     ar_wavelength, np.ones_like(ar_flux), ar_ivar, qso_rec.ra, qso_rec.dec)

        ar_wavelength = (ar_redshift + 1) * lya_center  # type: np.ndarray
        # limit maximum bin number because higher extinction bins are not reliable
        max_extinction_bin = max(20, ar_extinction_levels.size)

        if np.isfinite(qso_rec.extinction_g):
            extinction_bin = int(
                np.round(
                    np.interp(qso_rec.extinction_g, ar_extinction_levels,
                              np.arange(max_extinction_bin))))
        else:
            extinction_bin = 0

        l_print_no_barrier("extinction_bin = ", extinction_bin)
        ar_ism_resampled = np.interp(
            ar_wavelength,
            extinction_spectra_list[extinction_bin][0],
            extinction_spectra_list[extinction_bin][1],
            left=np.nan,
            right=np.nan)
        extinction = ar_extinction_levels[extinction_bin]
        # rescale according to QSO extinction
        l_print_no_barrier(qso_rec.extinction_g, extinction)
        ism_scale_factor = 1.
        ar_flux_new = (ar_ism_resampled - 1
                       ) * ism_scale_factor * qso_rec.extinction_g / extinction

        mask = np.logical_and(np.isfinite(ar_flux_new), ar_ivar)

        ism_delta_t.set_wavelength(i, ar_redshift[mask])
        # use reciprocal to get absorption spectrum, then subtract 1 to get the delta
        ism_delta_t.set_flux(i, ar_flux_new[mask])
        # ism_delta_t.set_flux(i, np.ones_like(ar_flux) * qso_rec.extinction_g)
        # use original ivar because we are not correcting an existing spectrum
        ism_delta_t.set_ivar(i, ar_ivar[mask])

        n += 1

    l_print_no_barrier("chunk n =", n, "offset =", start_offset)
    return ism_delta_t.as_np_array(), None
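
The extinction-bin lookup above maps a QSO's extinction_g onto a bin index by interpolating its position among the tabulated extinction levels and rounding. A toy version of that step with made-up levels (and without the bin cap used in the original):

import numpy as np

# hypothetical extinction levels, one per bin
ar_extinction_levels = np.array([0.01, 0.03, 0.06, 0.10])
extinction_g = 0.05

extinction_bin = int(np.round(np.interp(extinction_g, ar_extinction_levels,
                                        np.arange(ar_extinction_levels.size))))
print(extinction_bin)  # -> 2, since 0.05 interpolates to index 1.67, which rounds to bin 2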
Example 11
def do_continuum_fit_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    spectra = read_spectrum_hdf5.SpectraWithMetadata(
        qso_record_table, settings.get_qso_spectra_hdf5())
    num_spectra = len(qso_record_table)
    continuum_chunk = ContinuumFitContainer(num_spectra)

    # DISABLED FOR NOW
    # use_existing_mean_transmittance = os.path.exists(settings.get_median_transmittance_npy()) and os.path.exists(
    #     settings.get_mean_delta_t_npy())
    use_existing_mean_transmittance = False

    median_flux_correction_func = None
    if use_existing_mean_transmittance:
        # m = mean_transmittance.MeanTransmittance.from_file(settings.get_mean_transmittance_npy())
        med = median_transmittance.MedianTransmittance.from_file(
            settings.get_median_transmittance_npy())
        # for debugging with a small data set:
        # ignore values with less than 20 sample points
        # ar_z_mean_flux, ar_mean_flux = m.get_weighted_mean_with_minimum_count(20)
        ar_z_mean_flux, ar_mean_flux = med.get_weighted_median_with_minimum_count(
            20)

        def median_flux_func(ar_z):
            return np.interp(ar_z, ar_z_mean_flux, ar_mean_flux)

        ar_z_mean_correction, ar_mean_correction = get_weighted_mean_from_file()

        def median_flux_correction_func(ar_z):
            return median_flux_func(ar_z) * (
                1 - np.interp(ar_z, ar_z_mean_correction, ar_mean_correction))

    for n in range(len(qso_record_table)):
        current_qso_data = spectra.return_spectrum(n)

        pre_processed_qso_data, result_string = pre_process_spectrum.apply(
            current_qso_data)

        if result_string != 'processed':
            # error during pre-processing. log statistics of error causes.
            local_stats[result_string] += 1
            continue

        ar_wavelength = pre_processed_qso_data.ar_wavelength
        ar_flux = pre_processed_qso_data.ar_flux
        ar_ivar = pre_processed_qso_data.ar_ivar
        qso_rec = pre_processed_qso_data.qso_rec
        # set z after pre-processing, because BAL QSOs have visually inspected redshift.
        z = qso_rec.z
        assert ar_flux.size == ar_ivar.size

        if not ar_ivar.sum() > 0 or not np.any(np.isfinite(ar_flux)):
            # no useful data
            local_stats['empty'] += 1
            continue

        fit_result = fit_pca.fit(
            ar_wavelength / (1 + z),
            ar_flux,
            ar_ivar,
            z,
            boundary_value=np.nan,
            mean_flux_constraint_func=median_flux_correction_func)

        if not fit_result.is_good_fit:
            local_stats['bad_fit'] += 1
            l_print_no_barrier("bad fit QSO: ", qso_rec)

        continuum_chunk.set_wavelength(n, ar_wavelength)
        continuum_chunk.set_flux(n, fit_result.spectrum)
        # TODO: find a way to estimate error, or create a file without ivar values.

        continuum_chunk.set_metadata(n, fit_result.is_good_fit,
                                     fit_result.goodness_of_fit,
                                     fit_result.snr)

        local_stats['accepted'] += 1

    l_print_no_barrier("offset =", start_offset)
    return continuum_chunk.as_np_array(), continuum_chunk.as_object()
Example 12
def delta_transmittance_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    spectra = read_spectrum_hdf5.SpectraWithMetadata(
        qso_record_table, settings.get_qso_spectra_hdf5())
    continuum_fit_file = ContinuumFitContainerFiles(False)

    num_spectra = len(qso_record_table)
    delta_t = NpSpectrumContainer(False, num_spectra=num_spectra)
    # warning: np.ndarray is not initialized by default. zeroing manually.
    delta_t.zero()
    m = mean_transmittance.MeanTransmittance.from_file(
        settings.get_mean_transmittance_npy())
    # m = median_transmittance.MedianTransmittance.from_file(settings.get_median_transmittance_npy())
    # for debugging with a small data set:
    # ignore values with less than 20 sample points
    ar_z_mean_transmittance, ar_mean_transmittance = m.get_weighted_mean_with_minimum_count(
        20)
    # ar_z_mean_transmittance, ar_mean_transmittance = m.get_weighted_median_with_minimum_count(20, weighted=True)
    remove_dla = RemoveDlaSimple()

    pixel_weight = pixel_weight_coefficients.PixelWeight(
        pixel_weight_coefficients.DEFAULT_WEIGHT_Z_RANGE)
    for n in range(len(qso_record_table)):
        qso_spec_obj = spectra.return_spectrum(n)
        index = qso_spec_obj.qso_rec.index

        if not continuum_fit_file.get_is_good_fit(index):
            local_delta_stats['bad_fit'] += 1
            l_print_no_barrier("skipped QSO (bad fit): ", qso_spec_obj.qso_rec)
            continue

        ar_fit_spectrum = continuum_fit_file.get_flux(index)
        # we assume the fit spectrum uses the same wavelengths.

        lya_forest_transmittance = qso_transmittance(
            qso_spec_obj,
            ar_fit_spectrum,
            local_delta_stats,
            downsample_factor=settings.get_forest_downsample_factor())
        ar_z = lya_forest_transmittance.ar_z
        if ar_z.size:
            # prepare the mean transmittance for the z range of this QSO
            ar_mean_flux_for_z_range = np.asarray(
                np.interp(ar_z, ar_z_mean_transmittance,
                          ar_mean_transmittance))

            # delta transmittance is the change in relative transmittance vs the mean
            # therefore, subtract 1.
            ar_delta_t = lya_forest_transmittance.ar_transmittance / ar_mean_flux_for_z_range - 1

            # finish the error estimation, and save it
            ar_delta_t_ivar = pixel_weight.eval(
                lya_forest_transmittance.ar_ivar,
                ar_mean_flux_for_z_range * lya_forest_transmittance.ar_fit,
                ar_z)

            # simple DLA removal (without using a catalog)
            if settings.get_enable_simple_dla_removal():
                # remove DLA regions by setting the ivar of nearby pixels to 0
                ar_dla_mask = remove_dla.get_mask(ar_delta_t)
                if np.any(ar_dla_mask):
                    l_print_no_barrier("DLA(s) removed from QSO: ",
                                       qso_spec_obj.qso_rec)
                ar_delta_t_ivar[ar_dla_mask] = 0

            # ignore nan or infinite values (in case m_mean has incomplete data because of a low sample size)
            # Note: using wavelength field to store redshift
            finite_mask = np.logical_and(np.isfinite(ar_delta_t),
                                         np.isfinite(ar_delta_t_ivar))
            finite_z = ar_z[finite_mask]
            finite_delta_t = ar_delta_t[finite_mask]
            finite_ivar = ar_delta_t_ivar[finite_mask]

            # detrend forests with large enough range in comoving coordinates:
            finite_distances = cd.fast_comoving_distance(finite_z)
            if finite_distances[-1] - finite_distances[0] > 500:
                delta_t_boxcar = nu_boxcar(finite_distances,
                                           finite_delta_t,
                                           lambda c: c - 300,
                                           lambda c: c + 300,
                                           weights=finite_ivar)
                finite_delta_t = finite_delta_t - delta_t_boxcar

            delta_t.set_wavelength(n, finite_z)
            delta_t.set_flux(n, finite_delta_t)
            delta_t.set_ivar(n, finite_ivar)
        else:
            # empty record
            pass
            delta_transmittance_chunk.num_spec += 1

    l_print_no_barrier("finished chunk, num spectra:",
                       delta_transmittance_chunk.num_spec, " offset: ",
                       start_offset)
    return delta_t.as_np_array(), None
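
As the comments in delta_transmittance_chunk state, the delta field is simply the relative transmittance divided by the mean transmittance interpolated to the same redshifts, minus 1. A toy numeric example of that step:

import numpy as np

# made-up values on a common redshift grid
ar_transmittance = np.array([0.80, 0.60, 0.75])           # relative transmittance of one forest
ar_mean_flux_for_z_range = np.array([0.78, 0.65, 0.70])   # mean transmittance at the same redshifts

# delta transmittance: fractional fluctuation around the mean
ar_delta_t = ar_transmittance / ar_mean_flux_for_z_range - 1
print(ar_delta_t)  # ~[ 0.026 -0.077  0.071]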
Example 13
def qso_transmittance(qso_spec_obj,
                      ar_fit_spectrum,
                      stats,
                      downsample_factor=1):
    """

    :type qso_spec_obj: QSOData
    :type ar_fit_spectrum: np.ndarray
    :type stats: Counter
    :type downsample_factor: int
    :return:
    """

    empty_result = LyaForestTransmittance(np.array([]), np.array([]),
                                          np.array([]), np.array([]))

    pre_processed_qso_data, result_string = pre_process_spectrum.apply(
        qso_spec_obj)

    # set z after pre-processing, because BAL QSOs have visually inspected redshift.
    qso_rec = qso_spec_obj.qso_rec
    z = qso_rec.z

    if result_string != 'processed':
        # error during pre-processing. log statistics of error causes.
        stats[result_string] += 1
        return empty_result

    ar_wavelength = pre_processed_qso_data.ar_wavelength
    ar_flux = pre_processed_qso_data.ar_flux
    ar_ivar = pre_processed_qso_data.ar_ivar

    assert ar_flux.size == ar_ivar.size

    if not ar_fit_spectrum.size:
        stats['empty_fit'] += 1
        l_print_no_barrier("skipped QSO (empty fit): ", qso_rec)
        return empty_result

    assert ar_flux.size == ar_fit_spectrum.size

    if not ar_ivar.sum() > 0 or not np.any(np.isfinite(ar_flux)):
        # no useful data
        stats['empty'] += 1
        return empty_result

    if downsample_factor != 1:
        # downsample the continuum (don't replace ar_wavelength and ar_ivar yet)
        _, ar_fit_spectrum, _ = downsample_spectrum(ar_wavelength,
                                                    ar_fit_spectrum, ar_ivar,
                                                    downsample_factor)
        # downsample the spectrum
        ar_wavelength, ar_flux, ar_ivar = downsample_spectrum(
            ar_wavelength, ar_flux, ar_ivar, downsample_factor)

    # transmission is only meaningful in the ly_alpha range, and also requires a valid fit for that wavelength
    # use the same range as in 1404.1801 (2014)
    forest_mask = np.logical_and(ar_wavelength > 1040 * (1 + z),
                                 ar_wavelength < 1200 * (1 + z))
    fit_mask = ~np.isnan(ar_fit_spectrum)
    # since at high redshift the sample size becomes smaller,
    # discard all forest pixels that have a redshift greater/less than a globally defined value
    min_redshift = settings.get_min_forest_redshift()
    max_redshift = settings.get_max_forest_redshift()
    ar_redshift = ar_wavelength / lya_center - 1

    redshift_mask = (min_redshift < ar_redshift) & (ar_redshift < max_redshift)
    redshift_mask &= get_line_masks(ar_redshift)

    ivar_mask = ar_ivar > 0

    # combine all different masks
    effective_mask = forest_mask & fit_mask & redshift_mask & ivar_mask
    ar_wavelength_masked = np.asarray(ar_wavelength[effective_mask])
    ar_fit_spectrum_masked = ar_fit_spectrum[effective_mask]

    # make sure we have some pixels before calling ar_fit_spectrum_masked.min()
    if ar_wavelength_masked.size < (150 / downsample_factor):
        stats['low_count'] += 1
        l_print_no_barrier("skipped QSO (low pixel count): ", qso_rec)
        return empty_result

    fit_min_value = ar_fit_spectrum_masked.min()
    if fit_min_value < min_continuum_threshold:
        stats['low_continuum'] += 1
        l_print_no_barrier("skipped QSO (low continuum) :", qso_rec)
        return empty_result

    stats['accepted'] += 1
    l_print_no_barrier("accepted QSO", qso_rec)

    # suppress divide by zero: NaNs can be introduced by the downsample_spectrum method
    with np.errstate(divide='ignore'):
        ar_rel_transmittance = ar_flux / ar_fit_spectrum
    ar_rel_transmittance_masked = ar_rel_transmittance[effective_mask]
    ar_z_masked = ar_wavelength_masked / lya_center - 1
    assert ar_z_masked.size == ar_rel_transmittance_masked.size
    assert not np.isnan(ar_rel_transmittance_masked.sum())

    # calculate the weight of each point as a delta_t (without the mean transmittance part)
    ar_pipeline_ivar_masked = ar_ivar[effective_mask] * np.square(
        ar_fit_spectrum_masked)

    # optional: remove the weighted average of each forest
    # rel_transmittance_weighted_mean = np.average(ar_rel_transmittance_masked, weights=ar_pipeline_ivar_masked)
    # ar_rel_transmittance -= rel_transmittance_weighted_mean

    l_print_no_barrier("mean transmittance for QSO:",
                       (ar_flux[effective_mask] /
                        ar_fit_spectrum_masked).mean())

    return LyaForestTransmittance(ar_z_masked, ar_rel_transmittance_masked,
                                  ar_pipeline_ivar_masked,
                                  ar_fit_spectrum_masked)
Example 14
def profile_main():
    galaxy_metadata_file_npy = settings.get_galaxy_metadata_npy()
    histogram_output_npz = settings.get_ism_real_median_npz()

    galaxy_record_table = table.Table(np.load(galaxy_metadata_file_npy))

    num_extinction_bins = settings.get_num_extinction_bins()

    extinction_field_name = settings.get_extinction_source()

    # group results into extinction bins with roughly equal number of spectra.
    galaxy_record_table.sort([extinction_field_name])

    # remove objects with unknown extinction
    galaxy_record_table = galaxy_record_table[np.where(
        np.isfinite(galaxy_record_table[extinction_field_name]))]

    # if comm.size > num_extinction_bins:
    #     raise Exception('too many MPI nodes')

    # split the work into 'jobs' for each mpi node.
    # a job is defined as a single extinction bin.
    # the index of every extinction bin is its job number.

    job_sizes, job_offsets = get_chunks(num_extinction_bins, comm.size)
    job_start = job_offsets[comm.rank]
    job_end = job_start + job_sizes[comm.rank]

    chunk_sizes, chunk_offsets = get_chunks(len(galaxy_record_table),
                                            num_extinction_bins)

    for i in range(job_start, job_end):
        extinction_bin_start = chunk_offsets[i]
        extinction_bin_end = extinction_bin_start + chunk_sizes[i]

        extinction_bin_record_table = galaxy_record_table[
            extinction_bin_start:extinction_bin_end]

        # this should be done before plate sort
        group_parameters = {
            'extinction_bin_number': i,
            'extinction_minimum': extinction_bin_record_table[extinction_field_name][0],
            'extinction_maximum': extinction_bin_record_table[extinction_field_name][-1],
            'extinction_mean': np.mean(extinction_bin_record_table[extinction_field_name]),
            'extinction_median': np.median(extinction_bin_record_table[extinction_field_name]),
        }

        # sort by plate to avoid constant switching of fits files (which are per plate).
        extinction_bin_record_table.sort(['plate', 'mjd', 'fiberID'])

        base_filename, file_extension = splitext(histogram_output_npz)
        output_filename = '{}_{:02d}{}'.format(base_filename, i,
                                               file_extension)

        l_print_no_barrier('Starting extinction bin {}'.format(i))
        calc_median_spectrum(extinction_bin_record_table,
                             output_filename,
                             group_parameters=group_parameters)
        l_print_no_barrier('Finished extinction bin {}'.format(i))

    for _ in barrier_sleep(comm, use_yield=True):
        l_print_no_barrier("waiting")
        pass
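
The exact implementation of get_chunks is not shown in these examples; judging by its use here and in accumulate_over_spectra, it splits N items into roughly equal parts and returns per-chunk sizes and starting offsets. The helper below is a plausible stand-in written for illustration only, not the project's actual mpi_helper.get_chunks.

import numpy as np

def get_chunks_sketch(count, num_chunks):
    # split `count` items into `num_chunks` nearly equal parts,
    # returning (sizes, offsets) in the same spirit as mpi_helper.get_chunks
    sizes = np.array([len(part) for part in np.array_split(np.arange(count), num_chunks)])
    offsets = np.concatenate(([0], np.cumsum(sizes)[:-1]))
    return sizes, offsets

chunk_sizes, chunk_offsets = get_chunks_sketch(10, 4)
print(chunk_sizes, chunk_offsets)  # [3 3 2 2] [0 3 6 8]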
Example 15
def calc_ism_transmittance():
    comm.Barrier()
    accumulate_over_spectra(ism_transmittance_chunk,
                            ISMTransmittanceAccumulator)
    l_print_no_barrier(pprint.pformat(local_stats))
Example 16
    def add_pairs_in_sub_chunk(self, delta_t_file, local_pair_angles, pairs,
                               pixel_pairs):
        local_pair_separation_bins = \
            pixel_pairs.add_qso_pairs_to_bins(pairs, local_pair_angles, delta_t_file)

        mpi_helper.l_print('local pair count:',
                           local_pair_separation_bins.get_pair_count())
        local_pair_separation_bins_array = local_pair_separation_bins.get_data_as_array()
        local_pair_separation_bins_metadata = local_pair_separation_bins.get_metadata()
        local_array_shape = local_pair_separation_bins_array.shape
        array_block_size = np.prod(local_array_shape[1:])

        comm.Barrier()
        mpi_helper.r_print("BEGIN GATHER")
        mpi_helper.l_print_no_barrier('local array shape:', local_array_shape)
        array_counts = comm.allgather(local_array_shape[0])

        pair_separation_bins_array = None
        array_endings = np.cumsum(array_counts)
        array_displacements = array_endings - np.array(array_counts)
        if comm.rank == 0:
            mpi_helper.r_print('array count:', array_counts)
            root_array_shape = (np.sum(array_counts), ) + local_array_shape[1:]
            mpi_helper.r_print('root array shape:', root_array_shape)
            pair_separation_bins_array = np.ones(shape=root_array_shape,
                                                 dtype=np.float64)

        send_buf = [
            local_pair_separation_bins_array,
            local_array_shape[0] * array_block_size
        ]
        receive_buf = [
            pair_separation_bins_array,
            np.multiply(array_counts, array_block_size),
            np.multiply(array_displacements, array_block_size), MPI.DOUBLE
        ]

        # mpi_helper.l_print(send_buf)

        comm.Gatherv(sendbuf=send_buf, recvbuf=receive_buf)
        list_pair_separation_bins_metadata = comm.gather(
            local_pair_separation_bins_metadata)
        comm.Barrier()
        mpi_helper.r_print("END_GATHER")

        if comm.rank == 0:
            # mpi_helper.r_print(receive_buf[0][0][0:10])
            list_pair_separation_bins = [
                type(local_pair_separation_bins).load_from(
                    pair_separation_bins_array[
                        array_displacements[rank]:array_endings[rank]],
                    metadata) for rank, metadata in enumerate(
                        list_pair_separation_bins_metadata)
            ]

            # initialize bins only if this is the first time we get here
            if not self.pair_separation_bins:
                self.pair_separation_bins = local_pair_separation_bins.init_as(
                    local_pair_separation_bins)

            # add new results to existing bins
            if list_pair_separation_bins:
                for i in list_pair_separation_bins:
                    for g in i.dict_bins_3d_data.keys():
                        mpi_helper.l_print_no_barrier(
                            np.sum(i.dict_bins_3d_data[g].ar_count))
                self.pair_separation_bins = reduce(lambda x, y: x + y,
                                                   list_pair_separation_bins,
                                                   self.pair_separation_bins)

                mpi_helper.r_print('total number of pixel pairs in bins:',
                                   self.pair_separation_bins.get_pair_count())
                self.pair_separation_bins.flush()
                pixel_pairs.significant_qso_pairs.save(
                    settings.get_significant_qso_pairs_npy())
            else:
                print('no results received.')
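
Both add_pairs_in_sub_chunk variants gather variable-length per-rank arrays: each rank allgathers its local row count, and explicit counts and displacements (in flattened elements) are passed to Gatherv. Below is a condensed, self-contained sketch of that pattern, assuming mpi4py; the names are illustrative rather than taken from the module.

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD

block_shape = (3,)                            # trailing dimensions of a single row
local_rows = comm.rank + 1                    # each rank contributes a different number of rows
local_array = np.full((local_rows, ) + block_shape, float(comm.rank))

block_size = int(np.prod(block_shape))
row_counts = comm.allgather(local_rows)       # per-rank row counts, known on every rank
displacements = np.concatenate(([0], np.cumsum(row_counts)[:-1]))

receive_array = None
if comm.rank == 0:
    receive_array = np.zeros(shape=(sum(row_counts), ) + block_shape)

send_buf = [local_array, local_rows * block_size]
receive_buf = [receive_array,
               np.multiply(row_counts, block_size),
               np.multiply(displacements, block_size), MPI.DOUBLE]
comm.Gatherv(sendbuf=send_buf, recvbuf=receive_buf)

if comm.rank == 0:
    # each gathered row is filled with the rank number that sent it
    print(receive_array[:, 0])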
Example 17
    def add_pairs_in_sub_chunk(self, local_pair_angles, pairs):
        local_angular_separation_bins = \
            calc_angular_separation(pairs, local_pair_angles, self.ar_extinction, self.extinction_mean)

        mpi_helper.l_print('local pair count:',
                           local_angular_separation_bins[1].sum())
        local_pair_separation_bins_array = local_angular_separation_bins
        local_pair_separation_bins_metadata = None
        local_array_shape = local_pair_separation_bins_array.shape
        array_block_size = np.prod(local_array_shape[1:])

        comm.Barrier()
        mpi_helper.r_print("BEGIN GATHER")
        mpi_helper.l_print_no_barrier('local array shape:', local_array_shape)
        array_counts = comm.allgather(local_array_shape[0])

        pair_separation_bins_array = None
        array_endings = np.cumsum(array_counts)
        array_displacements = array_endings - np.array(array_counts)
        if comm.rank == 0:
            mpi_helper.r_print('array count:', array_counts)
            root_array_shape = (np.sum(array_counts), ) + local_array_shape[1:]
            mpi_helper.r_print('root array shape:', root_array_shape)
            pair_separation_bins_array = np.ones(shape=root_array_shape,
                                                 dtype=np.float64)

        send_buf = [
            local_pair_separation_bins_array,
            local_array_shape[0] * array_block_size
        ]
        receive_buf = [
            pair_separation_bins_array,
            np.multiply(array_counts, array_block_size),
            np.multiply(array_displacements, array_block_size), MPI.DOUBLE
        ]

        # mpi_helper.l_print(send_buf)

        comm.Gatherv(sendbuf=send_buf, recvbuf=receive_buf)
        list_pair_separation_bins_metadata = comm.gather(
            local_pair_separation_bins_metadata)
        comm.Barrier()
        mpi_helper.r_print("END_GATHER")

        if comm.rank == 0:
            # mpi_helper.r_print(receive_buf[0][0][0:10])
            list_pair_separation_bins = [
                pair_separation_bins_array[array_displacements[rank]:array_endings[rank]]
                for rank, metadata in enumerate(list_pair_separation_bins_metadata)
            ]

            # initialize bins only if this is the first time we get here
            # for now use a function level static variable
            if self.angular_separation_bins is None:
                self.angular_separation_bins = np.zeros_like(
                    local_angular_separation_bins)

            # add new results to existing bins
            if list_pair_separation_bins:
                self.angular_separation_bins = reduce(
                    lambda x, y: x + y, list_pair_separation_bins,
                    self.angular_separation_bins)

                mpi_helper.r_print('total number of pixel pairs in bins:',
                                   self.angular_separation_bins[1].sum())
                np.save("../../data/extinction_correlation.npy",
                        self.angular_separation_bins)
                # pixel_pairs.significant_qso_pairs.save(settings.get_significant_qso_pairs_npy())
            else:
                print('no results received.')