Code example #1
def profile_main():
    qso_record_table = table.Table(np.load(settings.get_qso_metadata_npy()))

    flag_stats = FlagStats()

    # assume qso_record_table is already sorted
    spec_sample = read_spectrum_fits.enum_spectra(qso_record_table,
                                                  pre_sort=False,
                                                  flag_stats=flag_stats)

    qso_spectra_hdf5 = settings.get_qso_spectra_hdf5()
    output_spectra = Hdf5SpectrumContainer(qso_spectra_hdf5,
                                           readonly=False,
                                           create_new=True,
                                           num_spectra=MAX_SPECTRA)

    if settings.get_single_process():
        result_enum = map(save_spectrum, spec_sample)
    else:
        assert False, "Not supported"

    for i in result_enum:
        index = i[0]
        output_spectra.set_wavelength(index, i[1])
        output_spectra.set_flux(index, i[2])
        output_spectra.set_ivar(index, i[3])

    for bit in range(0, 32):
        print(flag_stats.to_string(bit))

    print('Total count: ' + str(flag_stats.pixel_count))
Code example #2
def remove_median(delta_t, ar_delta_t_median, ar_z, ar_ivar_total):
    """
    Remove the median of the delta transmittance per redshift bin.
    The change is made in-place.

    :param ar_ivar_total: total weight (inverse variance) per redshift bin
    :return:
    """

    # ignore NaN median values (redshift bins with a total weight of 0)
    mask = ar_ivar_total != 0

    # select the non-NaN median values per redshift bin
    ar_median_no_nan = ar_delta_t_median[mask]
    ar_z_no_nan = ar_z[mask]

    empty_array = np.array([])

    n = 0
    # remove the median (in-place)
    for i in range(delta_t.num_spectra):
        ar_wavelength = delta_t.get_wavelength(i)
        ar_flux = delta_t.get_flux(i)
        ar_ivar = delta_t.get_ivar(i)
        if ar_wavelength.size:
            ar_delta_t_correction = np.interp(ar_wavelength, ar_z_no_nan,
                                              ar_median_no_nan, 0, 0)
            delta_t.set_wavelength(i, ar_wavelength)
            delta_t.set_flux(i, ar_flux - ar_delta_t_correction)
            delta_t.set_ivar(i, ar_ivar)
            n += 1
        else:
            delta_t.set_wavelength(i, empty_array)
            delta_t.set_flux(i, empty_array)
            delta_t.set_ivar(i, empty_array)
Code example #3
    def get_weighted_median(self, weighted=True):
        ar_median_weights = self.ar_flux_bins if weighted else self.ar_unweighted_flux_bins
        res = np.zeros(self.ar_z.size)
        for n in range(self.ar_z.size):
            res[n] = weighted_module.median(np.arange(self.flux_res),
                                            ar_median_weights[n])

        return res / self.flux_res * self.flux_range + self.flux_offset
Code example #4
def profile_main():
    galaxy_metadata_file_npy = settings.get_galaxy_metadata_npy()
    histogram_output_npz = settings.get_ism_histogram_npz()

    galaxy_record_table = table.Table(np.load(galaxy_metadata_file_npy))

    num_extinction_bins = settings.get_num_extinction_bins()

    extinction_field_name = settings.get_extinction_source()

    ism_object_classes = settings.get_ism_object_classes()

    galaxy_table_mask = np.array(
        [i in ism_object_classes for i in galaxy_record_table['class']])
    galaxy_record_table = galaxy_record_table[galaxy_table_mask]

    # group results into extinction bins with roughly equal number of spectra.
    galaxy_record_table.sort([extinction_field_name])

    # remove objects with unknown extinction
    galaxy_record_table = galaxy_record_table[np.where(
        np.isfinite(galaxy_record_table[extinction_field_name]))]

    chunk_sizes, chunk_offsets = get_chunks(len(galaxy_record_table),
                                            num_extinction_bins)
    for i in range(num_extinction_bins):
        extinction_bin_start = chunk_offsets[i]
        extinction_bin_end = extinction_bin_start + chunk_sizes[i]

        extinction_bin_record_table = galaxy_record_table[
            extinction_bin_start:extinction_bin_end]

        # this should be done before plate sort
        group_parameters = {
            'extinction_bin_number': i,
            'extinction_minimum': extinction_bin_record_table[extinction_field_name][0],
            'extinction_maximum': extinction_bin_record_table[extinction_field_name][-1],
            'extinction_average': np.mean(extinction_bin_record_table[extinction_field_name]),
            'extinction_median': np.median(extinction_bin_record_table[extinction_field_name]),
        }

        # sort by plate to avoid constant switching of fits files (which are per plate).
        extinction_bin_record_table.sort(['plate', 'mjd', 'fiberID'])

        base_filename, file_extension = splitext(histogram_output_npz)
        histogram_output_filename = '{}_{:02d}{}'.format(
            base_filename, i, file_extension)

        r_print('Starting extinction bin {}'.format(i))
        calc_median_spectrum(extinction_bin_record_table,
                             histogram_output_filename,
                             group_parameters=group_parameters)
        r_print('Finished extinction bin {}'.format(i))
Code example #5
def profile_main():
    t_ = create_qso_table()
    fill_qso_table(t_)
    t_.sort(['plate', 'mjd', 'fiberID'])

    # add indices after sort
    t_['index'] = range(len(t_))

    np.save(settings.get_qso_metadata_npy(), t_)
Code example #6
File: pixel_flags.py  Project: yishayv/lyacorr
    def int_to_string(cls, flags):
        bit_string = ''
        for i in range(32):
            if flags & 1:
                if bit_string:
                    bit_string += '|'
                bit_string += cls.FlagNames[i]
            flags >>= 1
        return bit_string
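
The method walks all 32 bits, collecting the name of every set flag. A minimal self-contained sketch of the same bit-decoding idiom, using a hypothetical FLAG_NAMES list in place of the class's real FlagNames:

# hypothetical flag names for illustration; pixel_flags.py defines the real FlagNames list
FLAG_NAMES = ['BADPIX', 'COSMIC', 'SATURATED', 'BADSKY']

def flags_to_string(flags):
    # join the names of all set bits with '|'
    return '|'.join(name for i, name in enumerate(FLAG_NAMES) if flags & (1 << i))

print(flags_to_string(0b0101))  # prints: BADPIX|SATURATED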
Code example #7
def rolling_weighted_median(ar_data, ar_weights, box_size):
    ar_flux_smoothed = np.zeros_like(ar_data)
    box_size_lower = -(box_size // 2)
    box_size_upper = box_size // 2 + (box_size & 1)
    for j in range(ar_data.size):
        start = max(j + box_size_lower, 0)
        end = min(j + box_size_upper, ar_data.size)
        ar_flux_smoothed[j] = weighted.median(ar_data[start:end],
                                              ar_weights[start:end])
    return ar_flux_smoothed
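
A usage sketch, assuming the weighted module is the wquantiles package (pip install wquantiles; older releases import as weighted), which provides median(data, weights):

import numpy as np
import weighted  # the wquantiles package

ar_data = np.random.normal(loc=1.0, scale=0.3, size=200)
ar_weights = np.ones_like(ar_data)  # uniform weights reduce this to a plain rolling median
ar_smoothed = rolling_weighted_median(ar_data, ar_weights, box_size=15)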
Code example #8
File: mpi_helper.py  Project: yishayv/lyacorr
def l_print(*args):
    """
    print message on each node, synchronized
    :param args:
    :return:
    """
    for rank in range(0, comm.size):
        comm.Barrier()
        if rank == comm.rank:
            l_print_no_barrier(*args)
        comm.Barrier()
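
The function relies on a module-level mpi4py communicator and an unsynchronized print helper. A minimal sketch of what those might look like (the real definitions live in mpi_helper.py):

from mpi4py import MPI

comm = MPI.COMM_WORLD

def l_print_no_barrier(*args):
    # prefix each message with the originating rank; no synchronization here
    print('rank', comm.rank, ':', *args)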
Code example #9
    def add_flux_pre_binned(self, ar_flux, ar_mask, ar_weights):
        for n in range(self.ar_z.size):
            if ar_mask[n]:
                ar_effective_weight = ar_weights[n]
                ar_effective_flux = np.asarray(ar_flux[n])
                ar_normalized_flux = ar_effective_flux / self.flux_range - self.flux_offset
                ar_flux_index_float = np.clip(ar_normalized_flux,
                                              self.flux_min,
                                              self.flux_max) * self.flux_res
                ar_flux_index = np.clip(ar_flux_index_float.astype(int), 0,
                                        self.flux_res - 1)
                self.ar_flux_bins[n, ar_flux_index] += ar_effective_weight
                self.ar_unweighted_flux_bins[n, ar_flux_index] += 1
Code example #10
def get_bundles(start, end, size):
    """
    Split a range into bundles.
    Each bundle is a tuple of (offset, size).
    :type start: int
    :type end: int
    :type size: int
    :rtype: collections.Iterable[tuple(int, int)]
    """
    offsets = range(start, end, size)
    sizes = [size] * len(offsets)
    sizes[-1] = end - offsets[-1]
    return zip(offsets, sizes)
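
For example, in Python 3 (where len works on range objects):

bundles = list(get_bundles(start=0, end=10, size=4))
# [(0, 4), (4, 4), (8, 2)]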
Code example #11
def nu_boxcar(x, y, x_left_func, x_right_func, weights=None):
    y_boxcar = np.zeros_like(y)
    if weights is None:
        weights = np.ones_like(x)
    for n in range(x.size):
        x_left = np.searchsorted(x, x_left_func(x[n]))
        x_right = np.searchsorted(x, x_right_func(x[n]))
        box_weights = weights[x_left:x_right]
        if box_weights.sum() > 0:
            y_boxcar[n] = np.average(y[x_left:x_right], weights=box_weights)
        else:
            y_boxcar[n] = y[n]
    return y_boxcar
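
A usage sketch with a symmetric window of +/-5 in x (x must be sorted, since np.searchsorted assumes sorted input):

import numpy as np

x = np.linspace(0.0, 100.0, 500)  # sorted sample positions
y = np.sin(x / 10.0) + np.random.normal(scale=0.1, size=x.size)
y_smooth = nu_boxcar(x, y,
                     x_left_func=lambda c: c - 5.0,
                     x_right_func=lambda c: c + 5.0)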
Code example #12
def calc_fit_power_law(delta_f_snr_bins=snr_stats_total):
    snr_bins = delta_f_snr_bins_helper.get_log_snr_axis()
    y_quantile = np.zeros_like(snr_bins)
    y1 = delta_f_snr_bins_helper.get_delta_f_axis()
    for i in range(50):
        y_quantile[i] = weighted.quantile(y1, delta_f_snr_bins[i], .9)
    mask = np.logical_and(0 < snr_bins, snr_bins < 3)
    masked_snr_bins = snr_bins[mask]
    # print("x2:", masked_snr_bins)
    fit_params = lmfit.Parameters()
    fit_params.add('a', -2., min=-5, max=-1)
    fit_params.add('b', 1., min=0.1, max=20.)
    fit_params.add('c', 0.08, min=0, max=0.2)
    fit_params.add('d', 3, min=-5, max=5)
    fit_result = lmfit.minimize(fit_function, fit_params, kws={'data': y_quantile[mask], 'x': masked_snr_bins})
    return fit_result, snr_bins, masked_snr_bins, y_quantile
Code example #13
    def accumulate(self, result_enum, ar_qso_indices_list, object_all_results):
        for ar_continua, ar_qso_indices, object_result in zip(
                result_enum, ar_qso_indices_list, object_all_results):

            continua = ContinuumFitContainer.from_np_array_and_object(
                ar_continua, object_result)
            # array based mpi gather returns zeros at the end of the global array.
            # use the fact that the object based gather returns the correct number of elements:
            num_spectra = len(object_result)
            for n in range(num_spectra):
                index = ar_qso_indices[n]
                self.continuum_fit_container.set_wavelength(
                    index, continua.get_wavelength(n))
                self.continuum_fit_container.set_flux(index,
                                                      continua.get_flux(n))
                # TODO: refactor
                self.continuum_fit_container.copy_metadata(
                    index, continua.get_metadata(n))
                self.n += 1
            l_print_no_barrier("n =", self.n)
        l_print_no_barrier("n =", self.n)
Code example #14
def reduce_and_save(output_file, global_histogram, histogram,
                    group_parameters):
    comm.Reduce([histogram, MPI.DOUBLE], [global_histogram, MPI.DOUBLE],
                op=MPI.SUM,
                root=0)
    if comm.rank == 0:
        # compute the median and add it to the npz file
        ism_spec = np.zeros(shape=global_histogram.shape[1], dtype=np.double)
        for i in range(ism_spec.size):
            ism_spec[i] = weighted.quantile(
                np.arange(global_histogram.shape[0]), global_histogram[:, i],
                0.5)
        ism_spec *= float(flux_range) / num_bins
        ism_spec += flux_min

        np.savez_compressed(output_file,
                            histogram=global_histogram,
                            ar_wavelength=ar_wavelength,
                            flux_range=[flux_min, flux_max],
                            ism_spec=ism_spec,
                            group_parameters=group_parameters)
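
The per-wavelength median is recovered from the histogram by treating bin indices as data and bin counts as weights. A self-contained sketch of that idiom, again assuming weighted is the wquantiles package:

import numpy as np
import weighted  # the wquantiles package

counts, edges = np.histogram(np.random.normal(size=10000), bins=50)
bin_centers = 0.5 * (edges[:-1] + edges[1:])
median_estimate = weighted.quantile(bin_centers, counts, 0.5)  # close to 0 for a standard normal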
Code example #15
    def get_mask_list(self, plate, mjd, fiber_id):
        qso_tuple = (plate, mjd, fiber_id)
        mask_list = []
        z_vi = None
        # if QSO is not in BAL list, return an empty list
        if qso_tuple in self.bal_dict:
            i = self.bal_dict[qso_tuple]
            d = self.data
            z_vi = d.Z_VI[i]
            for j in range(d.NCIV_450[i]):
                for line_center in self.line_centers.values():
                    # note: the start wavelength comes from VMAX and the end from VMIN
                    # (higher outflow velocity maps to shorter wavelength)
                    # add a safety margin
                    margin = 0.002
                    end = civ_rel_velocity_to_wavelength(
                        line_center, z_vi, d.VMIN_CIV_450[i][j]) * (1 + margin)
                    start = civ_rel_velocity_to_wavelength(
                        line_center, z_vi, d.VMAX_CIV_450[i][j]) * (1 - margin)
                    mask_list += [MaskElement(start, end)]

        return mask_list, z_vi
Code example #16
def calc_fit_power_law(delta_f_snr_bins=snr_stats_total):
    snr_bins = delta_f_snr_bins_helper.get_log_snr_axis()
    y_quantile = np.zeros_like(snr_bins)
    y1 = delta_f_snr_bins_helper.get_delta_f_axis()
    for i in range(50):
        y_quantile[i] = weighted.quantile(y1, delta_f_snr_bins[i], .9)
    mask = np.logical_and(0 < snr_bins, snr_bins < 3)
    masked_snr_bins = snr_bins[mask]
    # print("x2:", masked_snr_bins)
    fit_params = lmfit.Parameters()
    fit_params.add('a', -2., min=-5, max=-1)
    fit_params.add('b', 1., min=0.1, max=20.)
    fit_params.add('c', 0.08, min=0, max=0.2)
    # make sure the exponent base is non-negative
    fit_params.add('d', 3, min=-masked_snr_bins.min(), max=5)
    fit_result = lmfit.minimize(fit_function,
                                fit_params,
                                kws={
                                    'data': y_quantile[mask],
                                    'x': masked_snr_bins
                                })
    return fit_result, snr_bins, masked_snr_bins, y_quantile
Code example #17
def mean_transmittance_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    spectra = read_spectrum_hdf5.SpectraWithMetadata(
        qso_record_table, settings.get_qso_spectra_hdf5())
    continuum_fit_file = ContinuumFitContainerFiles(False)

    m = mean_transmittance.MeanTransmittance(np.arange(*z_range))
    med = median_transmittance.MedianTransmittance(np.arange(*z_range))
    for n in range(len(qso_record_table)):
        qso_spec_obj = spectra.return_spectrum(n)
        index = qso_spec_obj.qso_rec.index
        ar_fit_spectrum = continuum_fit_file.get_flux(index)
        if not continuum_fit_file.get_is_good_fit(index):
            local_mean_stats['bad_fit'] += 1
            l_print_no_barrier("skipped QSO (bad fit): ", qso_spec_obj.qso_rec)
            continue

        lya_forest_transmittance_binned = qso_transmittance_binned(
            qso_spec_obj, ar_fit_spectrum, local_mean_stats)
        if lya_forest_transmittance_binned.ar_transmittance.size:
            # save mean and/or median according to common settings:
            if settings.get_enable_weighted_mean_estimator():
                m.add_flux_pre_binned(
                    lya_forest_transmittance_binned.ar_transmittance,
                    lya_forest_transmittance_binned.ar_mask,
                    lya_forest_transmittance_binned.ar_ivar)
            if settings.get_enable_weighted_median_estimator():
                med.add_flux_pre_binned(
                    lya_forest_transmittance_binned.ar_transmittance,
                    lya_forest_transmittance_binned.ar_mask,
                    lya_forest_transmittance_binned.ar_ivar)
            mean_transmittance_chunk.num_spec += 1

    l_print_no_barrier("finished chunk, num spectra:",
                       mean_transmittance_chunk.num_spec, " offset: ",
                       start_offset)
    return np.vstack((m.as_np_array(), med.as_np_array())), None
Code example #18
def update_mean(delta_t_file):
    n = 0
    ar_z = np.arange(1.9, 3.5, 0.0005)

    # weighted mean
    ar_delta_t_sum = np.zeros_like(ar_z)
    ar_delta_t_count = np.zeros_like(ar_z)
    ar_delta_t_weighted = np.zeros_like(ar_z)

    # histogram median
    delta_t_min, delta_t_max = (-10, 10)
    delta_t_num_buckets = 1000
    ar_delta_t_histogram = np.zeros(shape=(ar_z.size, delta_t_num_buckets))

    ar_ivar_total = np.zeros_like(ar_z)
    # calculate the weighted sum of the delta transmittance per redshift bin.
    for i in range(delta_t_file.num_spectra):
        ar_z_unbinned = delta_t_file.get_wavelength(i)
        ar_delta_t_unbinned = delta_t_file.get_flux(i)
        ar_ivar_unbinned = delta_t_file.get_ivar(i)
        if ar_z_unbinned.size > 2:
            f_delta_t = interpolate.interp1d(ar_z_unbinned,
                                             ar_delta_t_unbinned,
                                             kind='nearest',
                                             bounds_error=False,
                                             fill_value=0,
                                             assume_sorted=True)
            ar_delta_t = f_delta_t(ar_z)
            f_ivar = interpolate.interp1d(ar_z_unbinned,
                                          ar_ivar_unbinned,
                                          kind='nearest',
                                          bounds_error=False,
                                          fill_value=0,
                                          assume_sorted=True)
            ar_ivar = f_ivar(ar_z)

            ar_delta_t_sum += ar_delta_t
            ar_delta_t_weighted += ar_delta_t * ar_ivar
            ar_delta_t_count += ar_delta_t != 0
            ar_ivar_total += ar_ivar

            ar_delta_t_clipped = np.clip(ar_delta_t, delta_t_min, delta_t_max)
            ar_delta_t_buckets = rescale(ar_delta_t_clipped,
                                         (delta_t_min, delta_t_max),
                                         (0, delta_t_num_buckets))
            ar_delta_t_buckets = np.clip(ar_delta_t_buckets.astype(np.int32),
                                         0, delta_t_num_buckets - 1)
            for j in range(ar_z.size):
                ar_delta_t_histogram[j, ar_delta_t_buckets[j]] += ar_ivar[j]
            n += 1

    # save intermediate result (the mean delta_t before removal)
    np.save(
        settings.get_mean_delta_t_npy(),
        np.vstack((ar_z, ar_delta_t_weighted, ar_ivar_total, ar_delta_t_sum,
                   ar_delta_t_count)))

    ar_delta_t_median = np.zeros_like(ar_z)
    for i in range(ar_z.size):
        ar_delta_t_median[i] = weighted.median(np.arange(delta_t_num_buckets),
                                               ar_delta_t_histogram[i])

    ar_delta_t_median = rescale(ar_delta_t_median, (0, delta_t_num_buckets),
                                (delta_t_min, delta_t_max))
    np.save(settings.get_median_delta_t_npy(),
            np.vstack((ar_z, ar_delta_t_median)))
    return ar_delta_t_weighted, ar_ivar_total, ar_z, n, ar_delta_t_median
Code example #19
def do_continuum_fit_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    spectra = read_spectrum_hdf5.SpectraWithMetadata(
        qso_record_table, settings.get_qso_spectra_hdf5())
    num_spectra = len(qso_record_table)
    continuum_chunk = ContinuumFitContainer(num_spectra)

    # DISABLED FOR NOW
    # use_existing_mean_transmittance = os.path.exists(settings.get_median_transmittance_npy()) and os.path.exists(
    #     settings.get_mean_delta_t_npy())
    use_existing_mean_transmittance = False

    median_flux_correction_func = None
    if use_existing_mean_transmittance:
        # m = mean_transmittance.MeanTransmittance.from_file(settings.get_mean_transmittance_npy())
        med = median_transmittance.MedianTransmittance.from_file(
            settings.get_median_transmittance_npy())
        # for debugging with a small data set:
        # ignore values with less than 20 sample points
        # ar_z_mean_flux, ar_mean_flux = m.get_weighted_mean_with_minimum_count(20)
        ar_z_mean_flux, ar_mean_flux = med.get_weighted_median_with_minimum_count(20)

        def median_flux_func(ar_z):
            return np.interp(ar_z, ar_z_mean_flux, ar_mean_flux)

        ar_z_mean_correction, ar_mean_correction = get_weighted_mean_from_file()

        def median_flux_correction_func(ar_z):
            return median_flux_func(ar_z) * (
                1 - np.interp(ar_z, ar_z_mean_correction, ar_mean_correction))

    for n in range(len(qso_record_table)):
        current_qso_data = spectra.return_spectrum(n)

        pre_processed_qso_data, result_string = pre_process_spectrum.apply(
            current_qso_data)

        if result_string != 'processed':
            # error during pre-processing. log statistics of error causes.
            local_stats[result_string] += 1
            continue

        ar_wavelength = pre_processed_qso_data.ar_wavelength
        ar_flux = pre_processed_qso_data.ar_flux
        ar_ivar = pre_processed_qso_data.ar_ivar
        qso_rec = pre_processed_qso_data.qso_rec
        # set z after pre-processing, because BAL QSOs have visually inspected redshift.
        z = qso_rec.z
        assert ar_flux.size == ar_ivar.size

        if not ar_ivar.sum() > 0 or not np.any(np.isfinite(ar_flux)):
            # no useful data
            local_stats['empty'] += 1
            continue

        fit_result = fit_pca.fit(
            ar_wavelength / (1 + z),
            ar_flux,
            ar_ivar,
            z,
            boundary_value=np.nan,
            mean_flux_constraint_func=median_flux_correction_func)

        if not fit_result.is_good_fit:
            local_stats['bad_fit'] += 1
            l_print_no_barrier("bad fit QSO: ", qso_rec)

        continuum_chunk.set_wavelength(n, ar_wavelength)
        continuum_chunk.set_flux(n, fit_result.spectrum)
        # TODO: find a way to estimate error, or create a file without ivar values.

        continuum_chunk.set_metadata(n, fit_result.is_good_fit,
                                     fit_result.goodness_of_fit,
                                     fit_result.snr)

        local_stats['accepted'] += 1

    l_print_no_barrier("offset =", start_offset)
    return continuum_chunk.as_np_array(), continuum_chunk.as_object()
Code example #20
ar_map_0 = None
ar_map_unc_0 = None
ar_map_0_log = None
if comm.rank == 0:
    ar_map_0 = hp.fitsfunc.read_map("../../data/COM_CompMap_Dust-DL07-AvMaps_2048_R2.00.fits", field=0)
    ar_map_unc_0 = hp.fitsfunc.read_map("../../data/COM_CompMap_Dust-DL07-AvMaps_2048_R2.00.fits", field=1)
    # ar_map_0_log = np.log(ar_map_0)
    np.clip(ar_map_unc_0, 1e-2, np.inf, ar_map_unc_0)

    # optionally add a mock signal to the map
    mock = False
    if mock:
        ar_mock = ar_map_0
        nside_signal = 32
        radius = hp.nside2resol(nside_signal) / 2 / np.sqrt(2)

        for i in range(hp.nside2npix(nside_signal)):
            vec1 = hp.pix2vec(nside_signal, i)
            mask = hp.query_disc(hp.get_nside(ar_mock), vec=vec1, radius=radius)
            ar_mock[mask] *= 100

        ar_mock /= np.sqrt(100)

# send the map to all other nodes
ar_map_local = comm.bcast(ar_map_0)
ar_map_unc_local = comm.bcast(ar_map_unc_0)

# initialize correlation bins
num_bins = 100
ar_product_total = np.zeros(shape=(10, num_bins))
ar_weights_total = np.zeros(shape=(10, num_bins))
ar_counts_total = np.zeros(shape=(10, num_bins))
Code example #21
def delta_transmittance_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    spectra = read_spectrum_hdf5.SpectraWithMetadata(
        qso_record_table, settings.get_qso_spectra_hdf5())
    continuum_fit_file = ContinuumFitContainerFiles(False)

    num_spectra = len(qso_record_table)
    delta_t = NpSpectrumContainer(False, num_spectra=num_spectra)
    # warning: np.ndarray is not initialized by default. zeroing manually.
    delta_t.zero()
    m = mean_transmittance.MeanTransmittance.from_file(
        settings.get_mean_transmittance_npy())
    # m = median_transmittance.MedianTransmittance.from_file(settings.get_median_transmittance_npy())
    # for debugging with a small data set:
    # ignore values with less than 20 sample points
    ar_z_mean_transmittance, ar_mean_transmittance = m.get_weighted_mean_with_minimum_count(20)
    # ar_z_mean_transmittance, ar_mean_transmittance = m.get_weighted_median_with_minimum_count(20, weighted=True)
    remove_dla = RemoveDlaSimple()

    pixel_weight = pixel_weight_coefficients.PixelWeight(
        pixel_weight_coefficients.DEFAULT_WEIGHT_Z_RANGE)
    for n in range(len(qso_record_table)):
        qso_spec_obj = spectra.return_spectrum(n)
        index = qso_spec_obj.qso_rec.index

        if not continuum_fit_file.get_is_good_fit(index):
            local_delta_stats['bad_fit'] += 1
            l_print_no_barrier("skipped QSO (bad fit): ", qso_spec_obj.qso_rec)
            continue

        ar_fit_spectrum = continuum_fit_file.get_flux(index)
        # we assume the fit spectrum uses the same wavelengths.

        lya_forest_transmittance = qso_transmittance(
            qso_spec_obj,
            ar_fit_spectrum,
            local_delta_stats,
            downsample_factor=settings.get_forest_downsample_factor())
        ar_z = lya_forest_transmittance.ar_z
        if ar_z.size:
            # prepare the mean transmittance for the z range of this QSO
            ar_mean_flux_for_z_range = np.asarray(
                np.interp(ar_z, ar_z_mean_transmittance,
                          ar_mean_transmittance))

            # delta transmittance is the change in relative transmittance vs the mean
            # therefore, subtract 1.
            ar_delta_t = lya_forest_transmittance.ar_transmittance / ar_mean_flux_for_z_range - 1

            # finish the error estimation, and save it
            ar_delta_t_ivar = pixel_weight.eval(
                lya_forest_transmittance.ar_ivar,
                ar_mean_flux_for_z_range * lya_forest_transmittance.ar_fit,
                ar_z)

            # simple DLA removal (without using a catalog)
            if settings.get_enable_simple_dla_removal():
                # remove DLA regions by setting the ivar of nearby pixels to 0
                ar_dla_mask = remove_dla.get_mask(ar_delta_t)
                if np.any(ar_dla_mask):
                    l_print_no_barrier("DLA(s) removed from QSO: ",
                                       qso_spec_obj.qso_rec)
                ar_delta_t_ivar[ar_dla_mask] = 0

            # ignore nan or infinite values (in case m_mean has incomplete data because of a low sample size)
            # Note: using wavelength field to store redshift
            finite_mask = np.logical_and(np.isfinite(ar_delta_t),
                                         np.isfinite(ar_delta_t_ivar))
            finite_z = ar_z[finite_mask]
            finite_delta_t = ar_delta_t[finite_mask]
            finite_ivar = ar_delta_t_ivar[finite_mask]

            # detrend forests with large enough range in comoving coordinates:
            finite_distances = cd.fast_comoving_distance(finite_z)
            if finite_distances.size > 1 and finite_distances[-1] - finite_distances[0] > 500:
                delta_t_boxcar = nu_boxcar(finite_distances,
                                           finite_delta_t,
                                           lambda c: c - 300,
                                           lambda c: c + 300,
                                           weights=finite_ivar)
                finite_delta_t = finite_delta_t - delta_t_boxcar

            delta_t.set_wavelength(n, finite_z)
            delta_t.set_flux(n, finite_delta_t)
            delta_t.set_ivar(n, finite_ivar)
        else:
            # empty record
            pass
        delta_transmittance_chunk.num_spec += 1

    l_print_no_barrier("finished chunk, num spectra:",
                       delta_transmittance_chunk.num_spec, " offset: ",
                       start_offset)
    return delta_t.as_np_array(), None
Code example #22
File: continuum_fit_pca.py  Project: yishayv/lyacorr
    def fit_binned(self, pca, ar_flux_rebinned, ar_ivar_rebinned,
                   ar_mean_flux_constraint, qso_redshift):

        is_good_fit = True

        ar_red_flux_rebinned = ar_flux_rebinned[pca.LY_A_PEAK_INDEX:]
        ar_red_ivar_rebinned = ar_ivar_rebinned[pca.LY_A_PEAK_INDEX:]

        # Suzuki 2004 normalizes flux according to 21 pixels around 1280
        normalization_factor = \
            ar_red_flux_rebinned[pca.LY_A_NORMALIZATION_INDEX - 10:pca.LY_A_NORMALIZATION_INDEX + 11].mean()
        ar_red_flux_rebinned_normalized = ar_red_flux_rebinned / float(
            normalization_factor)

        ar_full_fit = None
        if not np.any(ar_red_ivar_rebinned) or not np.any(
                np.isfinite(ar_red_ivar_rebinned)):
            return (np.zeros_like(pca.ar_wavelength_bins),
                    pca.ar_wavelength_bins, 1, np.inf, 0)

        for _ in range(3):
            # predict the full spectrum from the red part of the spectrum.
            ar_full_fit = self.fit_function(pca,
                                            ar_red_flux_rebinned_normalized,
                                            ar_red_ivar_rebinned)

            # restore the original flux scale
            ar_full_fit = ar_full_fit * normalization_factor
            ar_red_fit = ar_full_fit[pca.LY_A_PEAK_INDEX:]
            # mask 2.5 sigma absorption
            # suppress error when dividing by 0, because 0 ivar is already masked, so the code has no effect anyway.
            with np.errstate(divide='ignore', invalid='ignore'):
                ar_absorption_mask = ar_red_flux_rebinned - ar_red_fit < -2.5 * (
                    ar_red_ivar_rebinned**-0.5)
            # print "masked ", float(ar_absorption_mask.sum())/ar_absorption_mask.size, " of pixels in iteration ", i
            ar_red_ivar_rebinned[ar_absorption_mask] = 0

        ar_blue_fit = ar_full_fit[:pca.LY_A_PEAK_INDEX]
        ar_blue_flux_rebinned = ar_flux_rebinned[:pca.LY_A_PEAK_INDEX]
        ar_blue_ivar_rebinned = ar_ivar_rebinned[:pca.LY_A_PEAK_INDEX]
        ar_blue_fit_mean_flux_rebinned = \
            ar_mean_flux_constraint[:pca.LY_A_PEAK_INDEX] * ar_blue_fit
        # ignore pixels with 0 ivar
        ar_blue_data_mask = np.logical_and(np.isfinite(ar_blue_flux_rebinned),
                                           ar_blue_ivar_rebinned)

        if np.array(ar_blue_data_mask).sum() > 50:
            # find the optimal mean flux regulation:
            params = lmfit.Parameters()
            params.add('a_mf', value=0, min=-300, max=300)
            if qso_redshift > 2.4:
                # there are enough forest pixels for a 2nd order fit:
                params.add('b_mf', value=0, min=-300, max=300)
                result = lmfit.minimize(
                    fcn=self.regulate_mean_flux_2nd_order_residual,
                    params=params,
                    args=(pca, ar_blue_flux_rebinned,
                          ar_blue_fit_mean_flux_rebinned, ar_blue_data_mask))
                # apply the 2nd order mean flux regulation to the continuum fit:
                ar_regulated_blue_flux = self.mean_flux_2nd_order_correction(
                    result.params, ar_blue_fit, pca.delta_wavelength,
                    pca.delta_wavelength_sq)
            else:
                # low redshift makes most of the forest inaccessible,
                # use a 1st order fit to avoid over-fitting.
                result = lmfit.minimize(
                    fcn=self.regulate_mean_flux_1st_order_residual,
                    params=params,
                    args=(pca, ar_blue_flux_rebinned,
                          ar_blue_fit_mean_flux_rebinned, ar_blue_data_mask))

                # apply the 1st order mean flux regulation to the continuum fit:
                ar_regulated_blue_flux = self.mean_flux_1st_order_correction(
                    result.params, ar_blue_fit, pca.delta_wavelength)

            # overwrite the original blue fit with the regulated fit.
            ar_full_fit[:pca.LY_A_PEAK_INDEX] = ar_regulated_blue_flux
        else:
            is_good_fit = False

        goodness_of_fit = self.get_goodness_of_fit(
            pca, ar_flux_rebinned, ar_full_fit) if is_good_fit else np.inf
        snr = self.get_simple_snr(
            ar_flux_rebinned[pca.LY_A_PEAK_INDEX:pca.RED_END_GOODNESS_OF_FIT_INDEX],
            ar_ivar_rebinned[pca.LY_A_PEAK_INDEX:pca.RED_END_GOODNESS_OF_FIT_INDEX])

        return ar_full_fit, pca.ar_wavelength_bins, normalization_factor, goodness_of_fit, snr
Code example #23
File: read_spectrum_fits.py  Project: yishayv/lyacorr
def enum_spectra(qso_record_table,
                 plate_dir_list=PLATE_DIR_DEFAULT,
                 pre_sort=True,
                 flag_stats=None,
                 and_mask=AND_MASK,
                 or_mask=OR_MASK):
    """
    yields a QSO object from the fits files corresponding to the appropriate qso_record
    :type qso_record_table: table.Table
    :type plate_dir_list: list[string]
    :type pre_sort: bool
    :type flag_stats: Optional[FlagStats]
    :param and_mask: set ivar=0 according to these and-mask flags
    :param or_mask: set ivar=0 according to these or-mask flags
    :rtype: collections.Iterable[QSOData]
    """
    last_fits_partial_path = None
    # sort by plate to avoid reopening files too many times
    if pre_sort:
        qso_record_table.sort(['plate', 'mjd', 'fiberID'])

    for i in qso_record_table:
        qso_rec = QSORecord.from_row(i)
        fits_partial_path = get_fits_partial_path(qso_rec)

        # skip reading headers and getting data objects if the filename hasn't changed
        if fits_partial_path != last_fits_partial_path:
            fits_full_path = find_fits_file(plate_dir_list, fits_partial_path)
            if not fits_full_path:
                raise Exception("Missing file:", fits_partial_path)

            # get header
            hdu_list = fits.open(fits_full_path, memmap=True)
            hdu0_header = hdu_list[0].header
            hdu1_header = hdu_list[1].header

            num_pixels_ivar = hdu1_header["NAXIS1"]

            c0 = hdu0_header["COEFF0"]
            c1 = hdu0_header["COEFF1"]
            num_pixels = hdu0_header["NAXIS1"]

            assert num_pixels_ivar == num_pixels, "flux and ivar dimensions must be equal"

            # wavelength grid, log10-linear: lambda = 10^(c0 + c1 * pixel)
            counter = np.arange(0, num_pixels)
            o_grid = 10**(c0 + c1 * counter)

            # get flux_data
            flux_data = hdu_list[0].data
            ivar_data = hdu_list[1].data

            and_mask_data = hdu_list[2].data
            or_mask_data = hdu_list[3].data
            last_fits_partial_path = fits_partial_path

        if any(var is None for var in (flux_data, ivar_data, and_mask_data,
                                       or_mask_data, o_grid)):
            raise Exception("Unexpected uninitialized variables.")
        # return requested spectrum
        ar_flux = flux_data[qso_rec.fiberID - 1]
        ar_ivar = ivar_data[qso_rec.fiberID - 1]
        assert ar_flux.size == ar_ivar.size

        # copy the mask rows: the in-place bit shifts below must not touch the (memory-mapped) file data
        current_and_mask_data = np.array(and_mask_data[qso_rec.fiberID - 1])
        current_or_mask_data = np.array(or_mask_data[qso_rec.fiberID - 1])
        ar_effective_mask = np.logical_or(current_and_mask_data & and_mask,
                                          current_or_mask_data & or_mask)

        if flag_stats is not None:
            for bit in range(0, 32):
                flag_stats.flag_count[bit,
                                      0] += (current_and_mask_data & 1).sum()
                flag_stats.flag_count[bit,
                                      1] += (current_or_mask_data & 1).sum()
                current_and_mask_data >>= 1
                current_or_mask_data >>= 1
            flag_stats.pixel_count += current_and_mask_data.size

        # temporary: set ivar to 0 for all bad pixels
        ar_ivar[ar_effective_mask != 0] = 0

        yield QSOData(qso_rec, o_grid, ar_flux, ar_ivar)
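
A minimal usage sketch (enum_spectra is a generator, so iterate it; the metadata table is loaded as in code example #1, and handle_spectrum is a hypothetical consumer):

from astropy import table
import numpy as np

qso_record_table = table.Table(np.load(settings.get_qso_metadata_npy()))
for qso_data in enum_spectra(qso_record_table):
    # each QSOData carries the QSO record, wavelength grid, flux, and ivar
    handle_spectrum(qso_data)  # hypothetical consumer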
Code example #24
def profile_main():
    galaxy_metadata_file_npy = settings.get_galaxy_metadata_npy()
    histogram_output_npz = settings.get_ism_real_median_npz()

    galaxy_record_table = table.Table(np.load(galaxy_metadata_file_npy))

    num_extinction_bins = settings.get_num_extinction_bins()

    extinction_field_name = settings.get_extinction_source()

    # group results into extinction bins with roughly equal number of spectra.
    galaxy_record_table.sort([extinction_field_name])

    # remove objects with unknown extinction
    galaxy_record_table = galaxy_record_table[np.where(
        np.isfinite(galaxy_record_table[extinction_field_name]))]

    # if comm.size > num_extinction_bins:
    #     raise Exception('too many MPI nodes')

    # split the work into 'jobs' for each mpi node.
    # a job is defined as a single extinction bin.
    # the index of every extinction bin is its job number.

    job_sizes, job_offsets = get_chunks(num_extinction_bins, comm.size)
    job_start = job_offsets[comm.rank]
    job_end = job_start + job_sizes[comm.rank]

    chunk_sizes, chunk_offsets = get_chunks(len(galaxy_record_table),
                                            num_extinction_bins)

    for i in range(job_start, job_end):
        extinction_bin_start = chunk_offsets[i]
        extinction_bin_end = extinction_bin_start + chunk_sizes[i]

        extinction_bin_record_table = galaxy_record_table[
            extinction_bin_start:extinction_bin_end]

        # this should be done before plate sort
        group_parameters = {
            'extinction_bin_number': i,
            'extinction_minimum': extinction_bin_record_table[extinction_field_name][0],
            'extinction_maximum': extinction_bin_record_table[extinction_field_name][-1],
            'extinction_mean': np.mean(extinction_bin_record_table[extinction_field_name]),
            'extinction_median': np.median(extinction_bin_record_table[extinction_field_name]),
        }

        # sort by plate to avoid constant switching of fits files (which are per plate).
        extinction_bin_record_table.sort(['plate', 'mjd', 'fiberID'])

        base_filename, file_extension = splitext(histogram_output_npz)
        output_filename = '{}_{:02d}{}'.format(base_filename, i,
                                               file_extension)

        l_print_no_barrier('Starting extinction bin {}'.format(i))
        calc_median_spectrum(extinction_bin_record_table,
                             output_filename,
                             group_parameters=group_parameters)
        l_print_no_barrier('Finished extinction bin {}'.format(i))

    for _ in barrier_sleep(comm, use_yield=True):
        l_print_no_barrier("waiting")
Code example #25
def get_update_mask(num_updates, num_items):
    mask = np.zeros(num_items, dtype=bool)
    for i in range(num_updates):
        mask[int((i + 1) * num_items / num_updates) - 1] = True
    return mask
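
For example, to emit roughly four progress updates over ten items:

update_mask = get_update_mask(num_updates=4, num_items=10)
# -> [False, True, False, False, True, False, True, False, False, True]
for i in range(10):
    if update_mask[i]:
        print('progress:', i + 1, 'of', 10)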
Code example #26
def profile_main():
    qso_record_table = table.Table(np.load(settings.get_qso_metadata_npy()))
    qso_record_list = [QSORecord.from_row(i) for i in qso_record_table]

    qso_spectra_hdf5 = settings.get_qso_spectra_hdf5()
    output_spectra = Hdf5SpectrumContainer(qso_spectra_hdf5,
                                           readonly=False,
                                           create_new=False,
                                           num_spectra=MAX_SPECTRA)
    total_ar_x = np.array([])
    total_ar_y = np.array([])
    total_ar_z = np.array([])
    total_ar_c = np.array([])

    for n in range(len(qso_record_list)):
        qso_rec = qso_record_list[n]
        redshift = qso_rec.z

        # load data
        ar_wavelength = output_spectra.get_wavelength(n)
        ar_flux = output_spectra.get_flux(n)
        ar_ivar = output_spectra.get_ivar(n)

        # convert wavelength to redshift
        ar_redshift = ar_wavelength / lya_center - 1

        # fit continuum
        ar_rest_wavelength = ar_wavelength / (1 + redshift)

        fit_result = fit_pca.fit(ar_rest_wavelength,
                                 ar_flux,
                                 ar_ivar,
                                 qso_redshift=redshift,
                                 boundary_value=np.nan,
                                 mean_flux_constraint_func=None)

        # transmission is only meaningful in the ly_alpha range, and also requires a valid fit for that wavelength
        # use the same range as in 1404.1801 (2014)
        forest_mask = np.logical_and(ar_wavelength > 1040 * (1 + redshift),
                                     ar_wavelength < 1200 * (1 + redshift))
        fit_mask = ~np.isnan(fit_result.spectrum)
        effective_mask = forest_mask & fit_mask
        # ar_wavelength_masked = ar_wavelength[effective_mask]
        # ar_fit_spectrum_masked = fit_result.spectrum[effective_mask]

        # convert redshift to distance
        ar_dist = np.asarray(
            cd.fast_comoving_distance(ar_redshift[effective_mask]))

        dec = qso_rec.dec * np.pi / 180
        ra = qso_rec.ra * np.pi / 180
        x_unit = np.cos(dec) * np.cos(ra)
        y_unit = np.cos(dec) * np.sin(ra)
        z_unit = np.sin(dec)

        scale = 1
        ar_x = x_unit * ar_dist * scale
        ar_y = y_unit * ar_dist * scale
        # Note: this is the geometric coordinate, not redshift
        ar_z = z_unit * ar_dist * scale

        ar_mock_forest_array = mock_forest.get_forest(ar_x, ar_y, ar_z)

        ar_delta_t = -ar_mock_forest_array

        ar_rel_transmittance = ar_delta_t + 1

        # set the forest part of the spectrum to the mock forest
        mock_fraction = 1
        ar_flux[effective_mask] = \
            ar_flux[effective_mask] * (1 - mock_fraction) + \
            ar_rel_transmittance * fit_result.spectrum[effective_mask] * mock_fraction

        if draw_graph:
            display_mask = ar_mock_forest_array > 0.
            total_ar_x = np.append(total_ar_x, ar_x[display_mask])
            total_ar_y = np.append(total_ar_y, ar_y[display_mask])
            total_ar_z = np.append(total_ar_z, ar_z[display_mask])
            total_ar_c = np.append(total_ar_c,
                                   ar_mock_forest_array[display_mask])

        # overwrite the existing forest
        output_spectra.set_flux(n, ar_flux)
        if n % 1000 == 0:
            print(n)

    if draw_graph:
        mlab.points3d(total_ar_x,
                      total_ar_y,
                      total_ar_z,
                      total_ar_c,
                      mode='sphere',
                      scale_mode='vector',
                      scale_factor=20,
                      transparent=True,
                      vmin=0,
                      vmax=1,
                      opacity=0.03)
        mlab.show()
Code example #27
def ism_transmittance_chunk(qso_record_table):
    start_offset = qso_record_table[0]['index']
    # spectra = read_spectrum_hdf5.SpectraWithMetadata(qso_record_table, settings.get_qso_spectra_hdf5())
    # continuum_fit_file = NpSpectrumContainer(True, filename=settings.get_continuum_fit_npy())
    delta_transmittance_file = NpSpectrumContainer(
        readonly=True,
        filename=settings.get_delta_t_npy(),
        max_wavelength_count=1000)

    num_spectra = len(qso_record_table)
    ism_delta_t = NpSpectrumContainer(False, num_spectra=num_spectra)
    # warning: np.ndarray is not initialized by default. zeroing manually.
    ism_delta_t.zero()
    n = 0
    for i in range(len(qso_record_table)):
        qso_rec = QSORecord.from_row(qso_record_table[i])
        index = qso_rec.index

        # read original delta transmittance
        ar_redshift = delta_transmittance_file.get_wavelength(index)
        # ar_flux = delta_transmittance_file.get_flux(index)
        ar_ivar = delta_transmittance_file.get_ivar(index)

        # get correction to ISM
        # ar_flux_new, ar_ivar_new, is_corrected = pre_process_spectrum.mw_lines.apply_correction(
        #     ar_wavelength, np.ones_like(ar_flux), ar_ivar, qso_rec.ra, qso_rec.dec)

        ar_wavelength = (ar_redshift + 1) * lya_center  # type: np.ndarray
        # limit the maximum bin number because higher extinction bins are not reliable
        max_extinction_bin = min(20, ar_extinction_levels.size - 1)

        if np.isfinite(qso_rec.extinction_g):
            extinction_bin = int(
                np.round(
                    np.interp(qso_rec.extinction_g, ar_extinction_levels,
                              np.arange(ar_extinction_levels.size))))
            extinction_bin = min(extinction_bin, max_extinction_bin)
        else:
            extinction_bin = 0

        l_print_no_barrier("extinction_bin = ", extinction_bin)
        ar_ism_resampled = np.interp(
            ar_wavelength,
            extinction_spectra_list[extinction_bin][0],
            extinction_spectra_list[extinction_bin][1],
            left=np.nan,
            right=np.nan)
        extinction = ar_extinction_levels[extinction_bin]
        # rescale according to QSO extinction
        l_print_no_barrier(qso_rec.extinction_g, extinction)
        ism_scale_factor = 1.
        ar_flux_new = ((ar_ism_resampled - 1) * ism_scale_factor *
                       qso_rec.extinction_g / extinction)

        mask = np.logical_and(np.isfinite(ar_flux_new), ar_ivar)

        ism_delta_t.set_wavelength(i, ar_redshift[mask])
        ism_delta_t.set_flux(i, ar_flux_new[mask])
        # use the original ivar because we are not correcting an existing spectrum
        ism_delta_t.set_ivar(i, ar_ivar[mask])

        n += 1

    l_print_no_barrier("chunk n =", n, "offset =", start_offset)
    return ism_delta_t.as_np_array(), None