Example #1
    def __init__(self, path, data, command_line):

        Likelihood.__init__(self, path, data, command_line)

        # Check if the data can be found, although we don't actually use that
        # particular file but take it as a placeholder for the folder
        try:
            fname = os.path.join(
                self.data_directory,
                'DATA_VECTOR/KiDS-450_xi_pm_tomographic_data_vector.dat')
            parser_mp.existing_file(fname)
        except Exception:
            raise io_mp.ConfigurationError(
                'KiDS-450 CF data not found. Download the data at '
                'http://kids.strw.leidenuniv.nl/sciencedata.php '
                'and specify path to data through the variable '
                'kids450_cf_2cosmos_likelihood_public.data_directory in '
                'the .data file. See README in likelihood folder '
                'for further instructions.')

        # for loading of Nz-files:
        self.z_bins_min = [0.1, 0.3, 0.5, 0.7]
        self.z_bins_max = [0.3, 0.5, 0.7, 0.9]

        # number of angular bins in which xipm is measured
        # we always load the full data vector with 9 data points for xi_p and
        # xi_m each; they are cut to the fiducial scales (or any arbitrarily
        # defined scales with the 'cut_values.dat' files)!
        self.ntheta = 9

        # Force the cosmological module to store Pk for redshifts up to
        # max(self.z) and for k up to k_max
        self.need_cosmo1_arguments(data, {'output': 'mPk'})
        self.need_cosmo1_arguments(data,
                                   {'P_k_max_h/Mpc': self.k_max_h_by_Mpc})
        self.need_cosmo2_arguments(data, {'output': 'mPk'})
        self.need_cosmo2_arguments(data,
                                   {'P_k_max_h/Mpc': self.k_max_h_by_Mpc})

        # Compute non-linear power spectrum if requested:
        if self.method_non_linear_Pk in [
                'halofit', 'HALOFIT', 'Halofit', 'hmcode', 'Hmcode', 'HMcode',
                'HMCODE'
        ]:
            self.need_cosmo1_arguments(
                data, {'non linear': self.method_non_linear_Pk})
            self.need_cosmo2_arguments(
                data, {'non linear': self.method_non_linear_Pk})
            print('Using {:} to obtain the non-linear P(k, z)!'.format(
                self.method_non_linear_Pk))
        else:
            print(
                'Only using the linear P(k, z) for ALL calculations \n (check keywords for "method_non_linear_Pk").'
            )

        self.nzbins = len(self.z_bins_min)
        self.nzcorrs = self.nzbins * (self.nzbins + 1) // 2
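        # e.g. for nzbins = 4 this gives nzcorrs = 4 * 5 // 2 = 10 unique
        # bin pairs: (1,1), (2,1), (2,2), (3,1), ..., (4,4)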

        # Create labels for loading of dn/dz-files:
        self.zbin_labels = []
        for i in range(self.nzbins):
            self.zbin_labels += [
                '{:.1f}t{:.1f}'.format(self.z_bins_min[i], self.z_bins_max[i])
            ]

        # Define array of l values, and initialize them
        # It is a logspace
        # find nlmax in order to reach lmax with logarithmic steps dlnl
        self.nlmax = int(np.log(self.lmax) / self.dlnl) + 1
        # redefine slightly dlnl so that the last point is always exactly lmax
        self.dlnl = np.log(self.lmax) / (self.nlmax - 1)
        self.l = np.exp(self.dlnl * np.arange(self.nlmax))
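        # Worked example with illustrative values (not the actual .data
        # settings): for lmax = 1000 and dlnl = 0.5 we get
        # nlmax = int(log(1000) / 0.5) + 1 = 14, dlnl is then reset to
        # log(1000) / 13 ~ 0.531, and l = exp(dlnl * arange(14)) runs from
        # l[0] = 1 up to l[-1] = exactly 1000.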

        # TODO: not really needed when bootstrap errors are selected...
        # Read fiducial dn_dz from window files:
        # TODO: zmin and zmax are hardcoded to fiducial lower and upper limit
        # of midpoint histogram!
        self.z_p = np.linspace(0.025, 3.475, self.nzmax)
        self.pz = np.zeros((self.nzmax, self.nzbins))
        self.pz_norm = np.zeros(self.nzbins, 'float64')
        for zbin in range(self.nzbins):
            window_file_path = os.path.join(
                self.data_directory,
                'Nz_{0:}/Nz_{0:}_Mean/Nz_{0:}_z{1:}.asc'.format(
                    self.nz_method, self.zbin_labels[zbin]))
            if os.path.exists(window_file_path):
                zptemp, hist_pz = np.loadtxt(window_file_path,
                                             usecols=[0, 1],
                                             unpack=True)
                # store the z-sampling of the first bin and check that all
                # other bins share it:
                if zbin == 0:
                    zpcheck = zptemp
                if np.sum((zptemp - zpcheck)**2) > 1e-6:
                    raise io_mp.LikelihoodError(
                        'The redshift values for the window files at '
                        'different bins do not match.')
                print('Loaded n(zbin{:}) from: \n'.format(zbin + 1),
                      window_file_path)
                # we assume that the histograms loaded are given as left-border histograms
                # and that the z-spacing is the same for each histogram
                shift_to_midpoint = np.diff(zptemp)[0] / 2.
                spline_pz = itp.splrep(zptemp + shift_to_midpoint, hist_pz)
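                # e.g. for left borders zptemp = [0.0, 0.1, 0.2, ...] the
                # shift is 0.05, so the spline nodes sit at the bin midpoints
                # [0.05, 0.15, 0.25, ...].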
                z_mod = self.z_p  #+ self.shift_by_dz[zbin]
                mask_min = z_mod >= zptemp.min()
                mask_max = z_mod <= zptemp.max()
                mask = mask_min & mask_max
                self.pz[mask, zbin] = itp.splev(z_mod[mask], spline_pz)
                # Normalize selection functions
                dz = self.z_p[1:] - self.z_p[:-1]
                self.pz_norm[zbin] = np.sum(
                    0.5 * (self.pz[1:, zbin] + self.pz[:-1, zbin]) * dz)
            else:
                raise io_mp.LikelihoodError("File not found:\n %s" %
                                            window_file_path)

        self.zmax = self.z_p.max()
        self.need_cosmo1_arguments(data, {'z_max_pk': self.zmax})
        self.need_cosmo2_arguments(data, {'z_max_pk': self.zmax})

        # read in public data vector:
        temp = self.__load_public_data_vector()
        self.theta_bins = temp[:, 0]
        if (np.sum(
            (self.theta_bins[:self.ntheta] - self.theta_bins[self.ntheta:])**2)
                > 1e-6):
            raise io_mp.LikelihoodError(
                'The angular values at which xi+ and xi- '
                'are observed do not match')

        # create the data-vector in the following format (due to covariance structure):
        # xi_obs = {xi1(theta1, z_11)...xi1(theta_k, z_11), xi2(theta_1, z_11)...
        #           xi2(theta_k, z_11);...; xi1(theta1, z_nn)...xi1(theta_k, z_nn),
        #           xi2(theta_1, z_nn)... xi2(theta_k, z_nn)}
        xi_obs = self.__get_xi_obs(temp[:, 1:])
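        # A minimal sketch of that reordering (assumed here; the actual code
        # lives in the private helper __get_xi_obs), with xi_p and xi_m of
        # shape (ntheta, nzcorrs):
        #   xi_obs = np.zeros(2 * self.nzcorrs * self.ntheta)
        #   for izcorr in range(self.nzcorrs):
        #       # ntheta points of xi+ followed by ntheta points of xi-:
        #       xi_obs[2 * izcorr * self.ntheta:
        #              (2 * izcorr + 1) * self.ntheta] = xi_p[:, izcorr]
        #       xi_obs[(2 * izcorr + 1) * self.ntheta:
        #              2 * (izcorr + 1) * self.ntheta] = xi_m[:, izcorr]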

        # concatenate xi_obs with itself to create the ueberdata-vector:
        self.xi_obs_1 = xi_obs
        self.xi_obs_2 = xi_obs

        xi_obs_combined = np.concatenate((xi_obs, xi_obs))

        # now load the full covariance matrix:
        covmat_block = self.__load_public_cov_mat()

        # build a combined cov-mat; for this to work we assume that the cov-mat
        # dimension fits the size of the *uncut*, single data vector and that it
        # is ordered in the same way as the *final* data vector created here
        # (i.e. vec = [xi+(1,1), xi-(1,1), xi+(1,2), xi-(1,2), ...])!
        covmat = np.block([[covmat_block, covmat_block],
                           [covmat_block, covmat_block]])

        # Read angular cut values (OPTIONAL)
        # 1 --> fiducial scales
        # 2 --> large scales

        if self.use_cut_theta:
            cut_values1 = np.zeros((self.nzbins, 2))
            cut_values2 = np.zeros((self.nzbins, 2))

            cutvalues_file_path1 = os.path.join(
                self.data_directory, 'CUT_VALUES/' + self.cutvalues_file1)
            if os.path.exists(cutvalues_file_path1):
                cut_values1 = np.loadtxt(cutvalues_file_path1)
            else:
                raise io_mp.LikelihoodError(
                    'File not found:\n {:} \n Check that requested file was copied to:\n {:}'
                    .format(cutvalues_file_path1,
                            self.data_directory + 'CUT_VALUES/'))

            cutvalues_file_path2 = os.path.join(
                self.data_directory, 'CUT_VALUES/' + self.cutvalues_file2)
            if os.path.exists(cutvalues_file_path2):
                cut_values2 = np.loadtxt(cutvalues_file_path2)
            else:
                raise io_mp.LikelihoodError(
                    'File not found:\n {:} \n Check that requested file was copied to:\n {:}'
                    .format(cutvalues_file_path2,
                            self.data_directory + 'CUT_VALUES/'))

        # Compute theta mask
        if self.use_cut_theta:
            mask1 = self.__get_mask(cut_values1)
            mask2 = self.__get_mask(cut_values2)
        else:
            mask1 = np.ones(2 * self.nzcorrs * self.ntheta)
            mask2 = np.ones(2 * self.nzcorrs * self.ntheta)

        #print(mask1, len(np.where(mask1 == 1)[0]))
        #print(mask2, len(np.where(mask2 == 1)[0]))
        # for tomographic splits:
        # e.g.
        # mask1 = fiducial
        # mask2 = z-bin 3 only (gives also all cross_powers)
        # --> mask1 = mask1 - mask2 --> all remaining bin combinations
        if self.subtract_mask2_from_mask1:
            mask1 = mask1 - mask2

        #print(mask1, len(np.where(mask1 == 1)[0]))
        #print(mask2, len(np.where(mask2 == 1)[0]))

        self.mask_indices1 = np.where(mask1 == 1)[0]
        self.mask_indices2 = np.where(mask2 == 1)[0]

        # combine "fiducial" mask and "large scales" mask:
        # this is wrong, because indices in second half are only wrt. first half!!!
        #self.mask_indices = np.concatenate((self.mask_indices1, self.mask_indices2))

        # combine "fiducial" mask and "large scales" mask:
        mask = np.concatenate((mask1, mask2))
        self.mask_indices = np.where(mask == 1)[0]
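        # e.g. (illustrative): for mask1 = [1, 0, 1] and mask2 = [0, 1, 1],
        # mask = [1, 0, 1, 0, 1, 1] and self.mask_indices = [0, 2, 4, 5];
        # the indices of the second half are offset by len(mask1), which the
        # commented-out concatenation of index arrays above got wrong.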

        # apply equation 12 from Hildebrandt et al. 2017 to covmat:
        # this assumes that m-correction was already applied to data-vector!
        if self.marginalize_over_multiplicative_bias_uncertainty:
            cov_m_corr = np.matrix(
                xi_obs_combined[self.mask_indices]).T * np.matrix(
                    xi_obs_combined[self.mask_indices]
                ) * 4. * self.err_multiplicative_bias**2
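            # i.e. eq. (12) as an outer product of the masked data vector
            # with itself: cov_m_corr[i, j] = 4 * sigma_m**2 * xi_i * xi_j.
            # An equivalent ndarray form (sketch) would be:
            #   4. * self.err_multiplicative_bias**2 * np.outer(
            #       xi_obs_combined[self.mask_indices],
            #       xi_obs_combined[self.mask_indices])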
            #covmat = covmat[self.mask_indices][:, self.mask_indices] + np.asarray(cov_m_corr)
            covmat = covmat[np.ix_(self.mask_indices,
                                   self.mask_indices)] + np.asarray(cov_m_corr)
        else:
            #covmat = covmat[self.mask_indices][:, self.mask_indices]
            covmat = covmat[np.ix_(self.mask_indices, self.mask_indices)]

        fname = os.path.join(self.data_directory,
                             'cov_matrix_ana_comb_cut.dat')
        np.savetxt(fname, covmat)
        print('Saved trimmed covariance to: \n', fname)

        # precompute Cholesky transform for chi^2 calculation:
        self.cholesky_transform = cholesky(covmat, lower=True)
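        # Sketch of how the factor is typically used in loglkl (assuming
        # scipy.linalg.solve_triangular is available):
        #   diff = xi_theory_masked - xi_data_masked
        #   yt = solve_triangular(self.cholesky_transform, diff, lower=True)
        #   chi2 = yt.dot(yt)
        # which evaluates diff^T C^{-1} diff without inverting the covariance.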

        # Fill array of discrete z values
        # self.z = np.linspace(0, self.zmax, num=self.nzmax)
        '''
        ################
        # Noise spectrum
        ################
        # only useful for theoretical signal

        # Number of galaxies per steradian
        self.noise = 3600.*self.gal_per_sqarcmn*(180./math.pi)**2

        # Number of galaxies per steradian per bin
        self.noise = self.noise/self.nzbins

        # Noise spectrum (diagonal in bin*bin space, independent of l and Bin)
        self.noise = self.rms_shear**2/self.noise
        '''

        ################################################
        # discrete theta values (to convert C_l to xi's)
        ################################################

        thetamin = np.min(self.theta_bins) * 0.8
        thetamax = np.max(self.theta_bins) * 1.2

        self.nthetatot = np.ceil(
            math.log(thetamax / thetamin) / self.dlntheta) + 1
        self.nthetatot = np.int32(self.nthetatot)
        self.theta = np.zeros(self.nthetatot, 'float64')
        self.a2r = math.pi / (180. * 60.)

        # define an array of theta's
        for it in range(self.nthetatot):
            self.theta[it] = thetamin * math.exp(self.dlntheta * it)

        ################################################################
        # discrete l values used in the integral to convert C_l to xi's)
        ################################################################

        # l = x / theta / self.a2r
        # x = l * theta * self.a2r

        # We start by considering the largest theta, theta[-1], and for that value we infer
        # a list of l's from the requirement that corresponding x values are spaced linearly with a given stepsize, until xmax.
        # Then we loop over smaller theta values, in decreasing order, and for each of them we complete the previous list of l's,
        # always requiring the same dx stepsize (so that dl does vary) up to xmax.
        #
        # We first apply this to a running value ll, in order to count the total number of ll's, called nl.
        # Then we create the array lll[nl] and we fill it with the same values.
        #
        # we also compute on the fly the critical index il_max[it] such that ll[il_max[it]]*self.theta[it]*self.a2r
        # is the first value of x above xmax

        ll = 1.
        il = 0
        while (ll * self.theta[-1] * self.a2r < self.dx_threshold):
            ll += self.dx_below_threshold / self.theta[-1] / self.a2r
            il += 1
        for it in range(self.nthetatot):
            while (ll * self.theta[self.nthetatot - 1 - it] * self.a2r <
                   self.xmax) and (ll + self.dx_above_threshold /
                                   self.theta[self.nthetatot - 1 - it] /
                                   self.a2r < self.lmax):
                ll += self.dx_above_threshold / self.theta[self.nthetatot - 1 -
                                                           it] / self.a2r
                il += 1
        self.nl = il + 1

        self.lll = np.zeros(self.nl, 'float64')
        self.il_max = np.zeros(self.nthetatot, 'int')
        il = 0
        self.lll[il] = 1.
        while (self.lll[il] * self.theta[-1] * self.a2r < self.dx_threshold):
            il += 1
            self.lll[il] = self.lll[
                il - 1] + self.dx_below_threshold / self.theta[-1] / self.a2r
        for it in range(self.nthetatot):
            while (self.lll[il] * self.theta[self.nthetatot - 1 - it] *
                   self.a2r < self.xmax) and (
                       self.lll[il] + self.dx_above_threshold /
                       self.theta[self.nthetatot - 1 - it] / self.a2r <
                       self.lmax):
                il += 1
                self.lll[il] = self.lll[
                    il - 1] + self.dx_above_threshold / self.theta[
                        self.nthetatot - 1 - it] / self.a2r
            self.il_max[self.nthetatot - 1 - it] = il

        # finally we compute the array l*dl that will be used in the trapezoidal integration
        # (l is a factor in the integrand [l * C_l * Bessel], and dl is like a weight)
        self.ldl = np.zeros(self.nl, 'float64')
        self.ldl[0] = self.lll[0] * 0.5 * (self.lll[1] - self.lll[0])
        for il in range(1, self.nl - 1):
            self.ldl[il] = self.lll[il] * 0.5 * (self.lll[il + 1] -
                                                 self.lll[il - 1])
        self.ldl[-1] = self.lll[-1] * 0.5 * (self.lll[-1] - self.lll[-2])
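        # i.e. ldl[il] = l_il * (l_{il+1} - l_{il-1}) / 2, so that a sum like
        # sum(ldl * C(l) * J_{0/4}(l * theta)) approximates the integral
        # xi_{+/-}(theta) = 1/(2 pi) * int dl l C(l) J_{0/4}(l theta)
        # (the constant prefactor is assumed to be applied where the sum is
        # evaluated).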

        return
Example #2
    def __init__(self, path, data, command_line):
        # I should already take care of using only GRF mocks or data here (because of different folder-structures etc...)
        # or for now just write it for GRFs for tests and worry about it later...
        Likelihood.__init__(self, path, data, command_line)

        # Check if the data can be found
        try:
            fname = os.path.join(self.data_directory,
                                 'Resetting_bias/parameters_B_mode_model.dat')
            parser_mp.existing_file(fname)
        except Exception:
            raise io_mp.ConfigurationError(
                'KiDS-450 QE data not found. Download the data at '
                'http://kids.strw.leidenuniv.nl/sciencedata.php '
                'and specify path to data through the variable '
                'kids450_qe_likelihood_public.data_directory in '
                'the .data file. See README in likelihood folder '
                'for further instructions.')

        # TODO: this is also CFHTLenS legacy...
        # only relevant for GRFs!
        #dict_BWM = {'W1': 'G10_', 'W2': 'G126_', 'W3': 'G162_', 'W4': 'G84_'}

        self.need_cosmo_arguments(data, {'output': 'mPk'})

        self.redshift_bins = []
        for index_zbin in range(len(self.zbin_min)):
            redshift_bin = '{:.2f}z{:.2f}'.format(self.zbin_min[index_zbin],
                                                  self.zbin_max[index_zbin])
            self.redshift_bins.append(redshift_bin)

        # number of z-bins
        self.nzbins = len(self.redshift_bins)
        # number of *unique* correlations between z-bins
        self.nzcorrs = self.nzbins * (self.nzbins + 1) // 2

        all_bands_EE_to_use = []
        all_bands_BB_to_use = []
        '''
        if self.fit_cross_correlations_only:
            # mask out auto-spectra:
            for index_zbin1 in xrange(self.nzbins):
                for index_zbin2 in xrange(index_zbin1 + 1):
                    if index_zbin1 == index_zbin2:
                        all_bands_EE_to_use += np.zeros_like(self.bands_EE_to_use).tolist()
                        all_bands_BB_to_use += np.zeros_like(self.bands_BB_to_use).tolist()
                    else:
                        all_bands_EE_to_use += self.bands_EE_to_use
                        all_bands_BB_to_use += self.bands_BB_to_use

        else:
            # default, use all correlations:
            for i in xrange(self.nzcorrs):
                all_bands_EE_to_use += self.bands_EE_to_use
                all_bands_BB_to_use += self.bands_BB_to_use
        '''
        # default, use all correlations:
        for i in range(self.nzcorrs):
            all_bands_EE_to_use += self.bands_EE_to_use
            all_bands_BB_to_use += self.bands_BB_to_use

        all_bands_to_use = np.concatenate(
            (all_bands_EE_to_use, all_bands_BB_to_use))
        self.indices_for_bands_to_use = np.where(
            np.asarray(all_bands_to_use) == 1)[0]

        # this is also the number of points in the datavector
        ndata = len(self.indices_for_bands_to_use)

        # I should load all the data needed only once, i.e. HERE:
        # not so sure about statement above, I have the feeling "init" is called for every MCMC step...
        # maybe that's why the memory is filling up on other machines?! --> nope, that wasn't the reason...
        start_load = time.time()

        if self.correct_resetting_bias:
            fname = os.path.join(self.data_directory,
                                 'Resetting_bias/parameters_B_mode_model.dat')
            A_B_modes, exp_B_modes, err_A_B_modes, err_exp_B_modes = np.loadtxt(
                fname, unpack=True)
            self.params_resetting_bias = np.array([A_B_modes, exp_B_modes])
            fname = os.path.join(self.data_directory,
                                 'Resetting_bias/covariance_B_mode_model.dat')
            self.cov_resetting_bias = np.loadtxt(fname)

        # try to load fiducial m-corrections from file (currently these are global values over full field, hence no looping over fields required for that!)
        # TODO: Make output dependent on field, not necessary for current KiDS approach though!
        try:
            fname = os.path.join(
                self.data_directory,
                '{:}zbins/m_correction_avg.txt'.format(self.nzbins))
            if self.nzbins == 1:
                self.m_corr_fiducial_per_zbin = np.asarray(
                    [np.loadtxt(fname, usecols=[1])])
            else:
                self.m_corr_fiducial_per_zbin = np.loadtxt(fname, usecols=[1])
        except Exception:
            self.m_corr_fiducial_per_zbin = np.zeros(self.nzbins)
            print('Could not load m-correction values from \n', fname)
            print('Setting them to zero instead.')

        try:
            fname = os.path.join(
                self.data_directory,
                '{:}zbins/sigma_int_n_eff_{:}zbins.dat'.format(
                    self.nzbins, self.nzbins))
            tbdata = np.loadtxt(fname)
            if self.nzbins == 1:
                # correct columns for file!
                sigma_e1 = np.asarray([tbdata[2]])
                sigma_e2 = np.asarray([tbdata[3]])
                n_eff = np.asarray([tbdata[4]])
            else:
                # correct columns for file!
                sigma_e1 = tbdata[:, 2]
                sigma_e2 = tbdata[:, 3]
                n_eff = tbdata[:, 4]

            self.sigma_e = np.sqrt((sigma_e1**2 + sigma_e2**2) / 2.)
            # convert from 1 / sq. arcmin to 1 / sterad
            self.n_eff = n_eff / np.deg2rad(1. / 60.)**2
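            # e.g. (illustrative): n_eff = 2.8 per sq. arcmin corresponds to
            # 2.8 / ((pi / 180.) / 60.)**2 ~ 3.3e7 per sterad.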
        except Exception:
            # these dummies will set noise power always to 0!
            self.sigma_e = np.zeros(self.nzbins)
            self.n_eff = np.ones(self.nzbins)
            print('Could not load sigma_e and n_eff!')

        collect_bp_EE_in_zbins = []
        collect_bp_BB_in_zbins = []
        # collect BP per zbin and combine into one array
        for zbin1 in range(self.nzbins):
            for zbin2 in range(zbin1 + 1):  #self.nzbins):
                # zbin2 first in fname!
                fname_EE = os.path.join(
                    self.data_directory,
                    '{:}zbins/band_powers_EE_z{:}xz{:}.dat'.format(
                        self.nzbins, zbin1 + 1, zbin2 + 1))
                fname_BB = os.path.join(
                    self.data_directory,
                    '{:}zbins/band_powers_BB_z{:}xz{:}.dat'.format(
                        self.nzbins, zbin1 + 1, zbin2 + 1))
                extracted_band_powers_EE = np.loadtxt(fname_EE)
                extracted_band_powers_BB = np.loadtxt(fname_BB)
                collect_bp_EE_in_zbins.append(extracted_band_powers_EE)
                collect_bp_BB_in_zbins.append(extracted_band_powers_BB)

        self.band_powers = np.concatenate(
            (np.asarray(collect_bp_EE_in_zbins).flatten(),
             np.asarray(collect_bp_BB_in_zbins).flatten()))
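        # The resulting data vector is thus ordered as (sketch):
        #   [EE(z1xz1), EE(z2xz1), EE(z2xz2), ..., EE(zNxzN),
        #    BB(z1xz1), ..., BB(zNxzN)],
        # which is assumed to match the ordering of the covariance in
        # covariance_all_z_EE_BB.dat loaded below.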

        fname = os.path.join(
            self.data_directory,
            '{:}zbins/covariance_all_z_EE_BB.dat'.format(self.nzbins))
        self.covariance = np.loadtxt(fname)

        fname = os.path.join(
            self.data_directory,
            '{:}zbins/band_window_matrix_nell100.dat'.format(self.nzbins))
        self.band_window_matrix = np.loadtxt(fname)
        # ells_intp and also band_offset are consistent between different patches!

        fname = os.path.join(
            self.data_directory,
            '{:}zbins/multipole_nodes_for_band_window_functions_nell100.dat'.
            format(self.nzbins))
        self.ells_intp = np.loadtxt(fname)
        self.band_offset_EE = len(extracted_band_powers_EE)
        self.band_offset_BB = len(extracted_band_powers_BB)

        # Check if any of the n(z) needs to be shifted in loglkl by D_z{1...n}:
        self.shift_n_z_by_D_z = np.zeros(self.nzbins, 'bool')
        for zbin in range(self.nzbins):
            param_name = 'D_z{:}'.format(zbin + 1)
            if param_name in data.mcmc_parameters:
                self.shift_n_z_by_D_z[zbin] = True

        # Read fiducial dn_dz from window files:
        # TODO: the hardcoded z_min and z_max correspond to the lower and upper
        # endpoints of the shifted left-border histogram!
        z_samples = []
        hist_samples = []
        for zbin in range(self.nzbins):
            redshift_bin = self.redshift_bins[zbin]
            window_file_path = os.path.join(
                self.data_directory,
                '{:}/n_z_avg_{:}.hist'.format(self.photoz_method,
                                              redshift_bin))
            if os.path.exists(window_file_path):
                zptemp, hist_pz = np.loadtxt(window_file_path,
                                             usecols=[0, 1],
                                             unpack=True)
                shift_to_midpoint = np.diff(zptemp)[0] / 2.
                # store the z-sampling of the first bin and check that all
                # other bins share it:
                if zbin == 0:
                    zpcheck = zptemp
                if np.sum((zptemp - zpcheck)**2) > 1e-6:
                    raise io_mp.LikelihoodError(
                        'The redshift values for the window files at '
                        'different bins do not match.')
                print('Loaded n(zbin{:}) from: \n'.format(zbin + 1),
                      window_file_path)
                # we add a zero as first element because we want to integrate down to z = 0!
                z_samples += [
                    np.concatenate((np.zeros(1), zptemp + shift_to_midpoint))
                ]
                hist_samples += [np.concatenate((np.zeros(1), hist_pz))]
            else:
                raise io_mp.LikelihoodError("File not found:\n %s" %
                                            window_file_path)

        z_samples = np.asarray(z_samples)
        hist_samples = np.asarray(hist_samples)

        # prevent undersampling of histograms!
        if self.nzmax < len(zptemp):
            raise io_mp.LikelihoodError(
                "You're trying to integrate at a lower resolution than "
                "supplied by the n(z) histograms. Increase nzmax!")
        # if that's the case, we want to integrate at histogram resolution and need to account for
        # the extra zero entry added
        elif self.nzmax == len(zptemp):
            self.nzmax = z_samples.shape[1]
            # requires that z-spacing is always the same for all bins...
            self.redshifts = z_samples[0, :]
            print('Integrations performed at resolution of histogram!')
        # if we interpolate anyway at arbitrary resolution the extra 0 doesn't matter
        else:
            self.nzmax += 1
            self.redshifts = np.linspace(z_samples.min(), z_samples.max(),
                                         self.nzmax)
            print('Integration performed at set nzmax resolution!')
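        # e.g. (illustrative): for histograms with 70 z-values, nzmax = 70
        # hits the middle branch and becomes 71 via z_samples (which carry
        # the prepended zero), while nzmax = 120 leads to interpolation on a
        # linspace of 121 points.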

        self.pz = np.zeros((self.nzmax, self.nzbins))
        self.pz_norm = np.zeros(self.nzbins, 'float64')
        for zbin in range(self.nzbins):
            # we assume that the histograms loaded are given as left-border histograms
            # and that the z-spacing is the same for each histogram
            spline_pz = itp.splrep(z_samples[zbin, :], hist_samples[zbin, :])

            #z_mod = self.z_p
            mask_min = self.redshifts >= z_samples[zbin, :].min()
            mask_max = self.redshifts <= z_samples[zbin, :].max()
            mask = mask_min & mask_max
            # points outside the z-range of the histograms are set to 0!
            self.pz[mask, zbin] = itp.splev(self.redshifts[mask], spline_pz)
            # Normalize selection functions
            dz = self.redshifts[1:] - self.redshifts[:-1]
            self.pz_norm[zbin] = np.sum(
                0.5 * (self.pz[1:, zbin] + self.pz[:-1, zbin]) * dz)

        self.z_max = self.redshifts.max()

        # k_max is arbitrary at the moment, since cosmology module is not calculated yet...TODO
        if self.mode == 'halofit':
            self.need_cosmo_arguments(
                data, {
                    'z_max_pk': self.z_max,
                    'output': 'mPk',
                    'non linear': self.mode,
                    'P_k_max_h/Mpc': self.k_max_h_by_Mpc
                })
        else:
            self.need_cosmo_arguments(
                data, {
                    'z_max_pk': self.z_max,
                    'output': 'mPk',
                    'P_k_max_h/Mpc': self.k_max_h_by_Mpc
                })

        print('Time for loading all data files:', time.time() - start_load)

        fname = os.path.join(self.data_directory, 'number_datapoints.txt')
        np.savetxt(fname, [ndata],
                   header='number of datapoints in masked datavector')

        return
Example #3
    def __init__(self, path, data, command_line):

        Likelihood.__init__(self, path, data, command_line)

        # Check if the data can be found, although we don't actually use that
        # particular file but take it as a placeholder for the folder
        try:
            fname = os.path.join(
                self.data_directory,
                'DATA_VECTOR/KiDS-450_xi_pm_tomographic_data_vector.dat')
            parser_mp.existing_file(fname)
        except Exception:
            raise io_mp.ConfigurationError(
                'KiDS-450 CF data not found. Download the data at '
                'http://kids.strw.leidenuniv.nl/sciencedata.php '
                'and specify path to data through the variable '
                'kids450_cf_likelihood_public.data_directory in '
                'the .data file. See README in likelihood folder '
                'for further instructions.')

        # for loading of Nz-files:
        self.z_bins_min = [0.1, 0.3, 0.5, 0.7]
        self.z_bins_max = [0.3, 0.5, 0.7, 0.9]

        # number of angular bins in which xipm is measured
        # we always load the full data vector with 9 data points for xi_p and
        # xi_m each; they are cut to the fiducial scales (or any arbitrarily
        # defined scales with the 'cut_values.dat' files)!
        self.ntheta = 9

        # Force the cosmological module to store Pk for redshifts up to
        # max(self.z) and for k up to k_max
        self.need_cosmo_arguments(data, {'output': 'mPk'})
        self.need_cosmo_arguments(data, {'P_k_max_h/Mpc': self.k_max_h_by_Mpc})

        # Compute non-linear power spectrum if requested:
        if self.method_non_linear_Pk in [
                'halofit', 'HALOFIT', 'Halofit', 'hmcode', 'Hmcode', 'HMcode',
                'HMCODE'
        ]:
            self.need_cosmo_arguments(
                data, {'non linear': self.method_non_linear_Pk})
            print('Using {:} to obtain the non-linear P(k, z)!'.format(
                self.method_non_linear_Pk))
        else:
            print(
                'Only using the linear P(k, z) for ALL calculations \n (check keywords for "method_non_linear_Pk").'
            )

        # Define array of l values, and initialize them
        # It is a logspace
        # find nlmax in order to reach lmax with logarithmic steps dlnl
        self.nlmax = int(np.log(self.lmax) / self.dlnl) + 1
        # redefine slightly dlnl so that the last point is always exactly lmax
        self.dlnl = np.log(self.lmax) / (self.nlmax - 1)
        self.l = np.exp(self.dlnl * np.arange(self.nlmax))

        self.nzbins = len(self.z_bins_min)
        self.nzcorrs = self.nzbins * (self.nzbins + 1) // 2

        # Create labels for loading of dn/dz-files:
        self.zbin_labels = []
        for i in range(self.nzbins):
            self.zbin_labels += [
                '{:.1f}t{:.1f}'.format(self.z_bins_min[i], self.z_bins_max[i])
            ]

        # read in public data vector:
        temp = self.__load_public_data_vector()
        self.theta_bins = temp[:, 0]
        if (np.sum(
            (self.theta_bins[:self.ntheta] - self.theta_bins[self.ntheta:])**2)
                > 1e-6):
            raise io_mp.LikelihoodError(
                'The angular values at which xi+ and xi- '
                'are observed do not match')

        # create the data-vector in the following format (due to covariance structure):
        # xi_obs = {xi1(theta1, z_11)...xi1(theta_k, z_11), xi2(theta_1, z_11)...
        #           xi2(theta_k, z_11);...; xi1(theta1, z_nn)...xi1(theta_k, z_nn),
        #           xi2(theta_1, z_nn)... xi2(theta_k, z_nn)}
        self.xi_obs = self.__get_xi_obs(temp[:, 1:])

        # now load the full covariance matrix:
        covmat = self.__load_public_cov_mat()

        # Read angular cut values (OPTIONAL)
        if self.use_cut_theta:
            cut_values1 = np.zeros((self.nzbins, 2))
            cut_values2 = np.zeros((self.nzbins, 2))

            cutvalues_file_path1 = os.path.join(
                self.data_directory, 'CUT_VALUES/' + self.cutvalues_file1)
            if os.path.exists(cutvalues_file_path1):
                cut_values1 = np.loadtxt(cutvalues_file_path1)
            else:
                raise io_mp.LikelihoodError(
                    'File not found:\n {:} \n Check that requested file was copied to:\n {:}'
                    .format(cutvalues_file_path1,
                            self.data_directory + 'CUT_VALUES/'))

            if self.subtract_mask2_from_mask1:
                cutvalues_file_path2 = os.path.join(
                    self.data_directory, 'CUT_VALUES/' + self.cutvalues_file2)
                if os.path.exists(cutvalues_file_path2):
                    cut_values2 = np.loadtxt(cutvalues_file_path2)
                else:
                    raise io_mp.LikelihoodError(
                        'File not found:\n {:} \n Check that requested file was copied to:\n {:}'
                        .format(cutvalues_file_path2,
                                self.data_directory + 'CUT_VALUES/'))

        # Compute theta mask
        if self.use_cut_theta:
            mask1 = self.__get_mask(cut_values1)
            if self.subtract_mask2_from_mask1:
                mask2 = self.__get_mask(cut_values2)
                mask = mask1 - mask2
            else:
                mask = mask1
        else:
            mask = np.ones(2 * self.nzcorrs * self.ntheta)

        self.mask_indices = np.where(mask == 1)[0]
        fname = os.path.join(self.data_directory, 'kids450_xipm_4bin_cut.dat')
        np.savetxt(fname, self.xi_obs[self.mask_indices])

        # propagate uncertainty of m-correction following equation (12) in
        # Hildebrandt et al. 2017 (arXiv:1606.05338) with \sigma_m = 0.01
        # NOTE: following Troxel et al. 2018 (arXiv:1804.10663) it is NOT
        # correct to use the noisy data vector for this; instead one should use
        # a theory vector (e.g. derived for the same cosmology for which the
        # analytical covariance was calculated).
        fname = os.path.join(self.data_directory, 'cov_matrix_ana_cut.dat')
        if self.marginalize_over_multiplicative_bias_uncertainty:
            cov_m_corr = np.matrix(
                self.xi_obs[self.mask_indices]).T * np.matrix(self.xi_obs[
                    self.mask_indices]) * 4. * self.err_multiplicative_bias**2
            covmat = covmat[self.mask_indices][:,
                                               self.mask_indices] + np.asarray(
                                                   cov_m_corr)
            #covmat = covmat[np.ix_(self.mask_indices, self.mask_indices)]
            np.savetxt(fname, covmat)
            #covmat = covmat + np.asarray(cov_m_corr)
        else:
            #covmat = covmat[self.mask_indices][:, self.mask_indices]
            covmat = covmat[np.ix_(self.mask_indices, self.mask_indices)]
            np.savetxt(fname, covmat)

        # precompute Cholesky transform for chi^2 calculation:
        self.cholesky_transform = cholesky(covmat, lower=True)

        # Read fiducial dn_dz from window files:
        #self.z_p = np.zeros(self.nzmax)
        # TODO: the hardcoded z_min and z_max correspond to the lower and upper
        # endpoints of the shifted left-border histogram!
        self.z_p = np.linspace(0.025, 3.475, self.nzmax)
        self.pz = np.zeros((self.nzmax, self.nzbins))
        self.pz_norm = np.zeros(self.nzbins, 'float64')
        for zbin in range(self.nzbins):
            window_file_path = os.path.join(
                self.data_directory,
                'Nz_{0:}/Nz_{0:}_Mean/Nz_{0:}_z{1:}.asc'.format(
                    self.nz_method, self.zbin_labels[zbin]))

            zptemp, hist_pz = np.loadtxt(window_file_path,
                                         usecols=[0, 1],
                                         unpack=True)
            # store the z-sampling of the first bin and check that all other
            # bins share it:
            if zbin == 0:
                zpcheck = zptemp
            if np.sum((zptemp - zpcheck)**2) > 1e-6:
                raise io_mp.LikelihoodError(
                    'The redshift values for the window files at '
                    'different bins do not match.')
            print('Loaded n(zbin{:}) from: \n'.format(zbin + 1),
                  window_file_path)
            # we assume that the histograms loaded are given as left-border histograms
            # and that the z-spacing is the same for each histogram
            shift_to_midpoint = np.diff(zptemp)[0] / 2.
            spline_pz = itp.splrep(zptemp + shift_to_midpoint, hist_pz)
            z_mod = self.z_p  #+ shift_by_dz[zbin]
            mask_min = z_mod >= zptemp.min()
            mask_max = z_mod <= zptemp.max()
            mask = mask_min & mask_max
            # points outside the z-range of the histograms are set to 0!
            self.pz[mask, zbin] = itp.splev(z_mod[mask], spline_pz)
            # Normalize selection functions
            dz = self.z_p[1:] - self.z_p[:-1]
            self.pz_norm[zbin] = np.sum(
                0.5 * (self.pz[1:, zbin] + self.pz[:-1, zbin]) * dz)

        self.zmax = self.z_p.max()
        self.need_cosmo_arguments(data, {'z_max_pk': self.zmax})

        ################################################
        # discrete theta values (to convert C_l to xi's)
        ################################################

        thetamin = np.min(self.theta_bins) * 0.8
        thetamax = np.max(self.theta_bins) * 1.2

        self.nthetatot = np.ceil(
            math.log(thetamax / thetamin) / self.dlntheta) + 1
        self.nthetatot = np.int32(self.nthetatot)
        self.theta = np.zeros(self.nthetatot, 'float64')
        self.a2r = math.pi / (180. * 60.)

        # define an array of theta's
        for it in range(self.nthetatot):
            self.theta[it] = thetamin * math.exp(self.dlntheta * it)

        ################################################################
        # discrete l values used in the integral to convert C_l to xi's)
        ################################################################

        # l = x / theta / self.a2r
        # x = l * theta * self.a2r

        # We start by considering the largest theta, theta[-1], and for that value we infer
        # a list of l's from the requirement that corresponding x values are spaced linearly with a given stepsize, until xmax.
        # Then we loop over smaller theta values, in decreasing order, and for each of them we complete the previous list of l's,
        # always requiring the same dx stepsize (so that dl does vary) up to xmax.
        #
        # We first apply this to a running value ll, in order to count the total number of ll's, called nl.
        # Then we create the array lll[nl] and we fill it with the same values.
        #
        # we also compute on the fly the critical index il_max[it] such that ll[il_max[it]]*self.theta[it]*self.a2r
        # is the first value of x above xmax

        ll = 1.
        il = 0
        while (ll * self.theta[-1] * self.a2r < self.dx_threshold):
            ll += self.dx_below_threshold / self.theta[-1] / self.a2r
            il += 1
        for it in range(self.nthetatot):
            while (ll * self.theta[self.nthetatot - 1 - it] * self.a2r <
                   self.xmax) and (ll + self.dx_above_threshold /
                                   self.theta[self.nthetatot - 1 - it] /
                                   self.a2r < self.lmax):
                ll += self.dx_above_threshold / self.theta[self.nthetatot - 1 -
                                                           it] / self.a2r
                il += 1
        self.nl = il + 1

        self.lll = np.zeros(self.nl, 'float64')
        self.il_max = np.zeros(self.nthetatot, 'int')
        il = 0
        self.lll[il] = 1.
        while (self.lll[il] * self.theta[-1] * self.a2r < self.dx_threshold):
            il += 1
            self.lll[il] = self.lll[
                il - 1] + self.dx_below_threshold / self.theta[-1] / self.a2r
        for it in range(self.nthetatot):
            while (self.lll[il] * self.theta[self.nthetatot - 1 - it] *
                   self.a2r < self.xmax) and (
                       self.lll[il] + self.dx_above_threshold /
                       self.theta[self.nthetatot - 1 - it] / self.a2r <
                       self.lmax):
                il += 1
                self.lll[il] = self.lll[
                    il - 1] + self.dx_above_threshold / self.theta[
                        self.nthetatot - 1 - it] / self.a2r
            self.il_max[self.nthetatot - 1 - it] = il

        # finally we compute the array l*dl that will be used in the trapezoidal integration
        # (l is a factor in the integrand [l * C_l * Bessel], and dl is like a weight)
        self.ldl = np.zeros(self.nl, 'float64')
        self.ldl[0] = self.lll[0] * 0.5 * (self.lll[1] - self.lll[0])
        for il in range(1, self.nl - 1):
            self.ldl[il] = self.lll[il] * 0.5 * (self.lll[il + 1] -
                                                 self.lll[il - 1])
        self.ldl[-1] = self.lll[-1] * 0.5 * (self.lll[-1] - self.lll[-2])

        #####################################################################
        # Allocation of various arrays filled and used in the function loglkl
        #####################################################################

        self.r = np.zeros(self.nzmax, 'float64')
        self.dzdr = np.zeros(self.nzmax, 'float64')
        self.g = np.zeros((self.nzmax, self.nzbins), 'float64')
        self.pk = np.zeros((self.nlmax, self.nzmax), 'float64')
        self.k_sigma = np.zeros(self.nzmax, 'float64')
        self.alpha = np.zeros((self.nlmax, self.nzmax), 'float64')
        if 'epsilon' in self.use_nuisance:
            self.E_th_nu = np.zeros((self.nlmax, self.nzmax), 'float64')
        self.Cl_integrand = np.zeros((self.nzmax, self.nzcorrs), 'float64')
        self.Cl = np.zeros((self.nlmax, self.nzcorrs), 'float64')
        '''
        if self.theoretical_error != 0:
            self.El_integrand = np.zeros((self.nzmax, self.nzcorrs),'float64')
            self.El = np.zeros((self.nlmax, self.nzcorrs), 'float64')
        '''
        self.spline_Cl = np.empty(self.nzcorrs, dtype=(list, 3))
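        # Note: np.empty(n, dtype=(list, 3)) allocates an object array of
        # shape (n, 3); here it is assumed to hold the (t, c, k) tuples
        # returned by itp.splrep for each z-bin correlation, as used later
        # in loglkl.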
        self.xi1 = np.zeros((self.nthetatot, self.nzcorrs), 'float64')
        self.xi2 = np.zeros((self.nthetatot, self.nzcorrs), 'float64')
        self.Cll = np.zeros((self.nzcorrs, self.nl), 'float64')
        self.BBessel0 = np.zeros(self.nl, 'float64')
        self.BBessel4 = np.zeros(self.nl, 'float64')
        self.xi1_theta = np.empty(self.nzcorrs, dtype=(list, 3))
        self.xi2_theta = np.empty(self.nzcorrs, dtype=(list, 3))
        self.xi = np.zeros(np.size(self.xi_obs), 'float64')

        return