Beispiel #1
0
    def fill_mcmc_parameters(self):
        """
        Initialize the ordered dictionary :attr:`mcmc_parameters` from
        the input parameter file.

        It uses :meth:`read_file`, and initializes instances of
        :class:`parameter` to actually fill in :attr:`mcmc_parameters`.

        Raises
        ------
        io_mp.ConfigurationError
            If the parameter file cannot be opened, or if the number of
            uncommented ``data.parameters[`` lines in the file differs from
            the number of parameters actually read (which indicates that
            the same parameter name appears twice).

        """

        # Define temporary quantities, only to simplify the input in the
        # parameter file
        self.parameters = od()

        # Read from the parameter file everything. The handle is kept on the
        # instance on purpose (it is stored as an attribute, not a local).
        try:
            self.param_file = open(self.param, 'r')
        except IOError:
            raise io_mp.ConfigurationError(
                "Error in initializing the Data class, the parameter file " +
                "{0} does not point to a proper file".format(self.param))
        # In case the parameter file is a log.param, scan first once the file
        # to extract only the path dictionary.
        if self.param.find('log.param') != -1:
            self.read_file(self.param, 'data', field='path')
        self.read_file(self.param, 'data')

        # Test here whether the number of parameters extracted corresponds to
        # the number of lines (to make sure no duplicate is present). The
        # file is opened in a context manager so that this second handle is
        # closed as soon as the count is done.
        with open(self.param, 'r') as param_lines:
            number_of_parameters = sum(
                1 for line in param_lines
                if '#' not in line and 'data.parameters[' in line)
        if number_of_parameters != len(self.parameters):
            raise io_mp.ConfigurationError(
                "You probably have two lines in your parameter files with "
                "the same parameter name. This is most probably an error, "
                "which will cause problems down the line. Please fix this.")

        # Do the same for every experiments - but only if you are starting a
        # new folder. Otherwise, this step will actually be done when
        # initializing the likelihood.
        if self.param.find('log.param') == -1:
            for experiment in self.experiments:
                self.read_file(self.param, experiment, separate=True)

        # Finally create all the instances of the Parameter given the input.
        # `items()` is used instead of `iteritems()` so that the loop works
        # on both Python 2 and Python 3.
        for key, value in self.parameters.items():
            self.mcmc_parameters[key] = Parameter(value, key)

            # When there is no prior edge requested, the syntax consists in
            # setting it to 'None' in the input file. There is also an old,
            # deprecated syntax: '-1'. It is still accepted, but replaced by
            # None right after parsing. A user who really wants a prior edge
            # at -1 can write -1.0, which the test below does not match.
            for i in [1, 2]:
                if str(self.mcmc_parameters[key]['initial'][i]) == '-1':
                    self.mcmc_parameters[key]['initial'][i] = None
        """
def recover_cosmological_module(data):
    """
    From the cosmological module name, initialise the proper Boltzmann code

    .. note::

        Only CLASS is currently wrapped, but a python wrapper of CosmoMC should
        enter here.

    Parameters
    ----------
    data : object
        must expose ``cosmological_module_name`` and a ``path`` mapping with
        a ``'cosmo'`` entry pointing to the cosmological code folder.

    Returns
    -------
    cosmo : classy.Class
        a fresh instance of the python-wrapped CLASS.

    Raises
    ------
    io_mp.ConfigurationError
        if the build folder of the wrapper cannot be listed, or if the
        cosmological module name is not recognised.
    io_mp.MissingLibraryError
        if ``classy`` cannot be imported.

    """
    # Importing the python-wrapped CLASS from the correct folder, defined in
    # the .conf file, or overwritten at this point by the log.param.
    # If the cosmological code is CLASS, do the following to import all
    # relevant quantities
    if data.cosmological_module_name == 'CLASS':
        build_dir = os.path.join(data.path['cosmo'], "python", "build")
        try:
            # Collect every build output folder first; choosing among them
            # is only meaningful once the full list is known.
            candidates = [
                elem for elem in os.listdir(build_dir)
                if elem.find("lib.") != -1
            ]
        except OSError:
            raise io_mp.ConfigurationError(
                "You probably did not compile the python wrapper of CLASS. " +
                "Please go to /path/to/class/python/ and do\n" +
                "..]$ python setup.py build")

        # With no candidate at all, keep the historical behaviour of
        # inserting an empty string, so that a globally installed classy
        # can still be picked up by the import below.
        classy_path = ''
        if candidates:
            # Default to the last entry found; when several builds coexist,
            # prefer the one whose name carries the running Python version.
            chosen = candidates[-1]
            if len(candidates) > 1:
                stringcheck = "%i.%i" % (sys.version_info.major,
                                         sys.version_info.minor)
                for elem in candidates:
                    if stringcheck in elem:
                        chosen = elem
                        break
            classy_path = os.path.join(build_dir, chosen)

        # Inserting the previously found path into the list of folders to
        # search for python modules.
        sys.path.insert(1, classy_path)
        try:
            from classy import Class
        except ImportError:
            raise io_mp.MissingLibraryError(
                "You must have compiled the classy.pyx file. Please go to " +
                "/path/to/class/python and run the command\n " +
                "python setup.py build")

        cosmo = Class()
    else:
        raise io_mp.ConfigurationError(
            "Unrecognised cosmological module. " +
            "Be sure to define the correct behaviour in MontePython.py " +
            "and data.py, to support a new one.")

    return cosmo
Beispiel #3
0
def run(cosmo, data, command_line):
    """
    Hand the run over to the sampler selected on the command line.

    The value of ``command_line.method`` decides which module is imported
    (lazily, only when needed) and called:

    + ``'MH'``: :func:`mcmc.chain`, after which ``data.out`` is closed,
    + ``'NS'``: :func:`nested_sampling.run`,
    + ``'CH'``: :func:`cosmo_hammer.run`,
    + ``'IS'``: :func:`importance_sampling.run`,
    + ``'Der'``: :func:`add_derived.run`.

    Any other value raises :class:`io_mp.ConfigurationError`.

    In the long term, this function should contain any potential hybrid scheme.

    """
    method = command_line.method

    # Metropolis-Hastings is the only sampler for which the output file is
    # closed here.
    if method == 'MH':
        import mcmc
        mcmc.chain(cosmo, data, command_line)
        data.out.close()
        return

    if method == 'NS':
        import nested_sampling as ns
        ns.run(cosmo, data, command_line)
        return

    if method == 'CH':
        import cosmo_hammer as hammer
        hammer.run(cosmo, data, command_line)
        return

    if method == 'IS':
        import importance_sampling as ims
        ims.run(cosmo, data, command_line)
        return

    if method == 'Der':
        import add_derived as der
        der.run(cosmo, data, command_line)
        return

    # None of the known samplers matched
    raise io_mp.ConfigurationError("Sampling method %s not understood" %
                                   command_line.method)
 def error(self, message):
     """Report a problem by raising a configuration error

     Parameters
     ----------
     message: string
         text handed over to the raised exception

     Raises
     ------
     io_mp.ConfigurationError
         always, built from `message`
     """
     failure = io_mp.ConfigurationError(message)
     raise failure
Beispiel #5
0
def run(cosmo, data, command_line):
    """
    Rewrite chains with more derived parameters

    Starting from an existing folder, with some chains, constraining a certain
    model, and having some derived parameters, the idea is to recompute the
    cosmological code to follow additional derived parameters.

    The target folder (``command_line.Der_target_folder``) is created when
    missing, the new derived parameters are registered in
    ``data.mcmc_parameters``, the ``log.param`` is copied over through
    :func:`copy_log_file`, and every chain file of the starting folder is
    processed by :func:`extend_chain_star` in a pool of worker processes.

    Raises
    ------
    io_mp.ConfigurationError
        if no new derived parameter was given on the command line.
    """
    target_folder = command_line.Der_target_folder
    # If it does not exist, create it
    if not os.path.isdir(target_folder):
        os.makedirs(target_folder)

    starting_folder = command_line.folder
    # Recover all chains in the starting folder, if it exists. A file is
    # considered a chain when its name contains a double underscore.
    chains = []
    if os.path.isdir(starting_folder):
        chains = [
            elem for elem in os.listdir(starting_folder) if "__" in elem
        ]

    # Read the additional derived parameter, remove the needs for output=mPk
    # except if sigma8 is there.
    new_derived = command_line.derived_parameters
    if not new_derived:
        raise io_mp.ConfigurationError(
            "You asked to add derived parameters, but did not specify a list "
            "of new ones to consider. Please use the flag `--Der-param-list`.")
    # Add them to the mcmc_parameters dict
    for param in new_derived:
        data.mcmc_parameters[param] = Parameter(
            [0, None, None, 0, 1, 'derived'], param)
    # Reset the cosmo_arguments dict output entry, and adapt it in case a
    # derived parameter requires a particular CLASS behaviour.
    data.cosmo_arguments.update({'output': ''})
    for key in ['lensing', 'l_max_scalars']:
        data.cosmo_arguments.pop(key, None)
    if 'sigma8' in new_derived:
        data.cosmo_arguments.update({'output': 'mPk'})

    # Copy the log.param over from the starting folder, and add new lines
    # concerning the new derived parameters, for analysis.
    copy_log_file(starting_folder, target_folder, new_derived)

    # Preparing the arguments for reading the files
    pool = Pool()
    args = [(data, cosmo, command_line, target_folder, elem, new_derived)
            for elem in chains]
    # Note the use of extend_chain_star, and not extend_chain, because of
    # the limitations of the `map` function (it only takes one argument). The
    # `_star` function simply unwraps the argument.
    # The parenthesised single-argument form prints the same text on both
    # Python 2 and Python 3.
    print('\nStart extracting the chains:\n')
    try:
        pool.map(extend_chain_star, args)
    finally:
        # Close the pool and join everything, even when a worker failed, so
        # that no worker process is left behind.
        pool.close()
        pool.join()
Beispiel #6
0
def safe_initialisation(custom_command="", comm=None, nprocs=1):
    """
    Wrapper around the init function to handle errors

    When :func:`initialise` fails and a communicator is given, the string
    ``'failed'`` is sent (with ``tag=1``) to the processes of rank 1 to
    ``nprocs - 1`` before the error is raised again.

    KeyWord Arguments
    -----------------
    custom_command : str
        testing purposes
    comm : MPI.Intracomm
        object that helps communicating between the processes
    nprocs : int
        number of processes

    Returns
    -------
    tuple
        ``(cosmo1, cosmo2, data, command_line, success)`` exactly as returned
        by :func:`initialise`.

    Raises
    ------
    io_mp.ConfigurationError
        if :func:`initialise` raised a ``ConfigurationError`` or a
        ``KeyError``.
    """
    try:
        cosmo1, cosmo2, data, command_line, success = initialise(
            custom_command)
    except io_mp.ConfigurationError as message:
        if comm:
            for index in range(1, nprocs):
                comm.send('failed', dest=index, tag=1)
        # Show the original error before raising the generic explanation
        print(str(message))
        raise io_mp.ConfigurationError(
            "The initialisation was not successful, resulting in a "
            "potentially half created `log.param`. Please see the "
            "above error message. If you run the exact same command, it"
            " will not work. You should solve the problem, and try again.")
    except KeyError as e:
        if comm:
            for index in range(1, nprocs):
                comm.send('failed', dest=index, tag=1)
        # `str(e)` is used because exceptions have no `message` attribute on
        # Python 3; reading it would raise AttributeError and hide the
        # explanation below.
        raise io_mp.ConfigurationError(
            "You are running in a folder that was created following "
            "a non-successful initialisation (wrong parameter name, "
            "wrong likelihood, etc...). If you have solved the issue, you "
            "should remove completely the output folder, and try again." +
            " Alternatively, there could be a problem with " + str(e))
    return cosmo1, cosmo2, data, command_line, success
Beispiel #7
0
def recover_local_path(command_line):
    """
    Read the configuration file, filling a dictionary

    Parameters
    ----------
    command_line : object
        must expose ``subparser_name``; when it equals ``'run'``, also
        ``config_file`` and (if that file is missing) ``param``.

    Returns
    -------
    path : dict
        contains the absolute path to the location of the code, the data, the
        cosmological code, and potential likelihood codes (clik for Planck,
        etc)

    Raises
    ------
    io_mp.ConfigurationError
        when running, if the configuration file does not exist and the
        parameter file is not a ``log.param``.
    """
    # Define the dictionary that will hold the local configuration
    path = {}

    # The path is recovered by taking the path to this file (MontePython.py).
    # By default, then, the data folder is located in the same root directory.
    # Any setting in the configuration file will overwrite this one.
    path['root'] = os.path.sep.join(
        os.path.abspath(__file__).split(os.path.sep)[:-2])
    path['MontePython'] = os.path.join(path['root'], 'montepython')
    path['data'] = os.path.join(path['root'], 'data')

    # the rest is important only when running the MCMC chains
    if command_line.subparser_name == 'run':
        # Configuration file, defaulting to default.conf in your root
        # directory.  This can be changed with the command line option --conf.
        # All changes will be stored into the log.param of your folder, and
        # hence will be reused for an ulterior run in the same directory
        conf_file = os.path.abspath(command_line.config_file)
        if os.path.isfile(conf_file):
            # NOTE(security): every line of the configuration file is
            # executed as Python code. Only use configuration files that you
            # trust.
            # The lines are run in one explicit namespace (module globals
            # plus `path`), so that a name defined on one line remains
            # visible to the following ones without relying on the implicit
            # persistence of `locals()` between `exec` calls.
            conf_namespace = dict(globals(), path=path)
            with open(conf_file) as conf_lines:
                for line in conf_lines:
                    exec(line, conf_namespace)
            for key, value in dictitems(path):
                path[key] = os.path.normpath(os.path.expanduser(value))
        else:
            # The error is ignored if reading from a log.param, because it is
            # stored
            if command_line.param.find('log.param') == -1:
                raise io_mp.ConfigurationError(
                    "You must provide a valid .conf file (I tried to read "
                    "%s) that specifies the correct locations for your data "
                    "folder, Class, (Clik), etc..." % conf_file)

    return path
    def __init__(self, array):
        """
        It takes as input argument the array of the input
        :data:`parameters` defined in the parameter file.

        The current implemented types are 'flat' (default), and 'gaussian',
        which expect also a mean and sigma. Possible extension would take a
        'external', needing to read an external file to read for the
        definition.

        The entry 'prior' of the dictionary :data:`mcmc_parameters` will hold
        an instance of this class. It defines one main function, called
        :func:`draw_from_prior`, that returns a number within the prior volume.

        An array of exactly six entries selects the flat prior. Otherwise
        the entry of index 6 gives the prior type, and for a gaussian prior
        the entries of index 7 and 8 are stored as ``mu`` and ``sigma``.
        The entries of index 1 and 2 are stored as ``prior_range``.

        Raises
        ------
        io_mp.ConfigurationError
            if a gaussian prior is requested without mean and sigma.

        """

        rd.seed()

        # Test the length of the array, and initialize the type.
        if len(array) == 6:
            # Default behaviour, flat prior
            self.prior_type = 'flat'
        else:
            self.prior_type = array[6].lower()
            # in case of a gaussian prior, one expects two more entries, mu and
            # sigma
            if self.prior_type == 'gaussian':
                try:
                    self.mu = array[7]
                    self.sigma = array[8]
                except IndexError:
                    raise io_mp.ConfigurationError(
                        "You asked for a gaussian prior, but provided no " +
                        "mean nor sigma. Please add them in the parameter " +
                        "file.")

        # Store boundaries for convenient access later
        # Put all fields that are -1 to None to avoid confusion later on.
        # The comparison is done on the string form rather than with
        # `is -1`: identity with an integer literal is an implementation
        # detail, while the string test still maps the integer -1 to None
        # and leaves the float -1.0 untouched as a genuine boundary.
        self.prior_range = [
            None if (a is None or str(a) == '-1') else a
            for a in deepcopy(array[1:3])
        ]
Beispiel #9
0
def chain(cosmo, data, command_line):
    """
    Run a Markov chain of fixed length with a Metropolis Hastings algorithm.

    Main function of this module, this is the actual Markov chain procedure.
    After having selected a starting point in parameter space defining the
    first **last accepted** one, it will, for a given amount of steps :

    + choose randomly a new point following the *proposal density*,
    + compute the cosmological *observables* through the cosmological module,
    + compute the value of the *likelihoods* of the desired experiments at this
      point,
    + *accept/reject* this point given its likelihood compared to the one of
      the last accepted one.

    Every time the code accepts :code:`data.write_step` number of points
    (quantity defined in the input parameter file), it will write the result to
    disk (flushing the buffer by forcing to exit the output file, and reopen it
    again).

    .. note::

        to use the code to set a fiducial file for certain fixed parameters,
        you can use two solutions. The first one is to put all input 1-sigma
        proposal density to zero (this method still works, but is not
        recommended anymore). The second one consist in using the flag "-f 0",
        to force a step of zero amplitude.

    Returns
    -------
    tuple or None
        ``(1, loglike)`` when only one point is computed (no varying
        parameter, or a jumping factor of 0); ``None`` after a full chain.

    Raises
    ------
    io_mp.ConfigurationError
        if no valid starting position is found after 100 tries.

    """

    ## Initialisation
    loglike = 0

    # In case command_line.silent has been asked, outputs should only contain
    # data.out. Otherwise, it will also contain sys.stdout
    outputs = [data.out]
    if not command_line.silent:
        outputs.append(sys.stdout)

    # check for MPI
    try:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        # suppress duplicate output from slaves
        if rank:
            command_line.quiet = True
    except ImportError:
        # set all chains to master if no MPI
        rank = 0

    # Recover the covariance matrix according to the input, if the varying set
    # of parameters is non-zero
    if (data.get_mcmc_parameters(['varying']) != []):
        sigma_eig, U, C = sampler.get_covariance_matrix(
            cosmo, data, command_line)
        if data.jumping_factor == 0:
            warnings.warn(
                "The jumping factor has been set to 0. The above covariance " +
                "matrix will not be used.")

    # In case of a fiducial run (all parameters fixed), simply run once and
    # print out the likelihood. This should not be used any more (one has to
    # modify the log.param, which is never a good idea. Instead, force the code
    # to use a jumping factor of 0 with the option "-f 0".
    else:
        warnings.warn(
            "You are running with no varying parameters... I will compute " +
            "only one point and exit")
        data.update_cosmo_arguments()  # this fills in the fixed parameters
        loglike = sampler.compute_lkl(cosmo, data)
        io_mp.print_vector(outputs, 1, loglike, data)
        return 1, loglike

    # In the fast-slow method, one need the Cholesky decomposition of the
    # covariance matrix. Return the Cholesky decomposition as a lower
    # triangular matrix
    Cholesky = None
    Rotation = None
    if command_line.jumping == 'fast':
        Cholesky = la.cholesky(C).T
        Rotation = np.identity(len(sigma_eig))

    # If the update mode was selected, the previous (or original) matrix should be stored
    if command_line.update:
        previous = (sigma_eig, U, C, Cholesky)

    # If restart wanted, pick initial value for arguments
    if command_line.restart is not None:
        sampler.read_args_from_chain(data, command_line.restart)

    # If restart from best fit file, read first point (overwrite settings of
    # read_args_from_chain)
    if command_line.bf is not None:
        sampler.read_args_from_bestfit(data, command_line.bf)

    # Pick a position (from last accepted point if restart, from the mean value
    # else), with a 100 tries.
    for i in range(100):
        if get_new_position(data, sigma_eig, U, i, Cholesky, Rotation) is True:
            break
        if i == 99:
            raise io_mp.ConfigurationError(
                "You should probably check your prior boundaries... because " +
                "no valid starting position was found after 100 tries")

    # Compute the starting Likelihood
    loglike = sampler.compute_lkl(cosmo, data)

    # Choose this step as the last accepted value
    # (accept_step), and modify accordingly the max_loglike
    sampler.accept_step(data)
    max_loglike = loglike

    # If the jumping factor is 0, the likelihood associated with this point is
    # displayed, and the code exits.
    if data.jumping_factor == 0:
        io_mp.print_vector(outputs, 1, loglike, data)
        return 1, loglike

    acc, rej = 0.0, 0.0  # acceptance and rejection number count
    N = 1  # number of time the system stayed in the current position

    # define path and covmat
    input_covmat = command_line.cov
    base = os.path.basename(command_line.folder)
    # the previous line fails when "folder" is a string ending with a slash. This issue is cured by the next lines:
    if base == '':
        base = os.path.basename(command_line.folder[:-1])
    command_line.cov = os.path.join(command_line.folder, base + '.covmat')

    # Print on screen the computed parameters
    if not command_line.silent and not command_line.quiet:
        io_mp.print_parameters(sys.stdout, data)

    # Suppress non-informative output after initializing
    command_line.quiet = True

    # Whether the run should end as soon as the proposal has been updated.
    # The flag is read from the command line object (presumably filled by a
    # `--stop-after-update` option -- to be confirmed against the parser) and
    # defaults to False when the attribute is absent. The earlier code tested
    # an undefined name inside a bare `except`, so the flag never had any
    # effect.
    stop_after_update = getattr(command_line, 'stop_after_update', False)

    k = 1
    # Main loop, that goes on while the maximum number of failure is not
    # reached, and while the expected amount of steps (N) is not taken.
    while k <= command_line.N:

        # If the number of steps reaches the number set in the update method,
        # then the proposal distribution should be adapted.
        if command_line.update:

            # master chain behavior
            if not rank:
                # Add the folder to the list of files to analyze, and switch on the
                # options for computing only the covmat
                from parser_mp import parse
                info_command_line = parse(
                    'info %s --minimal --noplot --keep-fraction 0.5 --keep-non-markovian --want-covmat'
                    % command_line.folder)
                info_command_line.update = command_line.update
                # the +10 below is here to ensure that the first master update will take place before the first slave updates,
                # but this is a detail, the code is robust against situations where updating is not possible, so +10 could be omitted
                if not (k + 10) % command_line.update and k > 10:
                    # Try to launch an analyze. This is best effort: any
                    # failure is reported and the chain goes on.
                    try:
                        from analyze import analyze
                        R_minus_one = analyze(info_command_line)
                    except Exception:
                        if not command_line.silent:
                            print('Step  %s  chain  %s : Failed to calculate covariant matrix'
                                  % (k, rank))

                if not (k - 1) % command_line.update:
                    try:
                        # Read the covmat
                        sigma_eig, U, C = sampler.get_covariance_matrix(
                            cosmo, data, command_line)
                        if command_line.jumping == 'fast':
                            Cholesky = la.cholesky(C).T
                        # Test here whether the covariance matrix has really changed
                        # We should in principle test all terms, but testing the first one should suffice
                        if not C[0, 0] == previous[2][0, 0]:
                            previous = (sigma_eig, U, C, Cholesky)
                            if k == 1:
                                if not command_line.silent:
                                    if input_covmat is not None:
                                        warnings.warn(
                                            'Appending to an existing folder: using %s instead of %s. '
                                            'If new input covmat is desired, please delete previous covmat.'
                                            % (command_line.cov, input_covmat))
                                    else:
                                        warnings.warn(
                                            'Appending to an existing folder: using %s. '
                                            'If no starting covmat is desired, please delete previous covmat.'
                                            % command_line.cov)
                            else:
                                data.out.write(
                                    '# After %d accepted steps: update proposal with max(R-1) = %f \n'
                                    % (int(acc), max(R_minus_one)))
                                if not command_line.silent:
                                    print('After %d accepted steps: update proposal with max(R-1) = %f \n'
                                          % (int(acc), max(R_minus_one)))
                                if stop_after_update:
                                    k = command_line.N
                                    print('Covariant matrix updated - stopping run')

                    except Exception:
                        # Updating the proposal is optional: when it is not
                        # possible (no covmat yet, no R-1 available, ...),
                        # carry on with the current proposal.
                        pass

                    command_line.quiet = True

            # slave chain behavior
            else:
                if not (k - 1) % command_line.update:
                    try:
                        sigma_eig, U, C = sampler.get_covariance_matrix(
                            cosmo, data, command_line)
                        if command_line.jumping == 'fast':
                            Cholesky = la.cholesky(C).T
                        # Test here whether the covariance matrix has really changed
                        # We should in principle test all terms, but testing the first one should suffice
                        if not C[0, 0] == previous[2][0, 0] and not k == 1:
                            data.out.write(
                                '# After %d accepted steps: update proposal \n'
                                % int(acc))
                            if not command_line.silent:
                                print('After %d accepted steps: update proposal \n'
                                      % int(acc))
                            if stop_after_update:
                                k = command_line.N
                                print('Covariant matrix updated - stopping run')
                        previous = (sigma_eig, U, C, Cholesky)

                    except IOError:
                        pass

        # Pick a new position ('current' flag in mcmc_parameters), and compute
        # its likelihood. If get_new_position returns True, it means it did not
        # encounter any boundary problem. Otherwise, just increase the
        # multiplicity of the point and start the loop again
        if get_new_position(data, sigma_eig, U, k, Cholesky, Rotation) is True:
            newloglike = sampler.compute_lkl(cosmo, data)
        else:  # reject step
            rej += 1
            N += 1
            k += 1
            continue

        # Harmless trick to avoid exponentiating large numbers. This decides
        # whether or not the system should move.
        if (newloglike != data.boundary_loglike):
            if (newloglike >= loglike):
                alpha = 1.
            else:
                alpha = np.exp(newloglike - loglike)
        else:
            alpha = -1

        if ((alpha == 1.) or (rd.uniform(0, 1) < alpha)):  # accept step

            # Print out the last accepted step (WARNING: this is NOT the one we
            # just computed ('current' flag), but really the previous one.)
            # with its proper multiplicity (number of times the system stayed
            # there).
            io_mp.print_vector(outputs, N, loglike, data)

            # Report the 'current' point to the 'last_accepted'
            sampler.accept_step(data)
            loglike = newloglike
            if loglike > max_loglike:
                max_loglike = loglike
            acc += 1.0
            N = 1  # Reset the multiplicity

        else:  # reject step
            rej += 1.0
            N += 1  # Increase multiplicity of last accepted point

        # Regularly (option to set in parameter file), close and reopen the
        # buffer to force to write on file.
        if acc % data.write_step == 0:
            io_mp.refresh_file(data)
            # Update the outputs list
            outputs[0] = data.out
        k += 1  # One iteration done
    # END OF WHILE LOOP

    # If at this moment, the multiplicity is higher than 1, it means the
    # current point is not yet accepted, but it also mean that we did not print
    # out the last_accepted one yet. So we do.
    if N > 1:
        io_mp.print_vector(outputs, N - 1, loglike, data)

    # Print out some information on the finished chain
    rate = acc / (acc + rej)
    sys.stdout.write('\n#  {0} steps done, acceptance rate: {1}\n'.format(
        command_line.N, rate))

    # In case the acceptance rate is too low, or too high, print a warning
    if rate < 0.05:
        warnings.warn("The acceptance rate is below 0.05. You might want to "
                      "set the jumping factor to a lower value than the "
                      "default (2.4), with the option `-f 1.5` for instance.")
    elif rate > 0.6:
        warnings.warn("The acceptance rate is above 0.6, which means you might"
                      " have difficulties exploring the entire parameter space"
                      ". Try analysing these chains, and use the output "
                      "covariance matrix to decrease the acceptance rate to a "
                      "value between 0.2 and 0.4 (roughly).")

    # For a restart, erase the starting point to keep only the new, longer
    # chain.
    if command_line.restart is not None:
        os.remove(command_line.restart)
        sys.stdout.write(
            '    deleting starting point of the chain {0}\n'.format(
                command_line.restart))

    return
def initialise(cosmo1, cosmo2, data, command_line):
    """
    Main call to prepare the information for the MultiNest run.

    It checks that the varying parameters have flat and bound priors,
    creates the nested sampling sub-folder when missing, fills
    ``data.NS_arguments`` and ``data.NS_param_names``, and writes the
    arguments and the parameter ordering to two files next to the MultiNest
    output. ``cosmo1`` and ``cosmo2`` are not used by this function.

    Raises
    ------
    io_mp.ConfigurationError
        if a prior is not flat, a parameter is not bound, or a requested
        clustering parameter is not one of the varying parameters.
    """

    # Convenience variables
    varying_param_names = data.get_mcmc_parameters(['varying'])
    derived_param_names = data.get_mcmc_parameters(['derived'])

    # Check that all the priors are flat and that all the parameters are bound
    is_flat, is_bound = sampler.check_flat_bound_priors(
        data.mcmc_parameters, varying_param_names)
    if not is_flat:
        raise io_mp.ConfigurationError(
            'Nested Sampling with MultiNest is only possible with flat ' +
            'priors. Sorry!')
    if not is_bound:
        raise io_mp.ConfigurationError(
            'Nested Sampling with MultiNest is only possible for bound ' +
            'parameters. Set reasonable bounds for them in the ".param"' +
            'file.')

    # If absent, create the sub-folder NS
    NS_folder = os.path.join(command_line.folder, NS_subfolder)
    if not os.path.exists(NS_folder):
        os.makedirs(NS_folder)

    # Use chain name as a base name for MultiNest files
    chain_name = [a for a in command_line.folder.split(os.path.sep) if a][-1]
    base_name = os.path.join(NS_folder, chain_name)
    # FK: add base folder name to NS_arguments for later reference
    data.NS_arguments['base_dir'] = NS_folder

    # Prepare arguments for PyMultiNest
    # -- Automatic arguments
    data.NS_arguments['n_dims'] = len(varying_param_names)
    data.NS_arguments['n_params'] = (len(varying_param_names) +
                                     len(derived_param_names))
    data.NS_arguments['verbose'] = True
    data.NS_arguments['outputfiles_basename'] = base_name + NS_separator
    # -- User-defined arguments
    # Start from "no clustering parameter", so that the name is always bound
    # below, even if 'clustering_params' is not among the user arguments.
    clustering_param_names = []
    for arg in NS_user_arguments:
        value = getattr(command_line, NS_prefix + arg)
        # Special case: clustering parameters
        if arg == 'clustering_params':
            clustering_param_names = value if value != -1 else []
            continue
        # Rest of the cases
        if value != -1:
            data.NS_arguments[arg] = value
        # else: don't define them -> use PyMultiNest default value

    # Clustering parameters -- reordering to put them first
    NS_param_names = []
    if clustering_param_names:
        data.NS_arguments['n_clustering_params'] = len(clustering_param_names)
        for param in clustering_param_names:
            if param not in varying_param_names:
                raise io_mp.ConfigurationError(
                    'The requested clustering parameter "%s"' % param +
                    ' was not found in your ".param" file. Pick a valid one.')
            NS_param_names.append(param)
    for param in varying_param_names:
        if param not in NS_param_names:
            NS_param_names.append(param)
    data.NS_param_names = NS_param_names

    # Caveat: multi-modal sampling OFF by default; if requested, INS disabled
    try:
        if data.NS_arguments['multimodal']:
            data.NS_arguments['importance_nested_sampling'] = False
            warnings.warn('Multi-modal sampling has been requested, ' +
                          'so Importance Nested Sampling has been disabled')
    except KeyError:
        data.NS_arguments['multimodal'] = False

    # MPI: don't initialise it inside MultiNest.
    # Rather, it is either initialised by Monte Python (if MPI used) or ignored
    data.NS_arguments['init_MPI'] = False

    # Write the MultiNest arguments and parameter ordering. The number of
    # clustering parameters is not written as such: the list of their names
    # is written in its place.
    with open(base_name + name_arguments, 'w') as afile:
        for arg in data.NS_arguments:
            if arg != 'n_clustering_params':
                afile.write(' = '.join([str(arg),
                                        str(data.NS_arguments[arg])]))
            else:
                afile.write('clustering_params = ' +
                            ' '.join(clustering_param_names))
            afile.write('\n')
    with open(base_name + name_paramnames, 'w') as pfile:
        pfile.write('\n'.join(NS_param_names + derived_param_names))
def run(cosmo, data, command_line):
    """
    Sample with the CosmoHammer

    The varying parameters must all have flat, bound priors. A ``CH``
    sub-folder (``CH_subfolder``) is created inside ``command_line.folder``
    if needed, the user-supplied Cosmo Hammer options are written next to
    the chains, and the sampler is started.

    Parameters
    ----------
    cosmo :
        Cosmological module, added to the computation chain as a core
        module (after `data`).
    data :
        Data object. Its ``mcmc_parameters`` and ``lkl`` attributes are
        read, and ``CH_arguments`` is (re)defined on it.
    command_line :
        Parsed command line. ``folder`` and every ``CH_prefix + arg``
        attribute, for ``arg`` in ``CH_user_arguments``, are read.

    Raises
    ------
    io_mp.ConfigurationError
        If any varying parameter has a non-flat or unbound prior.

    """
    # Store the parameters inside the format expected by CosmoHammer
    # TODO: about the derived params?
    parameter_names = data.get_mcmc_parameters(["varying"])

    # Ensure that their prior is bound and flat
    is_flat, is_bound = sampler.check_flat_bound_priors(
        data.mcmc_parameters, parameter_names)
    if not is_flat:
        raise io_mp.ConfigurationError(
            'The Cosmo Hammer is only available with flat ' + 'priors. Sorry!')
    if not is_bound:
        raise io_mp.ConfigurationError(
            'The Cosmo Hammer is only available for bound '
            'parameters. Set reasonable bounds for them in the ".param" '
            'file.')

    # One row per varying parameter, holding its 'initial' entry. Columns 1
    # and 2 of that entry are used below as the lower and upper bounds.
    params = np.array([data.mcmc_parameters[parameter]['initial']
                       for parameter in parameter_names])

    # If absent, create the sub-folder CH
    CH_folder = os.path.join(command_line.folder, CH_subfolder)
    if not os.path.exists(CH_folder):
        os.makedirs(CH_folder)

    # Initialize a chain object (Beware, the concept is quite different than
    # the chain of the module :mod:`mcmc`)
    chain = LikelihoodComputationChain(min=params[:, 1], max=params[:, 2])

    # Add data and cosmo as two core modules. Note that the order is important
    # here, since data must be called before cosmo.
    chain.addCoreModule(data)
    chain.addCoreModule(cosmo)

    # Add each likelihood class as a LikelihoodModule
    for likelihood in dictvalues(data.lkl):
        chain.addLikelihoodModule(likelihood)

    # Define the file prefix: <folder>/<CH_subfolder>/<last folder component>
    chain_name = [a for a in command_line.folder.split(os.path.sep) if a][-1]
    file_prefix = os.path.join(command_line.folder, CH_subfolder, chain_name)

    # Recover the User options; -1 means "not set by the user"
    data.CH_arguments = {}
    for arg in CH_user_arguments:
        value = getattr(command_line, CH_prefix + arg)
        if value != -1:
            data.CH_arguments[arg] = value
        # else, do not define them, and leave the default ones.

    # Write the CosmoHammer arguments
    with open(file_prefix + name_arguments, 'w') as arg_file:
        for arg in data.CH_arguments:
            arg_file.write(
                ' = '.join([str(arg), str(data.CH_arguments[arg])]) + '\n')

    # Create an extension to the SampleFileUtil from cosmoHammer
    derived_util = DerivedUtil(file_prefix)

    try:
        num_threads = int(os.environ['OMP_NUM_THREADS'])
    except KeyError:
        warnings.warn(
            "The environment variable OMP_NUM_THREADS is not set. "
            "To run the Cosmo Hammer meaningfully, you should better "
            "set it to something! Defaulting to 1 for now.")
        num_threads = 1

    # Merge the built-in defaults with the user options in a dictionary, so
    # that a user option named like one of the defaults overrides it instead
    # of triggering a "multiple values for keyword argument" TypeError.
    sampler_arguments = {
        'walkersRatio': 50,
        'burninIterations': 10,
        'sampleIterations': 30,
    }
    sampler_arguments.update(data.CH_arguments)

    # Create the Sampler object
    sampler_hammer = CosmoHammerSampler(params=params,
                                        likelihoodComputationChain=chain,
                                        filePrefix=file_prefix,
                                        storageUtil=derived_util,
                                        threadCount=num_threads,
                                        **sampler_arguments)

    # create console handler and set level to debug (does not seem to appear)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    logging.getLogger().addHandler(console_handler)

    sampler_hammer.startSampling()
Beispiel #12
0
    def __init__(self, path, data, command_line):
        """
        Load all data products of the likelihood and register the
        requirements on the cosmological module.

        Reads, relative to ``self.data_directory``: the (optional) resetting
        bias model, the fiducial m-corrections, ``sigma_e``/``n_eff``, the
        E- and B-mode band powers for every unique pair of redshift bins,
        the covariance, the band window matrix with its multipole nodes, and
        the n(z) histograms. As a side effect, the number of data points in
        the masked data vector is written to ``number_datapoints.txt`` inside
        the data directory.

        The settings read from ``self`` (``data_directory``, ``zbin_min``,
        ``zbin_max``, ``bands_EE_to_use``, ``bands_BB_to_use``,
        ``correct_resetting_bias``, ``photoz_method``, ``nzmax``, ``mode``,
        ``k_max_h_by_Mpc``) are presumably set by ``Likelihood.__init__``
        from the ``.data`` file -- TODO confirm.

        Raises
        ------
        io_mp.ConfigurationError
            If the placeholder data file cannot be found.
        io_mp.LikelihoodError
            If an n(z) histogram file is missing, or if the redshift grids
            of the histograms differ between redshift bins.

        """
        # I should already take care of using only GRF mocks or data here (because of different folder-structures etc...)
        # or for now just write it for GRFs for tests and worry about it later...
        Likelihood.__init__(self, path, data, command_line)

        # Check if the data can be found
        # NOTE(review): the broad ``except`` is kept on purpose, since the
        # exception raised by parser_mp.existing_file is not visible here.
        try:
            fname = os.path.join(self.data_directory,
                                 'Resetting_bias/parameters_B_mode_model.dat')
            parser_mp.existing_file(fname)
        except:
            raise io_mp.ConfigurationError(
                'KiDS-450 QE data not found. Download the data at '
                'http://kids.strw.leidenuniv.nl/sciencedata.php '
                'and specify path to data through the variable '
                'kids450_qe_likelihood_public.data_directory in '
                'the .data file. See README in likelihood folder '
                'for further instructions.')

        # TODO: this is also CFHTLenS legacy...
        # only relevant for GRFs!
        #dict_BWM = {'W1': 'G10_', 'W2': 'G126_', 'W3': 'G162_', 'W4': 'G84_'}

        self.need_cosmo_arguments(data, {'output': 'mPk'})

        # labels of the redshift bins, used in the n(z) file names
        self.redshift_bins = []
        for index_zbin in xrange(len(self.zbin_min)):
            redshift_bin = '{:.2f}z{:.2f}'.format(self.zbin_min[index_zbin],
                                                  self.zbin_max[index_zbin])
            self.redshift_bins.append(redshift_bin)

        # number of z-bins
        self.nzbins = len(self.redshift_bins)
        # number of *unique* correlations between z-bins; integer division
        # is required because the result is used as a loop count below
        self.nzcorrs = self.nzbins * (self.nzbins + 1) // 2

        # default, use all correlations (masking out the auto-spectra is
        # not implemented here): repeat the band masks for every unique
        # correlation
        all_bands_EE_to_use = []
        all_bands_BB_to_use = []
        for _ in xrange(self.nzcorrs):
            all_bands_EE_to_use += self.bands_EE_to_use
            all_bands_BB_to_use += self.bands_BB_to_use

        all_bands_to_use = np.concatenate(
            (all_bands_EE_to_use, all_bands_BB_to_use))
        self.indices_for_bands_to_use = np.where(
            np.asarray(all_bands_to_use) == 1)[0]

        # this is also the number of points in the datavector
        ndata = len(self.indices_for_bands_to_use)

        # I should load all the data needed only once, i.e. HERE:
        # not so sure about statement above, I have the feeling "init" is called for every MCMC step...
        # maybe that's why the memory is filling up on other machines?! --> nope, that wasn't the reason...
        start_load = time.time()

        if self.correct_resetting_bias:
            fname = os.path.join(self.data_directory,
                                 'Resetting_bias/parameters_B_mode_model.dat')
            A_B_modes, exp_B_modes, err_A_B_modes, err_exp_B_modes = np.loadtxt(
                fname, unpack=True)
            self.params_resetting_bias = np.array([A_B_modes, exp_B_modes])
            fname = os.path.join(self.data_directory,
                                 'Resetting_bias/covariance_B_mode_model.dat')
            self.cov_resetting_bias = np.loadtxt(fname)

        # try to load fiducial m-corrections from file (currently these are global values over full field, hence no looping over fields required for that!)
        # TODO: Make output dependent on field, not necessary for current KiDS approach though!
        try:
            fname = os.path.join(
                self.data_directory,
                '{:}zbins/m_correction_avg.txt'.format(self.nzbins))
            if self.nzbins == 1:
                self.m_corr_fiducial_per_zbin = np.asarray(
                    [np.loadtxt(fname, usecols=[1])])
            else:
                self.m_corr_fiducial_per_zbin = np.loadtxt(fname, usecols=[1])
        except Exception:
            # best-effort fallback; KeyboardInterrupt/SystemExit still
            # propagate
            self.m_corr_fiducial_per_zbin = np.zeros(self.nzbins)
            print('Could not load m-correction values from \n', fname)
            print('Setting them to zero instead.')

        try:
            fname = os.path.join(
                self.data_directory,
                '{:}zbins/sigma_int_n_eff_{:}zbins.dat'.format(
                    self.nzbins, self.nzbins))
            tbdata = np.loadtxt(fname)
            if self.nzbins == 1:
                # correct columns for file!
                sigma_e1 = np.asarray([tbdata[2]])
                sigma_e2 = np.asarray([tbdata[3]])
                n_eff = np.asarray([tbdata[4]])
            else:
                # correct columns for file!
                sigma_e1 = tbdata[:, 2]
                sigma_e2 = tbdata[:, 3]
                n_eff = tbdata[:, 4]

            self.sigma_e = np.sqrt((sigma_e1**2 + sigma_e2**2) / 2.)
            # convert from 1 / sq. arcmin to 1 / sterad
            self.n_eff = n_eff / np.deg2rad(1. / 60.)**2
        except Exception:
            # these dummies will set noise power always to 0!
            self.sigma_e = np.zeros(self.nzbins)
            self.n_eff = np.ones(self.nzbins)
            print('Could not load sigma_e and n_eff!')

        collect_bp_EE_in_zbins = []
        collect_bp_BB_in_zbins = []
        # collect BP per zbin and combine into one array
        for zbin1 in xrange(self.nzbins):
            for zbin2 in xrange(zbin1 + 1):  #self.nzbins):
                # zbin2 first in fname!
                fname_EE = os.path.join(
                    self.data_directory,
                    '{:}zbins/band_powers_EE_z{:}xz{:}.dat'.format(
                        self.nzbins, zbin1 + 1, zbin2 + 1))
                fname_BB = os.path.join(
                    self.data_directory,
                    '{:}zbins/band_powers_BB_z{:}xz{:}.dat'.format(
                        self.nzbins, zbin1 + 1, zbin2 + 1))
                extracted_band_powers_EE = np.loadtxt(fname_EE)
                extracted_band_powers_BB = np.loadtxt(fname_BB)
                collect_bp_EE_in_zbins.append(extracted_band_powers_EE)
                collect_bp_BB_in_zbins.append(extracted_band_powers_BB)

        # data vector: all EE band powers first, then all BB band powers
        self.band_powers = np.concatenate(
            (np.asarray(collect_bp_EE_in_zbins).flatten(),
             np.asarray(collect_bp_BB_in_zbins).flatten()))

        fname = os.path.join(
            self.data_directory,
            '{:}zbins/covariance_all_z_EE_BB.dat'.format(self.nzbins))
        self.covariance = np.loadtxt(fname)

        fname = os.path.join(
            self.data_directory,
            '{:}zbins/band_window_matrix_nell100.dat'.format(self.nzbins))
        self.band_window_matrix = np.loadtxt(fname)
        # ells_intp and also band_offset are consistent between different patches!

        fname = os.path.join(
            self.data_directory,
            '{:}zbins/multipole_nodes_for_band_window_functions_nell100.dat'.
            format(self.nzbins))
        self.ells_intp = np.loadtxt(fname)
        # number of bands per correlation, taken from the last file loaded
        self.band_offset_EE = len(extracted_band_powers_EE)
        self.band_offset_BB = len(extracted_band_powers_BB)

        # Check if any of the n(z) needs to be shifted in loglkl by D_z{1...n}:
        self.shift_n_z_by_D_z = np.zeros(self.nzbins, 'bool')
        for zbin in xrange(self.nzbins):
            param_name = 'D_z{:}'.format(zbin + 1)
            if param_name in data.mcmc_parameters:
                self.shift_n_z_by_D_z[zbin] = True

        # Read fiducial dn_dz from window files:
        # TODO: the hardcoded z_min and z_max correspond to the lower and upper
        # endpoints of the shifted left-border histogram!
        z_samples = []
        hist_samples = []
        # redshift grid of the previously loaded bin, for the consistency
        # check between bins
        zpcheck = None
        for zbin in xrange(self.nzbins):
            redshift_bin = self.redshift_bins[zbin]
            window_file_path = os.path.join(
                self.data_directory,
                '{:}/n_z_avg_{:}.hist'.format(self.photoz_method,
                                              redshift_bin))
            if not os.path.exists(window_file_path):
                raise io_mp.LikelihoodError("File not found:\n %s" %
                                            window_file_path)

            zptemp, hist_pz = np.loadtxt(window_file_path,
                                         usecols=[0, 1],
                                         unpack=True)
            shift_to_midpoint = np.diff(zptemp)[0] / 2.
            # Every histogram must be sampled on the same redshift grid;
            # compare against the grid of the previous bin.
            if zpcheck is not None:
                if (zptemp.shape != zpcheck.shape or
                        np.sum((zptemp - zpcheck)**2) > 1e-6):
                    raise io_mp.LikelihoodError(
                        'The redshift values for the window files at different bins do not match.'
                    )
            zpcheck = zptemp
            print('Loaded n(zbin{:}) from: \n'.format(zbin + 1),
                  window_file_path)
            # we add a zero as first element because we want to integrate down to z = 0!
            z_samples += [
                np.concatenate((np.zeros(1), zptemp + shift_to_midpoint))
            ]
            hist_samples += [np.concatenate((np.zeros(1), hist_pz))]

        z_samples = np.asarray(z_samples)
        hist_samples = np.asarray(hist_samples)

        # prevent undersampling of histograms!
        if self.nzmax < len(zptemp):
            print(
                "You're trying to integrate at lower resolution than supplied by the n(z) histograms. \n Increase nzmax! Aborting now..."
            )
            exit()
        # if that's the case, we want to integrate at histogram resolution and need to account for
        # the extra zero entry added
        elif self.nzmax == len(zptemp):
            self.nzmax = z_samples.shape[1]
            # requires that z-spacing is always the same for all bins...
            self.redshifts = z_samples[0, :]
            print('Integrations performed at resolution of histogram!')
        # if we interpolate anyway at arbitrary resolution the extra 0 doesn't matter
        else:
            self.nzmax += 1
            self.redshifts = np.linspace(z_samples.min(), z_samples.max(),
                                         self.nzmax)
            print('Integration performed at set nzmax resolution!')

        self.pz = np.zeros((self.nzmax, self.nzbins))
        self.pz_norm = np.zeros(self.nzbins, 'float64')
        for zbin in xrange(self.nzbins):
            # we assume that the histograms loaded are given as left-border histograms
            # and that the z-spacing is the same for each histogram
            spline_pz = itp.splrep(z_samples[zbin, :], hist_samples[zbin, :])

            #z_mod = self.z_p
            mask_min = self.redshifts >= z_samples[zbin, :].min()
            mask_max = self.redshifts <= z_samples[zbin, :].max()
            mask = mask_min & mask_max
            # points outside the z-range of the histograms are set to 0!
            self.pz[mask, zbin] = itp.splev(self.redshifts[mask], spline_pz)
            # Normalize selection functions (trapezoidal rule)
            dz = self.redshifts[1:] - self.redshifts[:-1]
            self.pz_norm[zbin] = np.sum(
                0.5 * (self.pz[1:, zbin] + self.pz[:-1, zbin]) * dz)

        self.z_max = self.redshifts.max()

        # k_max is arbitrary at the moment, since cosmology module is not calculated yet...TODO
        if self.mode == 'halofit':
            self.need_cosmo_arguments(
                data, {
                    'z_max_pk': self.z_max,
                    'output': 'mPk',
                    'non linear': self.mode,
                    'P_k_max_h/Mpc': self.k_max_h_by_Mpc
                })
        else:
            self.need_cosmo_arguments(
                data, {
                    'z_max_pk': self.z_max,
                    'output': 'mPk',
                    'P_k_max_h/Mpc': self.k_max_h_by_Mpc
                })

        print('Time for loading all data files:', time.time() - start_load)

        # side effect: store the size of the masked data vector on disk
        fname = os.path.join(self.data_directory, 'number_datapoints.txt')
        np.savetxt(fname, [ndata],
                   header='number of datapoints in masked datavector')
Beispiel #13
0
def get_covariance_matrix(cosmo, data, command_line):
    """
    Compute the covariance matrix, from an input file or from an existing
    matrix.

    Reordering of the names and scaling take place here, in a series of
    potentially hard to read methods. For the sake of clarity, and to avoid
    confusions, the code will, by default, print out a succession of 4
    covariance matrices at the beginning of the run, if starting from an
    existing one. This way, you can control that the parameters are set
    properly.

    .. note::

        The set of parameters from the run need not to be the exact same
        set of parameters from the existing covariance matrix (not even the
        ordering). Missing parameter from the existing covariance matrix will
        use the sigma given as an input.

    Parameters
    ----------
    cosmo :
        Cosmological module, only forwarded to ``compute_fisher``.
    data :
        Data object; its varying ``mcmc_parameters`` are read, and their
        ``initial`` central value may be shifted by the Fisher iteration.
    command_line :
        Parsed command line. ``fisher``, ``cov``, ``bf``, ``silent`` and
        ``folder`` are read; ``quiet`` is defined if missing, and ``cov`` is
        set to the written Fisher covariance file when ``--fisher`` is used.

    Returns
    -------
    eigv, eigV, matrix
        Eigenvalues and eigenvectors of the inverse covariance matrix, and
        the covariance matrix itself, ordered like the varying parameters.

    Raises
    ------
    io_mp.ConfigurationError
        If the Fisher matrix cannot be inverted, or if the covariance file
        has no ``#`` header line giving the parameter names.

    """

    # Setting numpy options in terms of precision (useful when writing to files
    # or displaying a result, but does not affect the precision of the
    # computation).
    np.set_printoptions(precision=2, linewidth=150)
    parameter_names = data.get_mcmc_parameters(['varying'])

    # Define quiet setting if not previously defined
    try:
        command_line.quiet
    except AttributeError:
        command_line.quiet = False

    if command_line.fisher and not command_line.cov:
        # We will work out the fisher matrix for all the parameters and
        # write it to a file
        if not command_line.silent:
            warnings.warn("Fisher implementation is being tested")

        # Let us create a separate copy of data
        from copy import deepcopy
        # Do not modify data, instead copy
        temp_data = deepcopy(data)
        done = False

        # Create the center dictionary, which will hold the center point
        # information (or best-fit) TODO
        # This dictionary will be updated in case it was too far from the
        # best-fit, and found a non positive-definite symmetric fisher matrix.
        center = {}
        if not command_line.bf:
            for elem in parameter_names:
                temp_data.mcmc_parameters[elem]['current'] = (
                    data.mcmc_parameters[elem]['initial'][0])
                center[elem] = data.mcmc_parameters[elem]['initial'][0]
        else:
            read_args_from_bestfit(temp_data, command_line.bf)
            for elem in parameter_names:
                temp_data.mcmc_parameters[elem]['current'] = (
                    temp_data.mcmc_parameters[elem]['last_accepted'])
                center[elem] = temp_data.mcmc_parameters[elem]['last_accepted']

        # Have a security index that prevents looping indefinitely
        security = 0
        while not done and security < 10:
            security += 1
            # Compute the Fisher matrix and the gradient array at the center
            # point.
            fisher_matrix, gradient = compute_fisher(
                temp_data, cosmo, center, 0.01)

            if not command_line.silent:
                print("Fisher matrix computed:")
                print(fisher_matrix)

            # Compute inverse of the fisher matrix; a singular matrix is a
            # fatal configuration problem.
            try:
                cov_matrix = np.linalg.inv(fisher_matrix)
            except np.linalg.LinAlgError:
                raise io_mp.ConfigurationError(
                    "Could not find Fisher matrix, please remove the "
                    "option --fisher and run with Metropolis-Hastings "
                    "or another sampling method.")

            # Write it to the file, and use that file as input covariance
            fisher_file = os.path.join(
                command_line.folder, 'covariance_fisher.mat')
            io_mp.write_covariance_matrix(
                cov_matrix, parameter_names, fisher_file)
            command_line.cov = fisher_file

            done = True
            # Check if the diagonal elements are non-negative
            for h in range(len(parameter_names)):
                if cov_matrix[h][h] < 0:
                    warnings.warn(
                        "Covariance has negative values on diagonal, "
                        "moving to a better point and repeating "
                        "the Fisher computation")
                    done = False
                    break

            if not done:
                # Solve for a step
                step = np.dot(cov_matrix, gradient)
                # Now modify data_parameters TODO HERE update center
                for k, elem in enumerate(parameter_names):
                    data.mcmc_parameters[elem]['initial'][0] -= step[k]
                    temp_data.mcmc_parameters[elem]['initial'][0] -= step[k]
                    # single formatted argument: prints the same text with
                    # the print statement and with the print function
                    print("Moved %s to: %s" % (
                        elem, data.mcmc_parameters[elem]['initial'][0]))

    # if the user provides a .covmat file or if user asks to compute a fisher matrix
    if command_line.cov is not None:

        covnames = None
        matrix = None
        with open('{0}'.format(command_line.cov), 'r') as cov:
            i = 0
            for line in cov:
                if line.find('#') != -1:
                    # Extract the names from the first line
                    covnames = line.strip('#').replace(' ', '').\
                        replace('\n', '').split(',')
                    # Initialize the matrices
                    matrix = np.zeros(
                        (len(covnames), len(covnames)), 'float64')
                    rot = np.zeros((len(covnames), len(covnames)))
                else:
                    entries = line.split()
                    if entries and matrix is None:
                        raise io_mp.ConfigurationError(
                            "The covariance matrix file %s contains data "
                            "before its '#' header line with the parameter "
                            "names." % command_line.cov)
                    for j, entry in enumerate(entries):
                        matrix[i][j] = np.array(entry, 'float64')
                    i += 1
        if covnames is None:
            raise io_mp.ConfigurationError(
                "The covariance matrix file %s has no '#' header line with "
                "the parameter names." % command_line.cov)

        # First print out
        if not command_line.silent and not command_line.quiet:
            print('\nInput covariance matrix:')
            print(covnames)
            print(matrix)
        # Deal with the all problematic cases.
        # First, adjust the scales between stored parameters and the ones used
        # in mcmc
        scales = []
        for elem in covnames:
            if elem in parameter_names:
                scales.append(data.mcmc_parameters[elem]['scale'])
            else:
                scales.append(1)
        scales = np.diag(scales)
        # Compute the inverse matrix, and assert that the computation was
        # precise enough, by comparing the product to the identity matrix.
        invscales = np.linalg.inv(scales)
        np.testing.assert_array_almost_equal(
            np.dot(scales, invscales), np.eye(np.shape(scales)[0]),
            decimal=5)

        # Apply the newly computed scales to the input matrix
        matrix = np.dot(invscales.T, np.dot(matrix, invscales))

        # Second print out, after having applied the scale factors
        if not command_line.silent and not command_line.quiet:
            print('\nFirst treatment (scaling)')
            print(covnames)
            print(matrix)

        # Rotate matrix for the parameters to be well ordered, even if some
        # names are missing or some are in extra.
        # First, store the parameter names in temp_names that also appear in
        # the covariance matrix, in the right ordering for the code (might be
        # different from the input matrix)
        temp_names = [elem for elem in parameter_names if elem in covnames]

        # If parameter_names contains less things than covnames, we will do a
        # small trick. Create a second temporary array, temp_names_2, that will
        # have the same dimension as covnames, and containing:
        # - the elements of temp_names, in the order of parameter_names (h
        # index)
        # - an empty string '' for the remaining unused parameters
        temp_names_2 = []
        h = 0
        not_in = [elem for elem in covnames if elem not in temp_names]
        for k in range(len(covnames)):
            if covnames[k] not in not_in:
                temp_names_2.append(temp_names[h])
                h += 1
            else:
                temp_names_2.append('')

        # Create the rotation matrix, that will put the covariance matrix in
        # the right order, and also assign zeros to the unused parameters from
        # the input. These empty columns will be removed in the next step.
        # temp_names_2 has the same length as covnames, so every (h, k) pair
        # is valid.
        for k, covname in enumerate(covnames):
            for h, target_name in enumerate(temp_names_2):
                if covname == target_name:
                    rot[h][k] = 1.
                else:
                    rot[h][k] = 0.
        matrix = np.dot(rot, np.dot(matrix, np.transpose(rot)))

        # Third print out
        if not command_line.silent and not command_line.quiet:
            print('\nSecond treatment (partial reordering and cleaning)')
            print(temp_names_2)
            print(matrix)

        # Final step, creating a temporary matrix, filled with 1, that will
        # eventually contain the result.
        matrix_temp = np.ones((len(parameter_names),
                               len(parameter_names)), 'float64')
        indices_final = np.zeros(len(parameter_names))
        indices_initial = np.zeros(len(covnames))
        # Remove names that are in parameter names but not in covnames, and
        # set to zero the corresponding columns of the final result.
        for k in range(len(parameter_names)):
            if parameter_names[k] in covnames:
                indices_final[k] = 1
        for zeros in np.where(indices_final == 0)[0]:
            matrix_temp[zeros, :] = 0
            matrix_temp[:, zeros] = 0
        # Remove names that are in covnames but not in param_names
        for h in range(len(covnames)):
            if covnames[h] in parameter_names:
                indices_initial[h] = 1
        # There, put a place holder number (the largest representable
        # float64) in the initial matrix, so that the next step only copies
        # the interesting part of the input to the final matrix.
        max_value = np.finfo(np.float64).max
        for zeros in np.where(indices_initial == 0)[0]:
            matrix[zeros, :] = [max_value for _ in range(
                len(matrix[zeros, :]))]
            matrix[:, zeros] = [max_value for _ in range(
                len(matrix[:, zeros]))]
        # Now put in the temporary matrix, where the 1 were, the interesting
        # quantities from the input (the ones that are not equal to the
        # place holder).
        matrix_temp[matrix_temp == 1] = matrix[matrix != max_value]
        matrix = np.copy(matrix_temp)
        # on all other lines, that contain 0, just use sigma^2
        for zeros in np.where(indices_final == 0)[0]:
            matrix[zeros, zeros] = np.array(
                data.mcmc_parameters[parameter_names[zeros]]['initial'][3],
                'float64')**2
    # else, take sigmas^2.
    else:
        matrix = np.identity(len(parameter_names), 'float64')
        for index, elem in enumerate(parameter_names):
            matrix[index][index] = np.array(
                data.mcmc_parameters[elem]['initial'][3], 'float64')**2


    # Final print out, the actually used covariance matrix
    if not command_line.silent and not command_line.quiet:
        sys.stdout.write('\nDeduced starting covariance matrix:\n')
        print(parameter_names)
        print(matrix)

    #inverse, and diagonalization
    eigv, eigV = np.linalg.eig(np.linalg.inv(matrix))
    return eigv, eigV, matrix
    def __init__(self, path, data, command_line):

        Likelihood.__init__(self, path, data, command_line)

        # Check if the data can be found, although we don't actually use that
        # particular file but take it as a placeholder for the folder
        try:
            fname = os.path.join(
                self.data_directory,
                'DATA_VECTOR/KiDS-450_xi_pm_tomographic_data_vector.dat')
            parser_mp.existing_file(fname)
        except:
            raise io_mp.ConfigurationError(
                'KiDS-450 CF data not found. Download the data at '
                'http://kids.strw.leidenuniv.nl/sciencedata.php '
                'and specify path to data through the variable '
                'kids450_cf_2cosmos_likelihood_public.data_directory in '
                'the .data file. See README in likelihood folder '
                'for further instructions.')

        # for loading of Nz-files:
        self.z_bins_min = [0.1, 0.3, 0.5, 0.7]
        self.z_bins_max = [0.3, 0.5, 0.7, 0.9]

        # number of angular bins in which xipm is measured
        # we always load the full data vector with 9 data points for xi_p and
        # xi_m each; they are cut to the fiducial scales (or any arbitrarily
        # defined scales with the 'cut_values.dat' files!
        self.ntheta = 9

        # Force the cosmological module to store Pk for redshifts up to
        # max(self.z) and for k up to k_max
        self.need_cosmo1_arguments(data, {'output': 'mPk'})
        self.need_cosmo1_arguments(data,
                                   {'P_k_max_h/Mpc': self.k_max_h_by_Mpc})
        self.need_cosmo2_arguments(data, {'output': 'mPk'})
        self.need_cosmo2_arguments(data,
                                   {'P_k_max_h/Mpc': self.k_max_h_by_Mpc})

        # Compute non-linear power spectrum if requested:
        if self.method_non_linear_Pk in [
                'halofit', 'HALOFIT', 'Halofit', 'hmcode', 'Hmcode', 'HMcode',
                'HMCODE'
        ]:
            self.need_cosmo1_arguments(
                data, {'non linear': self.method_non_linear_Pk})
            self.need_cosmo2_arguments(
                data, {'non linear': self.method_non_linear_Pk})
            print('Using {:} to obtain the non-linear P(k, z)!'.format(
                self.method_non_linear_Pk))
        else:
            print(
                'Only using the linear P(k, z) for ALL calculations \n (check keywords for "method_non_linear_Pk").'
            )

        self.nzbins = len(self.z_bins_min)
        self.nzcorrs = self.nzbins * (self.nzbins + 1) // 2

        # Create labels for loading of dn/dz-files:
        self.zbin_labels = []
        for i in xrange(self.nzbins):
            self.zbin_labels += [
                '{:.1f}t{:.1f}'.format(self.z_bins_min[i], self.z_bins_max[i])
            ]

        # Define array of l values, and initialize them
        # It is a logspace
        # find nlmax in order to reach lmax with logarithmic steps dlnl
        self.nlmax = np.int(np.log(self.lmax) / self.dlnl) + 1
        # redefine slightly dlnl so that the last point is always exactly lmax
        self.dlnl = np.log(self.lmax) / (self.nlmax - 1)
        self.l = np.exp(self.dlnl * np.arange(self.nlmax))

        #TODO: not really needed when bootstrap-errors are selected...
        # Read fiducial dn_dz from window files:
        # TODO: zmin and zmax are hardcoded to fiducial lower and upper limit
        # of midpoint histogram!
        self.z_p = np.linspace(0.025, 3.475, self.nzmax)
        self.pz = np.zeros((self.nzmax, self.nzbins))
        self.pz_norm = np.zeros(self.nzbins, 'float64')
        for zbin in xrange(self.nzbins):
            window_file_path = os.path.join(
                self.data_directory,
                'Nz_{0:}/Nz_{0:}_Mean/Nz_{0:}_z{1:}.asc'.format(
                    self.nz_method, self.zbin_labels[zbin]))
            if os.path.exists(window_file_path):
                zptemp, hist_pz = np.loadtxt(window_file_path,
                                             usecols=[0, 1],
                                             unpack=True)
                if zbin > 0:
                    zpcheck = zptemp
                    if np.sum((zptemp - zpcheck)**2) > 1e-6:
                        raise io_mp.LikelihoodError(
                            'The redshift values for the window files at different bins do not match.'
                        )
                print('Loaded n(zbin{:}) from: \n'.format(zbin + 1),
                      window_file_path)
                # we assume that the histograms loaded are given as left-border histograms
                # and that the z-spacing is the same for each histogram
                shift_to_midpoint = np.diff(zptemp)[0] / 2.
                spline_pz = itp.splrep(zptemp + shift_to_midpoint, hist_pz)
                z_mod = self.z_p  #+ self.shift_by_dz[zbin]
                mask_min = z_mod >= zptemp.min()
                mask_max = z_mod <= zptemp.max()
                mask = mask_min & mask_max
                self.pz[mask, zbin] = itp.splev(z_mod[mask], spline_pz)
                # Normalize selection functions
                dz = self.z_p[1:] - self.z_p[:-1]
                self.pz_norm[zbin] = np.sum(
                    0.5 * (self.pz[1:, zbin] + self.pz[:-1, zbin]) * dz)
            else:
                raise io_mp.LikelihoodError("File not found:\n %s" %
                                            window_file_path)

        self.zmax = self.z_p.max()
        self.need_cosmo1_arguments(data, {'z_max_pk': self.zmax})
        self.need_cosmo2_arguments(data, {'z_max_pk': self.zmax})

        # read in public data vector:
        temp = self.__load_public_data_vector()
        self.theta_bins = temp[:, 0]
        if (np.sum(
            (self.theta_bins[:self.ntheta] - self.theta_bins[self.ntheta:])**2)
                > 1e-6):
            raise io_mp.LikelihoodError(
                'The angular values at which xi+ and xi- '
                'are observed do not match')

        # create the data-vector in the following format (due to covariance structure):
        # xi_obs = {xi1(theta1, z_11)...xi1(theta_k, z_11), xi2(theta_1, z_11)...
        #           xi2(theta_k, z_11);...; xi1(theta1, z_nn)...xi1(theta_k, z_nn),
        #           xi2(theta_1, z_nn)... xi2(theta_k, z_nn)}
        xi_obs = self.__get_xi_obs(temp[:, 1:])

        # concatenate xi_obs with itself to create the ueberdata-vector:
        self.xi_obs_1 = xi_obs
        self.xi_obs_2 = xi_obs

        xi_obs_combined = np.concatenate((xi_obs, xi_obs))

        # now load the full covariance matrix:
        covmat_block = self.__load_public_cov_mat()

        # build a combined cov-mat, for that to work we assume, that the cov-mat dimension fits
        # to the size of the *uncut*, single data-vector and is ordered in the same way as the
        # *final* data-vector created here (i.e. vec = [xi+(1,1), xi-(1,1), xi+(1,2), xi-(1,2),...]!
        covmat = np.asarray(
            np.bmat('covmat_block, covmat_block; covmat_block, covmat_block'))

        # Read angular cut values (OPTIONAL)
        # 1 --> fiducial scales
        # 2 --> large scales

        # Read angular cut values (OPTIONAL)
        if self.use_cut_theta:
            cut_values1 = np.zeros((self.nzbins, 2))
            cut_values2 = np.zeros((self.nzbins, 2))

            cutvalues_file_path1 = os.path.join(
                self.data_directory, 'CUT_VALUES/' + self.cutvalues_file1)
            if os.path.exists(cutvalues_file_path1):
                cut_values1 = np.loadtxt(cutvalues_file_path1)
            else:
                raise io_mp.LikelihoodError(
                    'File not found:\n {:} \n Check that requested file was copied to:\n {:}'
                    .format(cutvalues_file_path1,
                            self.data_directory + 'CUT_VALUES/'))

            cutvalues_file_path2 = os.path.join(
                self.data_directory, 'CUT_VALUES/' + self.cutvalues_file2)
            if os.path.exists(cutvalues_file_path2):
                cut_values2 = np.loadtxt(cutvalues_file_path2)
            else:
                raise io_mp.LikelihoodError(
                    'File not found:\n {:} \n Check that requested file was copied to:\n {:}'
                    .format(cutvalues_file_path2,
                            self.data_directory + 'CUT_VALUES/'))

        # Compute theta mask
        if self.use_cut_theta:
            mask1 = self.__get_mask(cut_values1)
            mask2 = self.__get_mask(cut_values2)
        else:
            mask1 = np.ones(2 * self.nzcorrs * self.ntheta)
            mask2 = np.ones(2 * self.nzcorrs * self.ntheta)

        #print(mask1, len(np.where(mask1 == 1)[0]))
        #print(mask2, len(np.where(mask2 == 1)[0]))
        # for tomographic splits:
        # e.g.
        # mask1 = fiducial
        # mask2 = z-bin 3 only (gives also all cross_powers)
        # --> mask1 = mask1 - mask2 --> all remaining bin combinations
        if self.subtract_mask2_from_mask1:
            mask1 = mask1 - mask2

        #print(mask1, len(np.where(mask1 == 1)[0]))
        #print(mask2, len(np.where(mask2 == 1)[0]))

        self.mask_indices1 = np.where(mask1 == 1)[0]
        self.mask_indices2 = np.where(mask2 == 1)[0]

        # combine "fiducial" mask and "large scales" mask:
        # this is wrong, because indices in second half are only wrt. first half!!!
        #self.mask_indices = np.concatenate((self.mask_indices1, self.mask_indices2))

        # combine "fiducial" mask and "large scales" mask:
        mask = np.concatenate((mask1, mask2))
        self.mask_indices = np.where(mask == 1)[0]

        # apply equation 12 from Hildebrandt et al. 2017 to covmat:
        # this assumes that m-correction was already applied to data-vector!
        if self.marginalize_over_multiplicative_bias_uncertainty:
            cov_m_corr = np.matrix(
                xi_obs_combined[self.mask_indices]).T * np.matrix(
                    xi_obs_combined[self.mask_indices]
                ) * 4. * self.err_multiplicative_bias**2
            #covmat = covmat[self.mask_indices][:, self.mask_indices] + np.asarray(cov_m_corr)
            covmat = covmat[np.ix_(self.mask_indices,
                                   self.mask_indices)] + np.asarray(cov_m_corr)
        else:
            #covmat = covmat[self.mask_indices][:, self.mask_indices]
            covmat = covmat[np.ix_(self.mask_indices, self.mask_indices)]

        fname = self.data_directory + 'cov_matrix_ana_comb_cut.dat'
        np.savetxt(fname, covmat)
        print('Saved trimmed covariance to: \n', fname)

        # precompute Cholesky transform for chi^2 calculation:
        self.cholesky_transform = cholesky(covmat, lower=True)

        # Fill array of discrete z values
        # self.z = np.linspace(0, self.zmax, num=self.nzmax)
        '''
        ################
        # Noise spectrum
        ################
        # only useful for theoretical signal

        # Number of galaxies per steradian
        self.noise = 3600.*self.gal_per_sqarcmn*(180./math.pi)**2

        # Number of galaxies per steradian per bin
        self.noise = self.noise/self.nzbins

        # Noise spectrum (diagonal in bin*bin space, independent of l and Bin)
        self.noise = self.rms_shear**2/self.noise
        '''

        ################################################
        # discrete theta values (to convert C_l to xi's)
        ################################################

        thetamin = np.min(self.theta_bins) * 0.8
        thetamax = np.max(self.theta_bins) * 1.2

        self.nthetatot = np.ceil(
            math.log(thetamax / thetamin) / self.dlntheta) + 1
        self.nthetatot = np.int32(self.nthetatot)
        self.theta = np.zeros(self.nthetatot, 'float64')
        self.a2r = math.pi / (180. * 60.)

        # define an array of theta's
        for it in xrange(self.nthetatot):
            self.theta[it] = thetamin * math.exp(self.dlntheta * it)

        ################################################################
        # discrete l values used in the integral to convert C_l to xi's)
        ################################################################

        # l = x / theta / self.a2r
        # x = l * theta * self.a2r

        # We start by considering the largest theta, theta[-1], and for that value we infer
        # a list of l's from the requirement that corresponding x values are spaced linearly with a given stepsize, until xmax.
        # Then we loop over smaller theta values, in decreasing order, and for each of them we complete the previous list of l's,
        # always requiuring the same dx stepsize (so that dl does vary) up to xmax.
        #
        # We first apply this to a running value ll, in order to count the total numbner of ll's, called nl.
        # Then we create the array lll[nl] and we fill it with the same values.
        #
        # we also compute on the fly the critical index il_max[it] such that ll[il_max[it]]*self.theta[it]*self.a2r
        # is the first value of x above xmax

        ll = 1.
        il = 0
        while (ll * self.theta[-1] * self.a2r < self.dx_threshold):
            ll += self.dx_below_threshold / self.theta[-1] / self.a2r
            il += 1
        for it in xrange(self.nthetatot):
            while (ll * self.theta[self.nthetatot - 1 - it] * self.a2r <
                   self.xmax) and (ll + self.dx_above_threshold /
                                   self.theta[self.nthetatot - 1 - it] /
                                   self.a2r < self.lmax):
                ll += self.dx_above_threshold / self.theta[self.nthetatot - 1 -
                                                           it] / self.a2r
                il += 1
        self.nl = il + 1

        self.lll = np.zeros(self.nl, 'float64')
        self.il_max = np.zeros(self.nthetatot, 'int')
        il = 0
        self.lll[il] = 1.
        while (self.lll[il] * self.theta[-1] * self.a2r < self.dx_threshold):
            il += 1
            self.lll[il] = self.lll[
                il - 1] + self.dx_below_threshold / self.theta[-1] / self.a2r
        for it in xrange(self.nthetatot):
            while (self.lll[il] * self.theta[self.nthetatot - 1 - it] *
                   self.a2r < self.xmax) and (
                       self.lll[il] + self.dx_above_threshold /
                       self.theta[self.nthetatot - 1 - it] / self.a2r <
                       self.lmax):
                il += 1
                self.lll[il] = self.lll[
                    il - 1] + self.dx_above_threshold / self.theta[
                        self.nthetatot - 1 - it] / self.a2r
            self.il_max[self.nthetatot - 1 - it] = il

        # finally we compute the array l*dl that will be used in the trapezoidal integration
        # (l is a factor in the integrand [l * C_l * Bessel], and dl is like a weight)
        self.ldl = np.zeros(self.nl, 'float64')
        self.ldl[0] = self.lll[0] * 0.5 * (self.lll[1] - self.lll[0])
        for il in xrange(1, self.nl - 1):
            self.ldl[il] = self.lll[il] * 0.5 * (self.lll[il + 1] -
                                                 self.lll[il - 1])
        self.ldl[-1] = self.lll[-1] * 0.5 * (self.lll[-1] - self.lll[-2])

        return
def initialise(cosmo, data, command_line):
    """
    Main call to prepare the information for the NeuralNest run.

    The routine

    1. checks (for the nested sampler only) that every varying parameter has
       a flat, bounded prior,
    2. creates the NeuralNest sub-folder inside the output folder if needed,
    3. fills ``data.NN_arguments`` with automatic and user-defined settings
       and stores the varying parameter names in ``data.NN_param_names``,
    4. for the first run in that sub-folder, writes the arguments and the
       parameter names (varying followed by derived) to disk.

    Parameters
    ----------
    cosmo :
        Not used by this function.
    data :
        Object providing ``get_mcmc_parameters``, ``mcmc_parameters``,
        ``NN_arguments`` and ``block_parameters``.
    command_line :
        Parsed command line options; the NeuralNest options are read from
        attributes named ``NN_prefix + <option>``.

    Raises
    ------
    io_mp.ConfigurationError
        If nested sampling is requested with non-flat or unbounded priors.
    """

    # Convenience variables
    varying_param_names = data.get_mcmc_parameters(['varying'])
    derived_param_names = data.get_mcmc_parameters(['derived'])

    # The sampler name is needed twice below: look it up only once
    sampler_name = getattr(command_line, NN_prefix + 'sampler', '').lower()

    if sampler_name == 'nested':
        # Check that all the priors are flat and that all the parameters are
        # bound
        is_flat, is_bound = sampler.check_flat_bound_priors(
            data.mcmc_parameters, varying_param_names)
        if not is_flat:
            raise io_mp.ConfigurationError(
                'Nested Sampling with NeuralNest is only possible with flat '
                'priors. Sorry!')
        if not is_bound:
            # NOTE: the original message lacked the space before "file."
            raise io_mp.ConfigurationError(
                'Nested Sampling with NeuralNest is only possible for bound '
                'parameters. Set reasonable bounds for them in the ".param" '
                'file.')

    # If absent, create the NeuralNest sub-folder
    NN_folder = os.path.join(command_line.folder, NN_subfolder)
    if not os.path.exists(NN_folder):
        os.makedirs(NN_folder)

    # Every previous run owns one directory in the sub-folder, so the new run
    # number is the number of existing directories plus one
    existing_runs = [
        entry for entry in os.listdir(NN_folder)
        if os.path.isdir(os.path.join(NN_folder, entry))
    ]
    run_num = len(existing_runs) + 1

    # -- Automatic arguments
    data.NN_arguments['x_dim'] = len(varying_param_names)
    data.NN_arguments['num_derived'] = len(derived_param_names)
    data.NN_arguments['verbose'] = True
    data.NN_arguments['log_dir'] = os.path.join(NN_folder, str(run_num))
    data.NN_arguments['use_gpu'] = False
    data.NN_arguments['flow'] = 'nvp'
    data.NN_arguments['load_model'] = ''
    data.NN_arguments['batch_size'] = 100

    if getattr(command_line, NN_prefix + 'fastslow'):
        data.NN_arguments['num_slow'] = data.block_parameters[0]
    else:
        data.NN_arguments['num_slow'] = 0

    # -- User-defined arguments
    for arg in NN_user_arguments:
        value = getattr(command_line, NN_prefix + arg)
        data.NN_arguments[arg] = value
        # Only a negative 'switch' needs further treatment
        if arg != 'switch' or value >= 0:
            continue
        # A negative 'switch' is replaced by 1 / (5 * num_slow) when there
        # are slow parameters; otherwise the value given by the user is kept
        num_slow = data.NN_arguments['num_slow']
        if num_slow > 0:
            data.NN_arguments['switch'] = 1.0 / (5 * num_slow)

    if sampler_name == 'mcmc':
        data.NN_arguments['mcmc_steps'] = command_line.N

    data.NN_param_names = varying_param_names

    base_name = os.path.join(NN_folder, 'base')

    if run_num == 1:
        # Write the NeuralNest arguments and parameter ordering
        with open(base_name + name_arguments, 'w') as afile:
            for arg in data.NN_arguments:
                afile.write(
                    str(arg) + ' = ' + str(data.NN_arguments[arg]) + '\n')
        with open(base_name + name_paramnames, 'w') as pfile:
            pfile.write('\n'.join(data.NN_param_names + derived_param_names))
Beispiel #16
0
    def __init__(self, path, data, command_line):
        """
        Set up the KiDS-450 xi_pm likelihood.

        After the generic :class:`Likelihood` initialisation the method

        * checks that the public data vector file exists in
          ``self.data_directory``,
        * registers the arguments required from the cosmological code,
        * loads the data vector and covariance matrix, applies the optional
          angular cuts, optionally adds the multiplicative-bias term to the
          covariance and precomputes its Cholesky factor,
        * loads and normalises the redshift distributions n(z),
        * builds the theta and ell grids used to convert C_l into xi_pm,
        * allocates the work arrays used later on.

        Side effects: writes 'kids450_xipm_4bin_cut.dat' and
        'cov_matrix_ana_cut.dat' into ``self.data_directory``.

        Settings such as ``self.lmax``, ``self.dlnl``, ``self.nzmax`` or
        ``self.use_cut_theta`` are not set here; presumably they are filled
        in by ``Likelihood.__init__`` -- TODO confirm.

        Parameters
        ----------
        path, data, command_line :
            Forwarded unchanged to ``Likelihood.__init__``; `data` is also
            passed to ``need_cosmo_arguments``.

        Raises
        ------
        io_mp.ConfigurationError
            If the existence check of the data vector file fails.
        io_mp.LikelihoodError
            If the xi+ and xi- angular bins differ, a requested cut-values
            file is missing, or the redshift grids of the n(z) files differ.
        """

        Likelihood.__init__(self, path, data, command_line)

        # Check if the data can be found, although we don't actually use that
        # particular file but take it as a placeholder for the folder
        try:
            fname = os.path.join(
                self.data_directory,
                'DATA_VECTOR/KiDS-450_xi_pm_tomographic_data_vector.dat')
            parser_mp.existing_file(fname)
        # 'Exception' instead of a bare 'except', so that KeyboardInterrupt
        # and SystemExit are not turned into a configuration error
        except Exception:
            raise io_mp.ConfigurationError(
                'KiDS-450 CF data not found. Download the data at '
                'http://kids.strw.leidenuniv.nl/sciencedata.php '
                'and specify path to data through the variable '
                'kids450_cf_likelihood_public.data_directory in '
                'the .data file. See README in likelihood folder '
                'for further instructions.')

        # for loading of Nz-files:
        self.z_bins_min = [0.1, 0.3, 0.5, 0.7]
        self.z_bins_max = [0.3, 0.5, 0.7, 0.9]

        # number of angular bins in which xipm is measured
        # we always load the full data vector with 9 data points for xi_p and
        # xi_m each; they are cut to the fiducial scales (or any arbitrarily
        # defined scales with the 'cut_values.dat' files!
        self.ntheta = 9

        # Force the cosmological module to store Pk for redshifts up to
        # max(self.z) and for k up to k_max
        self.need_cosmo_arguments(data, {'output': 'mPk'})
        self.need_cosmo_arguments(data, {'P_k_max_h/Mpc': self.k_max_h_by_Mpc})

        # Compute non-linear power spectrum if requested:
        if self.method_non_linear_Pk in [
                'halofit', 'HALOFIT', 'Halofit', 'hmcode', 'Hmcode', 'HMcode',
                'HMCODE'
        ]:
            self.need_cosmo_arguments(
                data, {'non linear': self.method_non_linear_Pk})
            print('Using {:} to obtain the non-linear P(k, z)!'.format(
                self.method_non_linear_Pk))
        else:
            print(
                'Only using the linear P(k, z) for ALL calculations \n (check keywords for "method_non_linear_Pk").'
            )

        # Define array of l values, and initialize them
        # It is a logspace
        # find nlmax in order to reach lmax with logarithmic steps dlnl
        # (builtin int: the alias np.int was removed from recent NumPy)
        self.nlmax = int(np.log(self.lmax) / self.dlnl) + 1
        # redefine slightly dlnl so that the last point is always exactly lmax
        self.dlnl = np.log(self.lmax) / (self.nlmax - 1)
        self.l = np.exp(self.dlnl * np.arange(self.nlmax))

        self.nzbins = len(self.z_bins_min)
        # number of unique z-bin pairs; integer division keeps this an int
        # (it is used as an array dimension below)
        self.nzcorrs = self.nzbins * (self.nzbins + 1) // 2

        # Create labels for loading of dn/dz-files:
        self.zbin_labels = [
            '{:.1f}t{:.1f}'.format(z_min, z_max)
            for z_min, z_max in zip(self.z_bins_min, self.z_bins_max)
        ]

        # read in public data vector:
        temp = self.__load_public_data_vector()
        self.theta_bins = temp[:, 0]
        if (np.sum(
            (self.theta_bins[:self.ntheta] - self.theta_bins[self.ntheta:])**2)
                > 1e-6):
            raise io_mp.LikelihoodError(
                'The angular values at which xi+ and xi- '
                'are observed do not match')

        # create the data-vector in the following format (due to covariance structure):
        # xi_obs = {xi1(theta1, z_11)...xi1(theta_k, z_11), xi2(theta_1, z_11)...
        #           xi2(theta_k, z_11);...; xi1(theta1, z_nn)...xi1(theta_k, z_nn),
        #           xi2(theta_1, z_nn)... xi2(theta_k, z_nn)}
        self.xi_obs = self.__get_xi_obs(temp[:, 1:])

        # now load the full covariance matrix:
        covmat = self.__load_public_cov_mat()

        def read_cut_values(file_name):
            # Load one cut-values file from the CUT_VALUES sub-folder
            file_path = os.path.join(self.data_directory,
                                     'CUT_VALUES/' + file_name)
            if not os.path.exists(file_path):
                raise io_mp.LikelihoodError(
                    'File not found:\n {:} \n Check that requested file was copied to:\n {:}'
                    .format(file_path, self.data_directory + 'CUT_VALUES/'))
            return np.loadtxt(file_path)

        # Read angular cut values (OPTIONAL) and compute the theta mask; the
        # second set of cuts is only needed when it is to be subtracted
        if self.use_cut_theta:
            cut_values1 = read_cut_values(self.cutvalues_file1)
            if self.subtract_mask2_from_mask1:
                cut_values2 = read_cut_values(self.cutvalues_file2)
            mask = self.__get_mask(cut_values1)
            if self.subtract_mask2_from_mask1:
                mask = mask - self.__get_mask(cut_values2)
        else:
            mask = np.ones(2 * self.nzcorrs * self.ntheta)

        self.mask_indices = np.where(mask == 1)[0]
        fname = os.path.join(self.data_directory, 'kids450_xipm_4bin_cut.dat')
        np.savetxt(fname, self.xi_obs[self.mask_indices])

        # propagate uncertainty of m-correction following equation (12) in
        # Hildebrandt et al. 2017 (arXiv:1606.05338) with \sigma_m = 0.01
        # NOTE: following Troxel et al. 2018 (arXiv:1804.10663) it is NOT
        # correct to use the noisy data vector for this; instead one should use
        # a theory vector (e.g. derived for the same cosmology for which the
        # analytical covariance was calculated).
        fname = os.path.join(self.data_directory, 'cov_matrix_ana_cut.dat')
        covmat = covmat[np.ix_(self.mask_indices, self.mask_indices)]
        if self.marginalize_over_multiplicative_bias_uncertainty:
            xi_cut = self.xi_obs[self.mask_indices]
            # outer product xi_i * xi_j scaled by 4 * sigma_m^2
            covmat = covmat + (np.outer(xi_cut, xi_cut) * 4. *
                               self.err_multiplicative_bias**2)
        np.savetxt(fname, covmat)

        # precompute Cholesky transform for chi^2 calculation:
        self.cholesky_transform = cholesky(covmat, lower=True)

        # Read fiducial dn_dz from window files:
        # TODO: the hardcoded z_min and z_max correspond to the lower and upper
        # endpoints of the shifted left-border histogram!
        self.z_p = np.linspace(0.025, 3.475, self.nzmax)
        self.pz = np.zeros((self.nzmax, self.nzbins))
        self.pz_norm = np.zeros(self.nzbins, 'float64')
        # spacing of the common redshift grid (identical for every bin)
        dz = self.z_p[1:] - self.z_p[:-1]
        # redshift grid of the first window file, reference for later bins
        zpcheck = None
        for zbin in xrange(self.nzbins):
            window_file_path = os.path.join(
                self.data_directory,
                'Nz_{0:}/Nz_{0:}_Mean/Nz_{0:}_z{1:}.asc'.format(
                    self.nz_method, self.zbin_labels[zbin]))

            zptemp, hist_pz = np.loadtxt(window_file_path,
                                         usecols=[0, 1],
                                         unpack=True)
            # All window files must share one redshift grid. The original
            # code compared each grid with itself, so the test never failed.
            if zpcheck is None:
                zpcheck = zptemp
            elif (zptemp.shape != zpcheck.shape
                  or np.sum((zptemp - zpcheck)**2) > 1e-6):
                raise io_mp.LikelihoodError(
                    'The redshift values for the window files at different bins do not match.'
                )
            print('Loaded n(zbin{:}) from: \n'.format(zbin + 1),
                  window_file_path)
            # we assume that the histograms loaded are given as left-border histograms
            # and that the z-spacing is the same for each histogram
            shift_to_midpoint = np.diff(zptemp)[0] / 2.
            spline_pz = itp.splrep(zptemp + shift_to_midpoint, hist_pz)
            # points outside the z-range of the histograms are set to 0!
            in_range = (self.z_p >= zptemp.min()) & (self.z_p <= zptemp.max())
            self.pz[in_range, zbin] = itp.splev(self.z_p[in_range], spline_pz)
            # Normalize selection functions (trapezoidal rule)
            self.pz_norm[zbin] = np.sum(
                0.5 * (self.pz[1:, zbin] + self.pz[:-1, zbin]) * dz)

        self.zmax = self.z_p.max()
        self.need_cosmo_arguments(data, {'z_max_pk': self.zmax})

        ################################################
        # discrete theta values (to convert C_l to xi's)
        ################################################

        thetamin = np.min(self.theta_bins) * 0.8
        thetamax = np.max(self.theta_bins) * 1.2

        self.nthetatot = np.int32(
            np.ceil(math.log(thetamax / thetamin) / self.dlntheta) + 1)
        self.theta = np.zeros(self.nthetatot, 'float64')
        # factor pi / (180 * 60), i.e. arcmin --> rad
        self.a2r = math.pi / (180. * 60.)

        # define an array of theta's (logarithmic spacing dlntheta)
        for it in xrange(self.nthetatot):
            self.theta[it] = thetamin * math.exp(self.dlntheta * it)

        ################################################################
        # discrete l values used in the integral to convert C_l to xi's)
        ################################################################

        # l = x / theta / self.a2r
        # x = l * theta * self.a2r

        # We start by considering the largest theta, theta[-1], and for that
        # value we infer a list of l's from the requirement that the
        # corresponding x values are spaced linearly with a given stepsize,
        # until xmax. Then we loop over smaller theta values, in decreasing
        # order, and for each of them we complete the previous list of l's,
        # always requiring the same dx stepsize (so that dl does vary) up to
        # xmax.
        #
        # The values are collected in a list in a single pass. This gives the
        # same numbers as first counting them and then filling a
        # pre-allocated array, since the arithmetic is identical.
        #
        # We also store on the fly il_max[it], the index of the last l added
        # while processing theta[it].

        theta_max = self.theta[-1]
        ells = [1.]
        while ells[-1] * theta_max * self.a2r < self.dx_threshold:
            ells.append(ells[-1] +
                        self.dx_below_threshold / theta_max / self.a2r)

        self.il_max = np.zeros(self.nthetatot, 'int')
        for it in xrange(self.nthetatot - 1, -1, -1):
            theta_it = self.theta[it]
            step = self.dx_above_threshold / theta_it / self.a2r
            while (ells[-1] * theta_it * self.a2r <
                   self.xmax) and (ells[-1] + step < self.lmax):
                ells.append(ells[-1] + step)
            self.il_max[it] = len(ells) - 1

        self.nl = len(ells)
        self.lll = np.array(ells, 'float64')

        # finally we compute the array l*dl that will be used in the trapezoidal integration
        # (l is a factor in the integrand [l * C_l * Bessel], and dl is like a weight)
        self.ldl = np.zeros(self.nl, 'float64')
        self.ldl[0] = self.lll[0] * 0.5 * (self.lll[1] - self.lll[0])
        self.ldl[1:-1] = self.lll[1:-1] * 0.5 * (self.lll[2:] - self.lll[:-2])
        self.ldl[-1] = self.lll[-1] * 0.5 * (self.lll[-1] - self.lll[-2])

        #####################################################################
        # Allocation of various arrays filled and used in the function loglkl
        #####################################################################

        self.r = np.zeros(self.nzmax, 'float64')
        self.dzdr = np.zeros(self.nzmax, 'float64')
        self.g = np.zeros((self.nzmax, self.nzbins), 'float64')
        self.pk = np.zeros((self.nlmax, self.nzmax), 'float64')
        self.k_sigma = np.zeros(self.nzmax, 'float64')
        self.alpha = np.zeros((self.nlmax, self.nzmax), 'float64')
        if 'epsilon' in self.use_nuisance:
            self.E_th_nu = np.zeros((self.nlmax, self.nzmax), 'float64')
        self.Cl_integrand = np.zeros((self.nzmax, self.nzcorrs), 'float64')
        self.Cl = np.zeros((self.nlmax, self.nzcorrs), 'float64')
        # NOTE: no arrays for a theoretical error (El, El_integrand) are
        # allocated; that code path was already disabled in the original.
        self.spline_Cl = np.empty(self.nzcorrs, dtype=(list, 3))
        self.xi1 = np.zeros((self.nthetatot, self.nzcorrs), 'float64')
        self.xi2 = np.zeros((self.nthetatot, self.nzcorrs), 'float64')
        self.Cll = np.zeros((self.nzcorrs, self.nl), 'float64')
        self.BBessel0 = np.zeros(self.nl, 'float64')
        self.BBessel4 = np.zeros(self.nl, 'float64')
        self.xi1_theta = np.empty(self.nzcorrs, dtype=(list, 3))
        self.xi2_theta = np.empty(self.nzcorrs, dtype=(list, 3))
        self.xi = np.zeros(np.size(self.xi_obs), 'float64')

        return
Beispiel #17
0
def parse(custom_command=''):
    """
    Check some basic organization of the folder, and exit the program in case
    something goes wrong.

    For the "run" sub-command the parameter file and output folder are made
    consistent: when restarting from a chain, or when the output folder
    already contains a log.param, that log.param replaces any parameter file
    given on the command line.

    Keyword Arguments
    -----------------
    custom_command : str
        For testing purposes, instead of reading the command line argument,
        read instead the given string. It should omit the start of the
        command, so e.g.: '-N 10 -o toto/'. The string is split on
        whitespace.

    Returns
    -------
    args
        The object returned by ``parser.safe_parse_args``, with ``folder``
        and ``param`` possibly updated as described above.

    Raises
    ------
    io_mp.ConfigurationError
        If no output folder is given for a run that is not a restart, or if
        no parameter file is given and the output folder holds no log.param.

    """
    # Create the parser
    parser = create_parser()

    # Recover all command line arguments in the args dictionary, except for a
    # test, where the custom_command string is read.
    # Note that the function safe_parse_args is read instead of parse_args. It
    # is a function defined in this file to allow for a default subparser.
    if not custom_command:
        args = parser.safe_parse_args()
    else:
        # split() without argument ignores repeated, leading and trailing
        # whitespace, which split(' ') would turn into empty arguments
        args = parser.safe_parse_args(custom_command.split())

    # The remaining checks are only performed when running the MCMC chains is
    # requested
    if args.subparser_name != "run":
        return args

    # If the user wants to start over from an existing chain, the program
    # will use automatically the same folder, and the log.param in it
    if args.restart is not None:
        args.folder = os.path.dirname(args.restart)
        args.param = os.path.join(args.folder, 'log.param')
        warnings.warn("Restarting from %s." % args.restart +
                      " Using associated log.param.")
        return args

    # Else, the user should provide an output folder
    if args.folder is None:
        raise io_mp.ConfigurationError(
            "You must provide an output folder, because you do not " +
            "want your main folder to look dirty, do you ?")

    folder_exists = os.path.isdir(args.folder)
    log_param = os.path.join(args.folder, 'log.param')

    if folder_exists and os.path.exists(log_param):
        # if the log.param exists, and that a parameter file was provided,
        # take instead the log.param, and notify the user.
        old_param = args.param
        args.param = log_param
        if old_param is not None:
            warnings.warn(
                "Appending to an existing folder: using the "
                "log.param instead of %s" % old_param)
    elif args.param is None:
        # Without a log.param to fall back on, a parameter file is mandatory
        if folder_exists:
            raise io_mp.ConfigurationError(
                "The requested output folder seems empty. "
                "You must then provide a parameter file (command"
                " line option -p any.param)")
        raise io_mp.ConfigurationError(
            "The requested output folder appears to be non "
            "existent. You must then provide a parameter file "
            "(command line option -p any.param)")

    return args
Beispiel #18
0
def initialise(custom_command=''):
    """
    Prepare everything that is needed before the sampling can start.

    The command line is parsed through :mod:`parser_mp` and the local
    configuration is recovered. The version of the code is then read from
    the `VERSION` file and announced (unless running silently, or on a
    process that is not the master one).

    If the `info` sub-command was requested, the chains are analysed right
    away and nothing else is created. Otherwise, a :class:`data` instance and
    the cosmological modules are initialised, and the output structure of the
    requested sampler is prepared.

    Parameters
    ----------
        custom_command: str
            allows for testing the code

    Returns
    -------
        tuple
            `(cosmo1, cosmo2, data, command_line, flag)`. After an `info`
            call, the first three elements are `None` and the flag is
            `False`; in every other case the flag is `True`.
    """
    # Read the options, then locate the local configuration
    command_line = parser_mp.parse(custom_command)
    path = recover_local_path(command_line)

    # Without mpi4py, every chain is considered to be the master one
    try:
        from mpi4py import MPI
        rank = MPI.COMM_WORLD.Get_rank()
    except ImportError:
        rank = 0

    # Announce the version number, taken from the first line of VERSION
    with open(os.path.join(path['root'], 'VERSION'), 'r') as version_file:
        version = version_file.readline()
    if not (command_line.silent or rank):
        print('Running Monte Python v%s' % version)

    # Analysis mode: read a chain (or a set of chains, or a .info file) and
    # process it with the default procedure. Nothing has to be sampled.
    if command_line.subparser_name == "info":
        from analyze import analyze  # only invoked when analyzing
        analyze(command_line)
        # FK: we need an additional None because of two cosmo-modules!
        return None, None, None, command_line, False

    # Sampling mode: fill in data, starting from the parameter file. If the
    # output folder already exists, the input parameter file was
    # automatically replaced by the existing log.param, which prevents
    # running different things in a same folder.
    data = Data(command_line, path)

    # The number of steps given in the parameter file is only a fallback for
    # a missing command line value
    if command_line.N is None:
        try:
            command_line.N = data.N
        except AttributeError:
            raise io_mp.ConfigurationError(
                "You did not provide a number of steps, neither via " +
                "command line, nor in %s" % command_line.param)

    # Loading up the cosmological backbone. For the moment, only CLASS has
    # been wrapped.
    cosmo1, cosmo2 = recover_cosmological_module(data)

    # Initialising the sampler
    method = command_line.method
    if method == 'MH':
        # Creating the file that will contain the chain
        io_mp.create_output_files(command_line, data)
    elif method == 'NS':
        # Creating the NS subfolder and the MultiNest arguments
        from MultiNest import initialise as initialise_mn
        initialise_mn(cosmo1, cosmo2, data, command_line)
    elif method == 'PC':
        # Creating the PC subfolder and the PolyChord arguments
        from PolyChord import initialise as initialise_pc
        initialise_pc(cosmo1, cosmo2, data, command_line)

    return cosmo1, cosmo2, data, command_line, True
Beispiel #19
0
    def __init__(self, path, data, command_line):
        """
        Set up the Lya likelihood.

        After the generic :class:`Likelihood` initialisation, this requests
        the matter power spectrum from the cosmological code, creates the
        header of the bin file in the output folder if it does not exist
        yet, reads the (alpha, beta, gamma) grid, defines the ranges of the
        astrophysical parameters, and loads the interpolation grids, the
        spectra and the inverted covariance matrices from the data directory.

        The file names, the data directory, `kmax`, `grid_size` and
        `DATASET` are read from attributes of `self` that are not set in this
        method (presumably filled by `Likelihood.__init__` from the
        likelihood's data file -- TODO confirm).

        Parameters
        ----------
            path:
                forwarded unchanged to `Likelihood.__init__`
            data:
                object providing `get_mcmc_parameters`; its attribute
                `derived_lkl` is (re)initialised here
            command_line:
                its attribute `folder` gives the location of the bin file

        Raises
        ------
            io_mp.ConfigurationError
                if `z_reio` or `sigma8` is not a derived parameter, or if
                one of the required data files is missing
            io_mp.LikelihoodError
                if `DATASET` is not "mike-hires"
        """

        Likelihood.__init__(self, path, data, command_line)

        print("Initializing Lya likelihood")

        self.need_cosmo_arguments(data, {'output': 'mPk'})
        self.need_cosmo_arguments(data, {'P_k_max_h/Mpc': 1.5 * self.kmax})

        # number of grid points for the lcdm case (i.e. alpha=0, regardless of beta and gamma values), not needed
        #lcdm_points = 33
        # number of non-astro params (i.e. alpha, beta, and gamma)
        self.params_numbers = 3

        alphas = np.zeros(self.grid_size, 'float64')
        betas = np.zeros(self.grid_size, 'float64')
        gammas = np.zeros(self.grid_size, 'float64')

        # Derived_lkl is a new type of derived parameter calculated in the likelihood, and not known to class.
        # This first initialising avoids problems in the case of an error in the first point of the MCMC
        data.derived_lkl = {'alpha': 0, 'beta': 0, 'gamma': 0, 'lya_neff': 0}

        # Write the header of the bin file (one column per varying, derived
        # and derived_lkl parameter), unless the file is already there
        self.bin_file_path = os.path.join(command_line.folder,
                                          self.bin_file_name)
        if not os.path.exists(self.bin_file_path):
            with open(self.bin_file_path, 'w') as bin_file:
                bin_file.write('#')
                for kind in ('varying', 'derived', 'derived_lkl'):
                    for name in data.get_mcmc_parameters([kind]):
                        name = re.sub('[$*&]', '', name)
                        bin_file.write(' %s\t' % name)
                bin_file.write('\n')

        derived_names = data.get_mcmc_parameters(['derived'])
        if 'z_reio' not in derived_names or 'sigma8' not in derived_names:
            raise io_mp.ConfigurationError(
                'Error: Lya likelihood need z_reio and sigma8 as derived parameters'
            )

        # Read the (alpha, beta, gamma) grid: first three columns of the
        # `grid_size` lines following the header
        file_path = self._data_file_path(self.grid_file, 'grid')
        with open(file_path, 'r') as grid_file:
            line = grid_file.readline()
            # Skip every leading line containing a '#'
            while line.find('#') != -1:
                line = grid_file.readline()
            # Skip the following lines that are exactly 3 characters long
            while (line.find('\n') != -1 and len(line) == 3):
                line = grid_file.readline()
            for index in range(self.grid_size):
                fields = line.split()
                alphas[index] = float(fields[0])
                betas[index] = float(fields[1])
                gammas[index] = float(fields[2])
                line = grid_file.readline()

        # Real parameters
        X_real = np.zeros((self.grid_size, self.params_numbers), 'float64')

        for k in range(self.grid_size):
            X_real[k][0] = self.khalf(alphas[k], betas[k],
                                      gammas[k])  # Here we use k_1/2
            X_real[k][1] = betas[k]
            X_real[k][2] = gammas[k]

        # For the normalization
        self.a_min = min(X_real[:, 0])
        self.b_min = min(X_real[:, 1])
        self.g_min = min(X_real[:, 2])
        self.a_max = max(X_real[:, 0])
        self.b_max = max(X_real[:, 1])
        self.g_max = max(X_real[:, 2])

        # Redshift independent parameters - params order: z_reio, sigma_8, n_eff, f_UV
        self.zind_param_size = [3, 5, 5,
                                3]  # How many values we have for each param
        self.zind_param_min = np.array([7., 0.5, -2.6, 0.])
        self.zind_param_max = np.array([15., 1.5, -2.0, 1.])
        # Reference values (kept for reference; not used below)
        zind_param_ref = np.array([9., 0.829, -2.3074, 0.])
        self.zreio_range = self.zind_param_max[0] - self.zind_param_min[0]
        self.neff_range = self.zind_param_max[2] - self.zind_param_min[2]

        # Redshift dependent parameters - params order: mean_f, t0, slope
        zdep_params_size = [9, 3, 3]  # How many values we have for each param
        # Where to store the P_F(ref) DATA (kept for reference; not used below)
        zdep_params_refpos = [4, 1, 2]

        # Mean flux values
        flux_ref_old = (np.array([
            0.669181, 0.617042, 0.564612, 0.512514, 0.461362, 0.411733,
            0.364155, 0.253828, 0.146033, 0.0712724
        ]))
        # Older, not used values
        #flux_min_meanf = (np.array([0.401509, 0.370225, 0.338767, 0.307509, 0.276817, 0.24704, 0.218493, 0.152297, 0.0876197, 0.0427634]))
        #flux_max_meanf = (np.array([0.936854, 0.863859, 0.790456, 0.71752, 0.645907, 0.576426, 0.509816, 0.355359, 0.204446, 0.0997813]))

        # Manage the data sets
        # FIRST (NOT USED) DATASET (19 wavenumbers) ***XQ-100***
        self.zeta_range_XQ = [
            3.0, 3.2, 3.4, 3.6, 3.8, 4.0, 4.2
        ]  # List of redshifts corresponding to the 19 wavenumbers (k)
        self.k_XQ = [
            0.003, 0.006, 0.009, 0.012, 0.015, 0.018, 0.021, 0.024, 0.027,
            0.03, 0.033, 0.036, 0.039, 0.042, 0.045, 0.048, 0.051, 0.054, 0.057
        ]

        # SECOND DATASET (7 wavenumbers) ***HIRES/MIKE***
        self.zeta_range_mh = [
            4.2, 4.6, 5.0, 5.4
        ]  # List of redshifts corresponding to the 7 wavenumbers (k)
        self.k_mh = [
            0.00501187, 0.00794328, 0.0125893, 0.0199526, 0.0316228, 0.0501187,
            0.0794328
        ]  # Note that k is in s/km

        self.zeta_full_length = (len(self.zeta_range_XQ) +
                                 len(self.zeta_range_mh))
        self.kappa_full_length = (len(self.k_XQ) + len(self.k_mh))

        # Which snapshots we use (first 7 for first dataset, last 4 for second one)
        self.redshift = [3.0, 3.2, 3.4, 3.6, 3.8, 4.0, 4.2, 4.2, 4.6, 5.0, 5.4]

        # T0 and slope values
        t0_ref_old = np.array([
            11251.5, 11293.6, 11229.0, 10944.6, 10421.8, 9934.49, 9227.31,
            8270.68, 7890.68, 7959.4
        ])
        slope_ref_old = np.array([
            1.53919, 1.52894, 1.51756, 1.50382, 1.48922, 1.47706, 1.46909,
            1.48025, 1.50814, 1.52578
        ])

        t0_values_old = np.zeros((10, zdep_params_size[1]), 'float64')
        t0_values_old[:, 0] = np.array([
            7522.4, 7512.0, 7428.1, 7193.32, 6815.25, 6480.96, 6029.94,
            5501.17, 5343.59, 5423.34
        ])
        t0_values_old[:, 1] = t0_ref_old[:]
        t0_values_old[:, 2] = np.array([
            14990.1, 15089.6, 15063.4, 14759.3, 14136.3, 13526.2, 12581.2,
            11164.9, 10479.4, 10462.6
        ])

        slope_values_old = np.zeros((10, zdep_params_size[2]), 'float64')
        slope_values_old[:, 0] = np.array([
            0.996715, 0.979594, 0.960804, 0.938975, 0.915208, 0.89345,
            0.877893, 0.8884, 0.937664, 0.970259
        ])
        slope_values_old[:, 1] = [
            1.32706, 1.31447, 1.30014, 1.28335, 1.26545, 1.24965, 1.2392,
            1.25092, 1.28657, 1.30854
        ]
        slope_values_old[:, 2] = slope_ref_old[:]

        self.t0_min = t0_values_old[:, 0] * 0.1
        self.t0_max = t0_values_old[:, 2] * 1.4
        self.slope_min = slope_values_old[:, 0] * 0.8
        self.slope_max = slope_values_old[:, 2] * 1.15

        # Import the two grids for Kriging
        self.input_full_matrix_interpolated_ASTRO = self._load_pickle(
            self.astro_spectra_file, 'astro spectra')
        self.input_full_matrix_interpolated_ABG = self._load_pickle(
            self.abg_spectra_file, 'abg spectra')

        ALL_zdep_params = len(flux_ref_old) + len(t0_ref_old) + len(
            slope_ref_old)
        grid_length_ABG = len(self.input_full_matrix_interpolated_ABG[0, 0, :])
        grid_length_ASTRO = len(
            self.input_full_matrix_interpolated_ASTRO[0, 0, :])
        astroparams_number_KRIG = len(self.zind_param_size) + ALL_zdep_params

        # Import the ABG GRID (alpha, beta, gamma)
        file_path = self._data_file_path(self.abg_grid_file, 'abg grid')
        self.X_ABG = np.zeros((grid_length_ABG, self.params_numbers),
                              'float64')
        for param_index in range(self.params_numbers):
            self.X_ABG[:, param_index] = np.genfromtxt(file_path,
                                                       usecols=[param_index],
                                                       skip_header=1)

        # Import the ASTRO GRID (ordering of params: z_reio, sigma_8, n_eff, f_UV, mean_f(z), t0(z), slope(z))
        file_path = self._data_file_path(self.abg_astro_grid_file,
                                         'abg+astro grid')
        self.X = np.zeros((grid_length_ASTRO, astroparams_number_KRIG),
                          'float64')
        for param_index in range(astroparams_number_KRIG):
            self.X[:, param_index] = np.genfromtxt(file_path,
                                                   usecols=[param_index],
                                                   skip_header=1)

        # Prepare the interpolation in astro-param space
        self.redshift_list = np.array([
            3.0, 3.2, 3.4, 3.6, 3.8, 4.0, 4.2, 4.6, 5.0, 5.4
        ])  # This corresponds to the combined dataset (MIKE/HIRES + XQ-100)
        self.F_prior_min = np.array([
            0.535345, 0.493634, 0.44921, 0.392273, 0.338578, 0.28871, 0.218493,
            0.146675, 0.0676442, 0.0247793
        ])
        self.F_prior_max = np.array([
            0.803017, 0.748495, 0.709659, 0.669613, 0.628673, 0.587177,
            0.545471, 0.439262, 0.315261, 0.204999
        ])

        # Load the data
        if not self.DATASET == "mike-hires":
            raise io_mp.LikelihoodError(
                'Error: for the time being, only the mike - hires dataset is available'
            )

        y_M_reshaped = self._load_pickle(self.MIKE_spectra_file,
                                         'MIKE spectra')
        y_H_reshaped = self._load_pickle(self.HIRES_spectra_file,
                                         'HIRES spectra')
        cov_M_inverted = self._load_pickle(self.MIKE_cov_file,
                                           'MIKE covariance matrix')
        cov_H_inverted = self._load_pickle(self.HIRES_cov_file,
                                           'HIRES covariance matrix')
        self.PF_noPRACE = self._load_pickle(self.PF_noPRACE_file,
                                            'PF_noPRACE')

        # HIRES comes first, then MIKE, in both the covariance and the data
        self.cov_MH_inverted = block_diag(cov_H_inverted, cov_M_inverted)
        self.y_MH_reshaped = np.concatenate((y_H_reshaped, y_M_reshaped))

        print("Initialization of Lya likelihood done")

    def _data_file_path(self, file_name, label):
        """
        Return the path of `file_name` inside the data directory.

        Raises :class:`io_mp.ConfigurationError` with the message
        'Error: <label> file is missing' when the file does not exist.
        """
        file_path = os.path.join(self.data_directory, file_name)
        if not os.path.exists(file_path):
            raise io_mp.ConfigurationError(
                'Error: %s file is missing' % label)
        return file_path

    def _load_pickle(self, file_name, label):
        """
        Unpickle `file_name` from the data directory and return its content.

        The file is first loaded with the default settings; if this raises a
        UnicodeDecodeError, it is opened again and loaded with
        `encoding='latin1'`. Each handle is closed as soon as its attempt is
        over, including the one of the failed first attempt.

        Raises :class:`io_mp.ConfigurationError` when the file is missing
        (`label` is used in the message).
        """
        file_path = self._data_file_path(file_name, label)
        # NOTE(review): unpickling can execute arbitrary code; these files
        # must only come from a trusted source.
        try:
            with open(file_path, 'rb') as pkl:
                return pickle.load(pkl)
        except UnicodeDecodeError:
            with open(file_path, 'rb') as pkl:
                return pickle.load(pkl, encoding='latin1')
Beispiel #20
0
def run(cosmo, data, command_line):
    """
    Performing the Importance Sampling

    The idea is to start from an existing run, constraining a certain model I,
    given a set of experiments. The new run will constrain the same model I,
    but adding one or several new experiments. In the case where it is expected
    that the final posterior distribution should not differ too greatly between
    the two parameter extractions, then using Importance Sampling can speed up
    significantly the second one.

    Instead of properly sampling randomly the parameter space, it instead reads
    the chains from the previous run, recomputes the cosmology at this point,
    then adds the log-likelihood contributed by the new experiments to the
    previous ones. As an input of the method, with the flag
    `--IS-starting-folder`, you can thus specify either a folder containing a
    Monte Python run, or a set of chains that you want to be converted.

    The code will automatically compute the minimum amount of things. For
    instance, if the first run had all the Planck likelihoods, and the second,
    all the Planck likelihoods plus a prior on :math:`H_0`, it would be absurd
    to recompute also the cosmological perturbations: the only needed quantity
    is a background quantity.

    The new chains will hence store the same points in parameter space, but
    with a different value of the likelihood, and also of the multiplicity -
    that will become non-integer. Indeed, the multiplicity is also a probe of
    the posterior, and this new, higher likelihood should have had a higher
    multiplicity.

    Parameters
    ----------
        cosmo:
            cosmological module, handed over to every chain translation
        data:
            its `cosmo_arguments` are cleaned from the output requirements of
            the previous likelihoods, and the new likelihoods are initialised
            through `initialise_likelihoods`
        command_line:
            its attribute `IS_starting_folder` holds either one folder, or
            one or several chain files

    Raises
    ------
        io_mp.ConfigurationError
            if `--IS-starting-folder` was not given, or if no new experiment
            was added with respect to the starting run
    """
    # Check that the command_line "--IS-starting-folder" points to an existing
    # Monte Python folder run, or a subset of files, and store in any case all
    # the chains to analyze in the chains.
    starting_folder = command_line.IS_starting_folder
    if not starting_folder:
        raise io_mp.ConfigurationError(
            "When running importance sampling, you should specify a folder or"
            " a set of chains with the option '--IS-starting-folder'")

    # A single entry that is a folder: recover all the chains within
    if len(starting_folder) == 1 and os.path.isdir(starting_folder[0]):
        starting_folder = starting_folder[0]
        chains = [elem for elem in os.listdir(starting_folder)
                  if "__" in elem]
    # Else, it is one chain or a list of chains, of which we recover the
    # folder name (taken from the first one), and store all the file names in
    # chains. A single chain file used to be silently ignored.
    else:
        chain_paths = starting_folder
        starting_folder = os.path.dirname(chain_paths[0])
        chains = [os.path.basename(elem) for elem in chain_paths]

    # Recovering only the extra likelihoods
    new_experiments = recover_new_experiments(
        data, command_line, starting_folder)
    if not new_experiments:
        raise io_mp.ConfigurationError(
            "You are using Importance Sampling without adding a new "
            "experiment. This is not what this method is coded for.")

    # The likelihood of the starting chains is never ignored here; the flag is
    # only forwarded to the chain translation.
    ignore_likelihood = False

    # Wipe out the problematic information from previous likelihoods,
    # namely their desired output
    data.cosmo_arguments['output'] = ''

    # Each key is removed on its own, so that a missing 'l_max_scalars' does
    # not leave 'lensing' behind
    for key in ('l_max_scalars', 'lensing'):
        data.cosmo_arguments.pop(key, None)

    # Initialise the requirements of the new likelihood
    data.initialise_likelihoods(new_experiments)

    # Multiprocessing part, to analyze all the chains in parallel. When not
    # specifying any 'processes' keyword argument to the Pool call, the system
    # uses as many as possible.
    pool = Pool()
    args = [(data, cosmo, command_line, starting_folder,
             elem, ignore_likelihood) for elem in chains]
    # Note the use of translate_chain_star, and not translate_chain, because of
    # the limitations of the `map` function (it only takes one argument). The
    # `_star` function simply unwraps the argument.
    print('\nStart extracting the chains:\n')
    try:
        pool.map(translate_chain_star, args)
    finally:
        # Close the pool and join everything, even if a translation failed
        pool.close()
        pool.join()
Beispiel #21
0
def initialise(cosmo, data, command_line):
    """
    Main call to prepare the information for the PolyChord run.

    It checks that the priors are suitable, creates the PolyChord sub-folder
    (and its `clusters` sub-folder) inside the output folder, fills
    `data.PC_arguments` and `data.PC_param_names`, and finally writes the
    arguments and the parameter ordering to two files of the sub-folder.

    Parameters
    ----------
        cosmo:
            not used in this function
        data:
            provides the parameters through `get_mcmc_parameters` and
            `mcmc_parameters`; its `PC_arguments` dictionary is filled here
        command_line:
            its attribute `folder` is the output folder, and the user-defined
            PolyChord options are read from it

    Raises
    ------
        io_mp.ConfigurationError
            if a prior is not flat, if a parameter is not bound, or if there
            is no varying parameter at all
    """

    # Convenience variables
    varying_param_names = data.get_mcmc_parameters(['varying'])
    derived_param_names = data.get_mcmc_parameters(['derived'])
    nslow = len(data.get_mcmc_parameters(['varying', 'cosmo']))
    nfast = len(data.get_mcmc_parameters(['varying', 'nuisance']))

    # Check that all the priors are flat and that all the parameters are bound
    is_flat, is_bound = sampler.check_flat_bound_priors(
        data.mcmc_parameters, varying_param_names)
    if not is_flat:
        raise io_mp.ConfigurationError(
            'Nested Sampling with PolyChord is only possible ' +
            'with flat priors. Sorry!')
    if not is_bound:
        raise io_mp.ConfigurationError(
            'Nested Sampling with PolyChord is only possible ' +
            'for bound parameters. Set reasonable bounds for them in the ' +
            '".param" file.')

    # If absent, create the sub-folder PC
    PC_folder = os.path.join(command_line.folder, PC_subfolder)
    if not os.path.exists(PC_folder):
        os.makedirs(PC_folder)

    # If absent, create the sub-folder PC/clusters
    PC_clusters_folder = os.path.join(PC_folder, 'clusters')
    if not os.path.exists(PC_clusters_folder):
        os.makedirs(PC_clusters_folder)

    # Use chain name as a base name for PolyChord files
    chain_name = [a for a in command_line.folder.split(os.path.sep) if a][-1]
    base_name = os.path.join(PC_folder, chain_name)

    # Prepare arguments for PyPolyChord
    # -- Automatic arguments
    data.PC_arguments['file_root'] = chain_name
    data.PC_arguments['base_dir'] = PC_folder
    # Slow (cosmological) parameters come first, then the fast (nuisance)
    # ones; an empty group is left out
    data.PC_arguments['grade_dims'] = []
    data.PC_arguments['grade_frac'] = []
    if nslow:
        data.PC_arguments['grade_dims'].append(nslow)
        data.PC_arguments['grade_frac'].append(0.75)
    if nfast:
        data.PC_arguments['grade_dims'].append(nfast)
        data.PC_arguments['grade_frac'].append(0.25)

    # Without any cosmological or nuisance varying parameter there is no
    # first group to derive 'num_repeats' from: fail with a clear message
    # rather than with an IndexError
    if not data.PC_arguments['grade_dims']:
        raise io_mp.ConfigurationError(
            'Nested Sampling with PolyChord requires at least one varying ' +
            'cosmological or nuisance parameter.')

    data.PC_arguments['num_repeats'] = data.PC_arguments['grade_dims'][0] * 2

    # -- User-defined arguments
    for arg in PC_user_arguments:
        value = getattr(command_line, PC_prefix+arg)
        if value != -1:
            data.PC_arguments[arg] = value
        # else: don't define them -> use PyPolyChord default value

    data.PC_param_names = varying_param_names

    # Write the PolyChord arguments and parameter ordering
    with open(base_name+name_arguments, 'w') as afile:
        for arg in data.PC_arguments:
            afile.write(' = '.join(
                [str(arg), str(data.PC_arguments[arg])]))
            afile.write('\n')
    with open(base_name+name_paramnames, 'w') as pfile:
        pfile.write('\n'.join(data.PC_param_names+derived_param_names))
Beispiel #22
0
    def loglkl(self, cosmo, data):

        k = np.logspace(np.log10(self.kmin),
                        np.log10(self.kmax),
                        num=self.k_size)

        # Initialise the bin file
        if not os.path.exists(self.bin_file_path):
            with open(self.bin_file_path, 'w') as bin_file:
                bin_file.write('#')
                for name in data.get_mcmc_parameters(['varying']):
                    name = re.sub('[$*&]', '', name)
                    bin_file.write(' %s\t' % name)
                for name in data.get_mcmc_parameters(['derived']):
                    name = re.sub('[$*&]', '', name)
                    bin_file.write(' %s\t' % name)
                for name in data.get_mcmc_parameters(['derived_lkl']):
                    name = re.sub('[$*&]', '', name)
                    bin_file.write(' %s\t' % name)
                bin_file.write('\n')
                bin_file.close()

        # Deal with the astro nuisance parameters
        if 'T0a' in data.mcmc_parameters:
            T0a = data.mcmc_parameters['T0a'][
                'current'] * data.mcmc_parameters['T0a']['scale']
        else:
            T0a = 0.74
        if 'T0s' in data.mcmc_parameters:
            T0s = data.mcmc_parameters['T0s'][
                'current'] * data.mcmc_parameters['T0s']['scale']
        else:
            T0s = -4.38
        if 'gamma_a' in data.mcmc_parameters:
            gamma_a = data.mcmc_parameters['gamma_a'][
                'current'] * data.mcmc_parameters['gamma_a']['scale']
        else:
            gamma_a = 1.45
        if 'gamma_s' in data.mcmc_parameters:
            gamma_s = data.mcmc_parameters['gamma_s'][
                'current'] * data.mcmc_parameters['gamma_s']['scale']
        else:
            gamma_s = 1.93
        if 'Fz1' in data.mcmc_parameters:
            Fz1 = data.mcmc_parameters['Fz1'][
                'current'] * data.mcmc_parameters['Fz1']['scale']
        else:
            Fz1 = 0.35
        if 'Fz2' in data.mcmc_parameters:
            Fz2 = data.mcmc_parameters['Fz2'][
                'current'] * data.mcmc_parameters['Fz2']['scale']
        else:
            Fz2 = 0.26
        if 'Fz3' in data.mcmc_parameters:
            Fz3 = data.mcmc_parameters['Fz3'][
                'current'] * data.mcmc_parameters['Fz3']['scale']
        else:
            Fz3 = 0.18
        if 'Fz4' in data.mcmc_parameters:
            Fz4 = data.mcmc_parameters['Fz4'][
                'current'] * data.mcmc_parameters['Fz4']['scale']
        else:
            Fz4 = 0.07
        if 'F_UV' in data.mcmc_parameters:
            F_UV = data.mcmc_parameters['F_UV'][
                'current'] * data.mcmc_parameters['F_UV']['scale']
        else:
            F_UV = 0.0

        # Get P(k) from CLASS
        h = cosmo.h()
        Plin = np.zeros(len(k), 'float64')
        for index_k in range(len(k)):
            Plin[index_k] = cosmo.pk_lin(k[index_k] * h, 0.0)
        Plin *= h**3

        # Compute the Lya k scale
        Om = cosmo.Omega_m()
        OL = cosmo.Omega_Lambda()
        k_neff = self.k_s_over_km * 100. / (1. + self.z) * ((
            (1. + self.z)**3 * Om + OL)**(1. / 2.))

        derived = cosmo.get_current_derived_parameters(
            data.get_mcmc_parameters(['derived']))
        for (name, value) in derived.items():
            data.mcmc_parameters[name]['current'] = value
        for name in derived:
            data.mcmc_parameters[name]['current'] /= data.mcmc_parameters[
                name]['scale']

        # Obtain current z_reio, sigma_8, and neff from CLASS
        z_reio = data.mcmc_parameters['z_reio']['current']
        # Check that z_reio is in the correct range
        if z_reio < self.zind_param_min[0]:
            z_reio = self.zind_param_min[0]
        if z_reio > self.zind_param_max[0]:
            z_reio = self.zind_param_max[0]
        sigma8 = data.mcmc_parameters['sigma8']['current']
        neff = cosmo.pk_tilt(k_neff * h, self.z)

        # Store neff as a derived_lkl parameter
        data.derived_lkl['lya_neff'] = neff

        # First sanity check, to make sure the cosmological parameters are in the correct range
        if ((sigma8 < self.zind_param_min[1]
             or sigma8 > self.zind_param_max[1]) or
            (neff < self.zind_param_min[2] or neff > self.zind_param_max[2])):
            with open(self.bin_file_path, 'a') as bin_file:
                bin_file.write('#Error_cosmo\t')
                for elem in data.get_mcmc_parameters(['varying']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                for elem in data.get_mcmc_parameters(['derived']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                for elem in data.get_mcmc_parameters(['derived_lkl']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                bin_file.write('\n')
                bin_file.close()
            sys.stderr.write('#Error_cosmo\n')
            sys.stderr.flush()
            return data.boundary_loglike

        # Here Neff is the standard N_eff (effective d.o.f.)
        classNeff = cosmo.Neff()

        # Store the current CLASS values for later
        param_backup = data.cosmo_arguments.copy()

        # To calculate the LCDM-equivalent, we need to remap the non-LCDM parameters, following 1412.6763
        if 'xi_idr' in data.cosmo_arguments or 'N_ur' in data.cosmo_arguments or 'N_ncdm' in data.cosmo_arguments or 'N_dg' in data.cosmo_arguments or 'N_idr' in data.cosmo_arguments:
            eta2 = (1. + 0.2271 * classNeff) / (1. + 0.2271 * 3.046)
            eta = np.sqrt(eta2)

            if 'N_ur' in data.cosmo_arguments:
                data.cosmo_arguments['N_ur'] = 3.046
            if 'N_ncdm' in data.cosmo_arguments:
                del data.cosmo_arguments['N_ncdm']

            if 'omega_b' in data.cosmo_arguments:
                data.cosmo_arguments['omega_b'] *= 1. / eta2
            if 'omega_cdm' in data.cosmo_arguments:
                data.cosmo_arguments['omega_cdm'] *= 1. / eta2
            if 'H0' in data.cosmo_arguments:
                data.cosmo_arguments['H0'] *= 1. / eta
            if '100*theta_s' in data.cosmo_arguments:
                raise io_mp.ConfigurationError(
                    'Error: run with H0 instead of 100*theta_s')

            # Deal with Interacting Dark Matter with Dark Radiation (ETHOS-like models)
            if 'xi_idr' in data.cosmo_arguments or 'N_idr' in data.cosmo_arguments or 'N_dg' in data.cosmo_arguments:
                # Class can take Omega_idm_dr, omega_idm_dr, or f_idm_dr, so the following lines are needed to comput the lcdm equivalent
                if 'Omega_idm_dr' in data.cosmo_arguments:
                    if 'Omega_cdm' in data.cosmo_arguments:
                        data.cosmo_arguments[
                            'Omega_cdm'] += data.cosmo_arguments[
                                'Omega_idm_dr']
                    if 'omega_cdm' in data.cosmo_arguments:
                        data.cosmo_arguments[
                            'omega_cdm'] += data.cosmo_arguments[
                                'Omega_idm_dr'] * h * h / eta / eta
                    del data.cosmo_arguments['Omega_idm_dr']
                if 'omega_idm_dr' in data.cosmo_arguments:
                    if 'Omega_cdm' in data.cosmo_arguments:
                        data.cosmo_arguments[
                            'Omega_cdm'] += data.cosmo_arguments[
                                'omega_idm_dr'] / h / h
                    if 'omega_cdm' in data.cosmo_arguments:
                        data.cosmo_arguments[
                            'omega_cdm'] += data.cosmo_arguments[
                                'omega_idm_dr'] / eta2
                    del data.cosmo_arguments['omega_idm_dr']
                # Now we clean up variable no longer needed. We cover all possible cases of notation
                to_remove = [
                    'xi_idr', 'N_dg', 'N_idr', 'a_idm_dr', 'a_dark',
                    'Gamma_0_nadm', 'nindex_dark', 'n_index_idm_dr', 'f_idm_dr'
                ]
                for bad in to_remove:
                    if bad in data.cosmo_arguments:
                        del data.cosmo_arguments[bad]

            # Deal with Hot Dark Matter
            if 'm_ncdm' in data.cosmo_arguments and not 'omega_ncdm' in data.cosmo_arguments and not 'Omega_ncdm' in data.cosmo_arguments:
                data.cosmo_arguments['m_ncdm'] *= 1. / eta2
            if 'omega_ncdm' in data.cosmo_arguments and not 'Omega_ncdm' in data.cosmo_arguments and not 'm_ncdm' in data.cosmo_arguments:
                data.cosmo_arguments['omega_ncdm'] *= 1. / eta2

            # Deal with Warm Dark Matter
            if 'm_ncdm' in data.cosmo_arguments and (
                    'omega_ncdm' in data.cosmo_arguments
                    or 'Omega_ncdm' in data.cosmo_arguments):
                if 'Omega_ncdm' in data.cosmo_arguments:
                    if 'Omega_cdm' in data.cosmo_arguments:
                        data.cosmo_arguments[
                            'Omega_cdm'] += data.cosmo_arguments['Omega_ncdm']
                    if 'omega_cdm' in data.cosmo_arguments:
                        data.cosmo_arguments[
                            'omega_cdm'] += data.cosmo_arguments[
                                'Omega_ncdm'] * h * h / eta / eta
                    del data.cosmo_arguments['Omega_ncdm']
                if 'omega_ncdm' in data.cosmo_arguments:
                    if 'Omega_cdm' in data.cosmo_arguments:
                        data.cosmo_arguments[
                            'Omega_cdm'] += data.cosmo_arguments[
                                'omega_ncdm'] / h / h
                    if 'omega_cdm' in data.cosmo_arguments:
                        data.cosmo_arguments[
                            'omega_cdm'] += data.cosmo_arguments[
                                'omega_ncdm'] / eta2
                    del data.cosmo_arguments['omega_ncdm']
                remove = [
                    'm_ncdm', 'T_ncdm', 'ncdm_fluid_approximation',
                    'l_max_ncdm', 'Number of momentum bins', 'Maximum q',
                    'Quadrature strategy'
                ]
                for bad in remove:
                    if bad in data.cosmo_arguments:
                        del data.cosmo_arguments[bad]

        # Empty the CLASS parameters - This is done to calculate the LCDM-equivalent, later we will recover them.
        cosmo.struct_cleanup()
        cosmo.empty()
        cosmo.set(data.cosmo_arguments)
        cosmo.compute(['lensing'])

        # Call CLASS again to get the LCDM equivalent
        Plin_equiv = np.zeros(len(k), 'float64')
        h = cosmo.h()
        for index_k in range(len(k)):
            Plin_equiv[index_k] = cosmo.pk_lin(k[index_k] * h, 0.0)
        Plin_equiv *= h**3

        # Erase new parameters, recover original model
        cosmo.struct_cleanup()
        cosmo.empty()
        data.cosmo_arguments = param_backup
        cosmo.set(data.cosmo_arguments)
        cosmo.compute(['lensing'])

        # Calculate transfer function T(k)
        Tk = np.zeros(len(k), 'float64')
        Tk = np.sqrt(
            abs(Plin) / abs(Plin_equiv)
        )  # The abs is just a safety net, class should never give negative P(k) values

        # Second sanity check, to make sure the LCDM-equiv is not more than 10% off in the low-k range
        k_eq_der = cosmo.get_current_derived_parameters(['k_eq'])
        k_eq = k_eq_der['k_eq'] / h
        if any(abs(Tk[k < np.maximum(k_eq, k[0])]**2 - 1.0) > 0.01):
            with open(self.bin_file_path, 'a') as bin_file:
                bin_file.write('#Error_equiv\t')
                for elem in data.get_mcmc_parameters(['varying']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                for elem in data.get_mcmc_parameters(['derived']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                for elem in data.get_mcmc_parameters(['derived_lkl']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                bin_file.write('\n')
                bin_file.close()
            sys.stderr.write('#Error_equiv\n')
            sys.stderr.flush()
            return data.boundary_loglike

        spline = interpolate.splrep(k, Tk)
        der = interpolate.splev(k, spline, der=1)

        # Set k_max. Here we cut the oscillations (after the first minimum) from the fitted region
        for index_k in range(len(k)):
            index_k_fit_max = -1
            if Tk[index_k] < 0.1 and der[index_k] >= 0.:
                index_k_fit_max = index_k
                break

        # Third sanity check, here we check that the DAO do not start before k_neff
        if k[index_k_fit_max] < k_neff:
            with open(self.bin_file_path, 'a') as bin_file:
                bin_file.write('#Error_kneff\t')
                for elem in data.get_mcmc_parameters(['varying']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                for elem in data.get_mcmc_parameters(['derived']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                for elem in data.get_mcmc_parameters(['derived_lkl']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                bin_file.write('\n')
                bin_file.close()
            sys.stderr.write('#Error_kneff\n')
            sys.stderr.flush()
            return data.boundary_loglike

        k_fit = k[:index_k_fit_max]
        Tk_fit = Tk[:index_k_fit_max]

        # Define objective minimiser function: returns the array to be minimized
        def fcn2min(params, k, Tk):
            alpha = params['alpha']
            beta = params['beta']
            gamma = params['gamma']
            model = self.T(k, alpha, beta, gamma)
            return (model - Tk)

        # Now we will fit the given linear P(k) with the {alpha,beta,gamma}-formula
        # First we create a set of Parameters
        params = Parameters()
        params.add('alpha',
                   value=0.001,
                   min=self.alpha_min,
                   max=self.alpha_max)
        params.add('beta', value=2.24, min=self.beta_min, max=self.beta_max)
        params.add('gamma',
                   value=-4.46,
                   min=self.gamma_min,
                   max=self.gamma_max)

        # Do the fit with the least squares method
        minner = Minimizer(fcn2min, params, fcn_args=(k_fit, Tk_fit))

        result = minner.minimize(method='leastsq')
        best_alpha = result.params['alpha'].value
        best_beta = result.params['beta'].value
        best_gamma = result.params['gamma'].value

        # Store the corresponding alpha, beta, and gamma as derived_lkl
        data.derived_lkl.update({
            'alpha': best_alpha,
            'beta': best_beta,
            'gamma': best_gamma
        })

        Tk_abg = np.zeros(len(k_fit), 'float64')
        Tk_abg = self.T(k_fit, best_alpha, best_beta, best_gamma)

        # Fourth sanity check, to make sure alpha, beta, and gamma are withing the grid range
        if ((best_alpha < self.alpha_min or best_alpha > self.alpha_max)
                or (best_beta < self.beta_min or best_beta > self.beta_max) or
            (best_gamma < self.gamma_min or best_gamma > self.gamma_max)):
            if (best_alpha < self.alpha_min or best_alpha > self.alpha_max):
                with open(self.bin_file_path, 'a') as bin_file:
                    bin_file.write('#Error_a\t')
                    for elem in data.get_mcmc_parameters(['varying']):
                        bin_file.write(' %.6e\t' %
                                       data.mcmc_parameters[elem]['current'])
                    for elem in data.get_mcmc_parameters(['derived']):
                        bin_file.write(' %.6e\t' %
                                       data.mcmc_parameters[elem]['current'])
                    for elem in data.get_mcmc_parameters(['derived_lkl']):
                        bin_file.write(' %.6e\t' %
                                       data.mcmc_parameters[elem]['current'])
                    bin_file.write('\n')
                    bin_file.close()
                sys.stderr.write('#Error_a\n')
                sys.stderr.flush()
            else:
                with open(self.bin_file_path, 'a') as bin_file:
                    bin_file.write('#Error_bg\t')
                    for elem in data.get_mcmc_parameters(['varying']):
                        bin_file.write(' %.6e\t' %
                                       data.mcmc_parameters[elem]['current'])
                    for elem in data.get_mcmc_parameters(['derived']):
                        bin_file.write(' %.6e\t' %
                                       data.mcmc_parameters[elem]['current'])
                    for elem in data.get_mcmc_parameters(['derived_lkl']):
                        bin_file.write(' %.6e\t' %
                                       data.mcmc_parameters[elem]['current'])
                    bin_file.write('\n')
                    bin_file.close()
                sys.stderr.write('#Error_bg\n')
                sys.stderr.flush()
            return data.boundary_loglike

        # Fifth sanity check, to make sure the fit is never off ny more than 10%
        for ik in range(len(k_fit)):
            index_k_check_max = -1
            if Tk_fit[ik] < 0.2:
                index_k_check_max = ik
                break
        if any(
                abs(Tk_fit[:index_k_check_max] / Tk_abg[:index_k_check_max] -
                    1.) > 0.1):
            with open(self.bin_file_path, 'a') as bin_file:
                for elem in data.get_mcmc_parameters(['varying']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                for elem in data.get_mcmc_parameters(['derived']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                for elem in data.get_mcmc_parameters(['derived_lkl']):
                    bin_file.write(' %.6e\t' %
                                   data.mcmc_parameters[elem]['current'])
                bin_file.write('\n')
                bin_file.close()
            sys.stderr.write('#Error_fit\n')
            sys.stderr.flush()
            return data.boundary_loglike

        # If the model has passed all the sanity checks, do the final chi2 computation
        chi2 = 0.

        model_H = np.zeros((len(self.zeta_range_mh), len(self.k_mh)),
                           'float64')
        model_M = np.zeros((len(self.zeta_range_mh) - 1, len(self.k_mh)),
                           'float64')
        theta = np.array([
            best_alpha, best_beta, best_gamma, z_reio, sigma8, neff, F_UV, Fz1,
            Fz2, Fz3, Fz4, T0a, T0s, gamma_a, gamma_s
        ])
        model = self.PF_noPRACE * self.ordkrig_estimator_3D(
            theta, self.redshift_list)
        upper_block = np.vsplit(model, [7, 11])[0]
        lower_block = np.vsplit(model, [7, 11])[1]

        model_H[:, :] = lower_block[:, 19:]
        model_H_reshaped = np.reshape(model_H, -1, order='C')
        model_M[:, :] = lower_block[:3, 19:]
        model_M_reshaped = np.reshape(model_M, -1, order='C')
        model_MH_reshaped = np.concatenate(
            (model_H_reshaped, model_M_reshaped))
        chi2 = np.dot((self.y_MH_reshaped - model_MH_reshaped),
                      np.dot(self.cov_MH_inverted,
                             (self.y_MH_reshaped - model_MH_reshaped)))

        loglkl = -0.5 * chi2

        return loglkl
Beispiel #23
0
    def initialise_likelihoods(self, experiments):
        """
        Given an array of experiments, fill :attr:`lkl` with their instances

        For every name in `experiments`, the class of the same name is
        imported from the package ``likelihoods.<name>`` and instantiated
        with three arguments: the path to ``<name>.data`` inside the
        likelihood folder, this object, and :attr:`command_line`. The
        instances are stored in :attr:`lkl`, in the order of `experiments`.
        Nothing is returned.

        .. Note::

            in the __init__ method, experiments is naturally self.experiments,
            but it is useful to keep it as a parameter, for the case of
            importance sampling.

        :param experiments: iterable of likelihood names (strings).
        :raises io_mp.ConfigurationError: if a likelihood package cannot be
            imported, does not define a class named after its folder, or if
            a :class:`KeyError` escapes from the likelihood constructor.

        """
        # Local import, so that this method does not depend on the file-level
        # import block.
        import importlib

        self.lkl = od()
        # adding the likelihood directory to the path, to import the module
        # then, for each library, calling an instance of the likelihood.
        # Beware, though, if you add new likelihoods, they should go to the
        # folder likelihoods/yourlike/yourlike.py, and contain a yourlike.data,
        # otherwise the following set of commands will not work anymore.

        # For the logging if log_flag is True, each likelihood will log its
        # parameters

        # Due to problems in relative import, this line must be there. Until a
        # better solution is found. It adds the root folder of the MontePython
        # used as the first element in the sys.path
        sys.path.insert(0, self.path['root'])

        for elem in experiments:

            folder = os.path.abspath(
                os.path.join(self.path['MontePython'], "likelihoods",
                             "%s" % elem))
            # Import the likelihood package and look up the class that bears
            # the same name. This replaces a string-built ``exec`` of
            # "from likelihoods.<elem> import <elem>".
            try:
                module = importlib.import_module("likelihoods.%s" % elem)
                # ``from pkg import name`` reports a missing name as an
                # ImportError; keep that contract for the handler below.
                if not hasattr(module, elem):
                    raise ImportError(
                        "cannot import name '%s' from '%s'" %
                        (elem, module.__name__))
            except ImportError as message:
                raise io_mp.ConfigurationError(
                    "Trying to import the %s likelihood" % elem +
                    " as asked in the parameter file, and failed."
                    " Please make sure it is in the `montepython/"
                    "likelihoods` folder, and is a proper python "
                    "module. Check also that the name of the class"
                    " defined in the __init__.py matches the name "
                    "of the folder. In case this is not enough, "
                    "here is the original message: %s\n" % message)
            likelihood_class = getattr(module, elem)
            # Initialize the likelihoods. Depending on the values of
            # command_line and log_flag, the routine will call slightly
            # different things. If log_flag is True, the log.param will be
            # appended.
            try:
                # Calling the class directly (instead of exec-ing generated
                # source) keeps working when `folder` contains quotes or
                # backslashes.
                self.lkl[elem] = likelihood_class(
                    '%s/%s.data' % (folder, elem), self, self.command_line)
            except KeyError as e:
                # Exceptions have no ``find`` method: search the text of the
                # error for the missing key instead.
                if 'clik' in str(e):
                    raise io_mp.ConfigurationError(
                        "You should provide a 'clik' entry in the dictionary "
                        "path defined in the file default.conf")
                else:
                    raise io_mp.ConfigurationError(
                        "The following key: '%s' was not found" % e)
def chain(cosmo, data, command_line):
    """
    Run a Markov chain of fixed length with a Metropolis Hastings algorithm.

    Main function of this module, this is the actual Markov chain procedure.
    After having selected a starting point in parameter space defining the
    first **last accepted** one, it will, for a given amount of steps :

    + choose randomly a new point following the *proposal density*,
    + compute the cosmological *observables* through the cosmological module,
    + compute the value of the *likelihoods* of the desired experiments at this
      point,
    + *accept/reject* this point given its likelihood compared to the one of
      the last accepted one.

    Every time the code accepts :code:`data.write_step` number of points
    (quantity defined in the input parameter file), it will write the result to
    disk (flushing the buffer by forcing to exit the output file, and reopen it
    again.

    .. note::

        to use the code to set a fiducial file for certain fixed parameters,
        you can use two solutions. The first one is to put all input 1-sigma
        proposal density to zero (this method still works, but is not
        recommended anymore). The second one consist in using the flag "-f 0",
        to force a step of zero amplitude.

    """

    ## Initialisation
    loglike = 0

    # In case command_line.silent has been asked, outputs should only contain
    # data.out. Otherwise, it will also contain sys.stdout
    outputs = [data.out]
    if not command_line.silent:
        outputs.append(sys.stdout)

    use_mpi = False
    # check for MPI
    try:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        # suppress duplicate output from slaves
        if rank:
            command_line.quiet = True
        use_mpi = True
    except ImportError:
        # set all chains to master if no MPI
        rank = 0

    # Initialise master and slave chains for superupdate.
    # Workaround in order to have one master chain and several slave chains even when
    # communication fails between MPI chains. It could malfunction on some hardware.
    # TODO: Would like to merge with MPI initialization above and make robust and logical
    # TODO: Or if keeping current scheme, store value and delete jumping_factor.txt
    # TODO: automatically if --parallel-chains is enabled
    if command_line.superupdate and data.jumping_factor:
        try:
            jump_file = open(command_line.folder + '/jumping_factor.txt', 'r')
            #if command_line.restart is None:
            if not use_mpi and command_line.parallel_chains:
                rank = 1
                warnings.warn(
                    'MPI not in use, flag --parallel-chains enabled, '
                    'superupdate enabled, and a jumping_factor.txt file detected. '
                    'If relaunching in the same folder or restarting a run this '
                    'will cause all chains to be assigned as slaves. In this case '
                    'instead note the value in jumping_factor.txt, delete the '
                    'file, and pass the value with flag -f <value>. This warning '
                    'may then appear again, but you can safely disregard it.')
            else:
                # For restart runs we want to save the input jumping factor
                # as starting jumping factor, but continue from the jumping
                # factor stored in the file.
                starting_jumping_factor = data.jumping_factor
                # This will load the value irrespective of whether it starts
                # with # (i.e. the jumping factor adaptation was started) or not.
                jump_value = jump_file.read().replace('# ', '')
                data.jumping_factor = float(jump_value)
            jump_file.close()
            print 'rank = ', rank
        except:
            jump_file = open(command_line.folder + '/jumping_factor.txt', 'w')
            jump_file.write(str(data.jumping_factor))
            jump_file.close()
            rank = 0
            print 'rank = ', rank
            starting_jumping_factor = data.jumping_factor

    # Recover the covariance matrix according to the input, if the varying set
    # of parameters is non-zero
    if (data.get_mcmc_parameters(['varying']) != []):

        # Read input covariance matrix
        sigma_eig, U, C = sampler.get_covariance_matrix(
            cosmo, data, command_line)

        # if we want to compute the starting point by minimising lnL (instead of taking it from input file or bestfit file)
        minimum = 0
        if command_line.minimize:
            minimum = sampler.get_minimum(cosmo, data, command_line, C)
            parameter_names = data.get_mcmc_parameters(['last_accepted'])
            for index, elem in parameter_names:
                data.mcmc_parameters[elem]['last_accepted'] = minimum[index]

        # if we want to compute Fisher matrix and then stop
        if command_line.fisher:
            sampler.get_fisher_matrix(cosmo, data, command_line, C, minimum)
            return

        # warning if no jumps are requested
        if data.jumping_factor == 0:
            warnings.warn(
                "The jumping factor has been set to 0. The above covariance " +
                "matrix will not be used.")

    # In case of a fiducial run (all parameters fixed), simply run once and
    # print out the likelihood. This should not be used any more (one has to
    # modify the log.param, which is never a good idea. Instead, force the code
    # to use a jumping factor of 0 with the option "-f 0".
    else:
        warnings.warn(
            "You are running with no varying parameters... I will compute " +
            "only one point and exit")
        data.update_cosmo_arguments()  # this fills in the fixed parameters
        loglike = sampler.compute_lkl(cosmo, data)
        io_mp.print_vector(outputs, 1, loglike, data)
        return 1, loglike

    # In the fast-slow method, one need the Cholesky decomposition of the
    # covariance matrix. Return the Cholesky decomposition as a lower
    # triangular matrix
    Cholesky = None
    Rotation = None
    if command_line.jumping == 'fast':
        Cholesky = la.cholesky(C).T
        Rotation = np.identity(len(sigma_eig))

    # define path and covmat
    input_covmat = command_line.cov
    base = os.path.basename(command_line.folder)
    # the previous line fails when "folder" is a string ending with a slash. This issue is cured by the next lines:
    if base == '':
        base = os.path.basename(command_line.folder[:-1])
    command_line.cov = os.path.join(command_line.folder, base + '.covmat')

    # Fast Parameter Multiplier (fpm) for adjusting update and superupdate numbers.
    # This is equal to N_slow + f_fast N_fast, where N_slow is the number of slow
    # parameters, f_fast is the over sampling number for each fast block and f_fast
    # is the number of parameters in each fast block.
    for i in range(len(data.block_parameters)):
        if i == 0:
            fpm = data.over_sampling[i] * data.block_parameters[i]
        else:
            fpm += data.over_sampling[i] * (data.block_parameters[i] -
                                            data.block_parameters[i - 1])

    # If the update mode was selected, the previous (or original) matrix should be stored
    if command_line.update:
        if not rank and not command_line.silent:
            print 'Update routine is enabled with value %d (recommended: 50)' % command_line.update
            print 'This number is rescaled by cycle length %d (N_slow + f_fast * N_fast) to %d' % (
                fpm, fpm * command_line.update)
        # Rescale update number by cycle length N_slow + f_fast * N_fast to account for fast parameters
        command_line.update *= fpm
        previous = (sigma_eig, U, C, Cholesky)

    # Initialise adaptive
    if command_line.adaptive:
        if not command_line.silent:
            print 'Adaptive routine is enabled with value %d (recommended: 10*dimension)' % command_line.adaptive
            print 'and adaptive_ts = %d (recommended: 100*dimension)' % command_line.adaptive_ts
            print 'Please note: current implementation not suitable for multiple chains'
        if rank > 0:
            raise io_mp.ConfigurationError(
                'Adaptive routine not compatible with MPI')
        if command_line.update:
            warnings.warn(
                'Adaptive routine not compatible with update, overwriting input update value'
            )
        if command_line.superupdate:
            warnings.warn(
                'Adaptive routine not compatible with superupdate, deactivating superupdate'
            )
            command_line.superupdate = 0
        # Define needed parameters
        parameter_names = data.get_mcmc_parameters(['varying'])
        mean = np.zeros(len(parameter_names))
        last_accepted = np.zeros(len(parameter_names), 'float64')
        ar = np.zeros(100)
        if command_line.cov == None:
            # If no input covmat was given, the starting jumping factor
            # should be very small until a covmat is obtained and the
            # original start jumping factor should be saved
            start_jumping_factor = command_line.jumping_factor
            data.jumping_factor = command_line.jumping_factor / 100.
            # Analyze module will be forced to compute one covmat,
            # after which update flag will be set to False.
            command_line.update = command_line.adaptive
        else:
            # If an input covmat was provided, take mean values from param file
            # Question: is it better to always do this, rather than setting mean
            # to last accepted after the initial update run?
            for elem in parameter_names:
                mean[parameter_names.index(
                    elem)] = data.mcmc_parameters[elem]['initial'][0]

    # Initialize superupdate
    if command_line.superupdate:
        if not rank and not command_line.silent:
            print 'Superupdate routine is enabled with value %d (recommended: 20)' % command_line.superupdate
            if command_line.superupdate < 20:
                warnings.warn(
                    'Superupdate value lower than the recommended value. This '
                    'may increase the risk of poorly converged acceptance rate'
                )
            print 'This number is rescaled by cycle length %d (N_slow + f_fast * N_fast) to %d' % (
                fpm, fpm * command_line.superupdate)
        # Rescale superupdate number by cycle length N_slow + f_fast * N_fast to account for fast parameters
        command_line.superupdate *= fpm
        # Define needed parameters
        parameter_names = data.get_mcmc_parameters(['varying'])
        updated_steps = 0
        stop_c = False
        jumping_factor_rescale = 0
        if command_line.restart:
            try:
                jump_file = open(command_line.cov, 'r')
                jumping_factor_rescale = 1
            except:
                jumping_factor_rescale = 0
        c_array = np.zeros(command_line.superupdate
                           )  # Allows computation of mean of jumping factor
        R_minus_one = np.array([
            100., 100.
        ])  # 100 to make sure max(R-1) value is high if computation failed
        # Local acceptance rate of last SU*(N_slow + f_fast * N_fast) steps
        ar = np.zeros(command_line.superupdate)
        # Store acceptance rate of last 5*SU*(N_slow + f_fast * N_fast) steps
        backup_ar = np.zeros(5 * command_line.superupdate)
        # Make sure update is enabled
        if command_line.update == 0:
            if not rank and not command_line.silent:
                print 'Update routine required by superupdate. Setting --update 50'
                print 'This number is then rescaled by cycle length: %d (N_slow + f_fast * N_fast)' % fpm
            command_line.update = 50 * fpm
            previous = (sigma_eig, U, C, Cholesky)

    # If restart wanted, pick initial value for arguments
    if command_line.restart is not None:
        sampler.read_args_from_chain(data, command_line.restart)

    # If restart from best fit file, read first point (overwrite settings of
    # read_args_from_chain)
    if command_line.bf is not None and not command_line.minimize:
        sampler.read_args_from_bestfit(data, command_line.bf)

    # Pick a position (from last accepted point if restart, from the mean value
    # else), with a 100 tries.
    for i in range(100):
        if get_new_position(data, sigma_eig, U, i, Cholesky, Rotation) is True:
            break
        if i == 99:
            raise io_mp.ConfigurationError(
                "You should probably check your prior boundaries... because " +
                "no valid starting position was found after 100 tries")

    # Compute the starting Likelihood
    loglike = sampler.compute_lkl(cosmo, data)

    # Choose this step as the last accepted value
    # (accept_step), and modify accordingly the max_loglike
    sampler.accept_step(data)
    max_loglike = loglike

    # If the jumping factor is 0, the likelihood associated with this point is
    # displayed, and the code exits.
    if data.jumping_factor == 0:
        io_mp.print_vector(outputs, 1, loglike, data)
        return 1, loglike

    acc, rej = 0.0, 0.0  # acceptance and rejection number count
    N = 1  # number of time the system stayed in the current position

    # Print on screen the computed parameters
    if not command_line.silent and not command_line.quiet:
        io_mp.print_parameters(sys.stdout, data)

    # Suppress non-informative output after initializing
    command_line.quiet = True

    k = 1
    # Main loop, that goes on while the maximum number of failure is not
    # reached, and while the expected amount of steps (N) is not taken.
    while k <= command_line.N:
        # If the number of steps reaches the number set in the adaptive method plus one,
        # then the proposal distribution should be gradually adapted.
        # If the number of steps also exceeds the number set in adaptive_ts,
        # the jumping factor should be gradually adapted.
        if command_line.adaptive and k > command_line.adaptive + 1:
            # Start of adaptive routine
            # By B. Schroer and T. Brinckmann
            # Modified version of the method outlined in the PhD thesis of Marta Spinelli

            # Store last accepted step
            for elem in parameter_names:
                last_accepted[parameter_names.index(
                    elem)] = data.mcmc_parameters[elem]['last_accepted']
            # Recursion formula for mean and covmat (and jumping factor after ts steps)
            # mean(k) = mean(k-1) + (last_accepted - mean(k-1))/k
            mean += 1. / k * (last_accepted - mean)
            # C(k) = C(k-1) + [(last_accepted - mean(k))^T * (last_accepted - mean(k)) - C(k-1)]/k
            C += 1. / k * (
                np.dot(np.transpose(np.asmatrix(last_accepted - mean)),
                       np.asmatrix(last_accepted - mean)) - C)
            sigma_eig, U = np.linalg.eig(np.linalg.inv(C))
            if command_line.jumping == 'fast':
                Cholesky = la.cholesky(C).T
            if k > command_line.adaptive_ts:
                # c = j^2/d
                c = data.jumping_factor**2 / len(parameter_names)
                # c(k) = c(k-1) + [acceptance_rate(last 100 steps) - 0.25]/k
                c += (np.mean(ar) - 0.25) / k
                data.jumping_factor = np.sqrt(len(parameter_names) * c)

            # Save the covariance matrix and the jumping factor in a file
            # For a possible MPI implementation
            #if not (k-command_line.adaptive) % 5:
            #    io_mp.write_covariance_matrix(C,parameter_names,str(command_line.cov))
            #    jump_file = open(command_line.folder + '/jumping_factor.txt','w')
            #    jump_file.write(str(data.jumping_factor))
            #    jump_file.close()
            # End of adaptive routine

    # If the number of steps reaches the number set in the update method,
    # then the proposal distribution should be adapted.
        if command_line.update:
            # Start of update routine
            # By M. Ballardini and T. Brinckmann
            # Also used by superupdate and adaptive

            # master chain behavior
            if not rank:
                # Add the folder to the list of files to analyze, and switch on the
                # options for computing only the covmat
                from parser_mp import parse
                info_command_line = parse(
                    'info %s --minimal --noplot --keep-fraction 0.5 --keep-non-markovian --want-covmat'
                    % command_line.folder)
                info_command_line.update = command_line.update

                if command_line.adaptive:
                    # Keep all points for covmat guess in adaptive
                    info_command_line = parse(
                        'info %s --minimal --noplot --keep-non-markovian --want-covmat'
                        % command_line.folder)
                    # Tell the analysis to update the covmat after t0 steps if it is adaptive
                    info_command_line.adaptive = command_line.adaptive
                    # Only compute covmat if no input covmat was provided
                    if input_covmat != None:
                        info_command_line.want_covmat = False

                # This is in order to allow for more frequent R-1 computation with superupdate
                compute_R_minus_one = False
                if command_line.superupdate:
                    if not (k + 10) % command_line.superupdate:
                        compute_R_minus_one = True
                # the +10 below is here to ensure that the first master update will take place before the first slave updates,
                # but this is a detail, the code is robust against situations where updating is not possible, so +10 could be omitted
                if (not (k + 10) % command_line.update
                        or compute_R_minus_one) and k > 10:
                    # Try to launch an analyze (computing a new covmat if successful)
                    try:
                        if not (k + 10) % command_line.update:
                            from analyze import analyze
                            R_minus_one = analyze(info_command_line)
                        elif command_line.superupdate:
                            # Compute (only, i.e. no covmat) R-1 more often when using superupdate
                            info_command_line = parse(
                                'info %s --minimal --noplot --keep-fraction 0.5 --keep-non-markovian'
                                % command_line.folder)
                            info_command_line.update = command_line.update
                            R_minus_one = analyze(info_command_line)
                    except:
                        if not command_line.silent:
                            print 'Step ', k, ' chain ', rank, ': Failed to calculate covariance matrix'

                if command_line.superupdate:
                    # Start of superupdate routine
                    # By B. Schroer and T. Brinckmann

                    c_array[(k - 1) %
                            (command_line.superupdate)] = data.jumping_factor

                    # If acceptance rate deviates too much from the target acceptance
                    # rate we want to resume adapting the jumping factor
                    # T. Brinckmann 02/2019: use mean a.r. over the last 5*len(ar) steps
                    # instead or the over last len(ar), which is more stable
                    if abs(np.mean(backup_ar) - command_line.superupdate_ar
                           ) > 5. * command_line.superupdate_ar_tol:
                        stop_c = False

                    # Start adapting the jumping factor after command_line.superupdate steps if R-1 < 10
                    # The lower R-1 criterium is an arbitrary choice to keep from updating when the R-1
                    # calculation fails (i.e. returns only zeros).
                    if (k > updated_steps + command_line.superupdate
                        ) and 0.01 < (max(R_minus_one) < 10.) and not stop_c:
                        c = data.jumping_factor**2 / len(parameter_names)
                        # To avoid getting trapped in local minima, the jumping factor should
                        # not go below 0.1 (arbitrary) times the starting jumping factor.
                        if (c + (np.mean(ar) - command_line.superupdate_ar) /
                            (k - updated_steps)) > (
                                0.1 * starting_jumping_factor
                            )**2. / len(parameter_names) or (
                                (np.mean(ar) - command_line.superupdate_ar) /
                                (k - updated_steps) > 0):
                            c += (np.mean(ar) - command_line.superupdate_ar
                                  ) / (k - updated_steps)
                            data.jumping_factor = np.sqrt(
                                len(parameter_names) * c)

                        if not (k - 1) % 5:
                            # Check if the jumping factor adaptation should stop.
                            # An acceptance rate of 25% balances the wish for more accepted
                            # points, while ensuring the parameter space is properly sampled.
                            # The convergence criterium is by default (26+/-1)%, so the adaptation
                            # will stop when the code reaches an acceptance rate of at least 25%.
                            # T. Brinckmann 02/2019: use mean a.r. over the last 5*len(ar) steps
                            # instead or the over last len(ar), which is more stable
                            if (max(R_minus_one) < 0.4) and (
                                    abs(
                                        np.mean(backup_ar) -
                                        command_line.superupdate_ar) <
                                    command_line.superupdate_ar_tol) and (abs(
                                        np.mean(c_array) / c_array[
                                            (k - 1) %
                                            (command_line.superupdate)] -
                                        1) < 0.01):
                                stop_c = True
                                data.out.write(
                                    '# After %d accepted steps: stop adapting the jumping factor at a value of %f with a local acceptance rate %f \n'
                                    % (int(acc), data.jumping_factor,
                                       np.mean(backup_ar)))
                                if not command_line.silent:
                                    print 'After %d accepted steps: stop adapting the jumping factor at a value of %f with a local acceptance rate of %f \n' % (
                                        int(acc), data.jumping_factor,
                                        np.mean(backup_ar))
                                jump_file = open(
                                    command_line.folder +
                                    '/jumping_factor.txt', 'w')
                                jump_file.write('# ' +
                                                str(data.jumping_factor))
                                jump_file.close()
                            else:
                                jump_file = open(
                                    command_line.folder +
                                    '/jumping_factor.txt', 'w')
                                jump_file.write(str(data.jumping_factor))
                                jump_file.close()

                    # Write the evolution of the jumping factor to a file
                    if not k % (command_line.superupdate):
                        jump_file = open(
                            command_line.folder + '/jumping_factors.txt', 'a')
                        for i in xrange(command_line.superupdate):
                            jump_file.write(str(c_array[i]) + '\n')
                        jump_file.close()
                    # End of main part of superupdate routine

                if not (k - 1) % (command_line.update / 3):
                    try:
                        # Read the covmat
                        sigma_eig, U, C = sampler.get_covariance_matrix(
                            cosmo, data, command_line)
                        if command_line.jumping == 'fast':
                            Cholesky = la.cholesky(C).T
                        # Test here whether the covariance matrix has really changed
                        # We should in principle test all terms, but testing the first one should suffice
                        if not C[0, 0] == previous[2][0, 0]:
                            if k == 1:
                                if not command_line.silent:
                                    if not input_covmat == None:
                                        warnings.warn(
                                            'Appending to an existing folder: using %s instead of %s. '
                                            'If new input covmat is desired, please delete previous covmat.'
                                            % (command_line.cov, input_covmat))
                                    else:
                                        warnings.warn(
                                            'Appending to an existing folder: using %s. '
                                            'If no starting covmat is desired, please delete previous covmat.'
                                            % command_line.cov)
                            else:
                                # Start of second part of superupdate routine
                                if command_line.superupdate:
                                    # Adaptation of jumping factor should start again after the covmat is updated
                                    # Save the step number after it updated for superupdate and start adaption of c again
                                    updated_steps = k
                                    stop_c = False
                                    cov_det = np.linalg.det(C)
                                    prev_cov_det = np.linalg.det(previous[2])
                                    # Rescale jumping factor in order to keep the magnitude of the jumps the same.
                                    # Skip this update the first time the covmat is updated in order to prevent
                                    # problems due to a poor initial covmat. Rescale the jumping factor after the
                                    # first calculated covmat to the expected optimal one of 2.4.
                                    if jumping_factor_rescale:
                                        new_jumping_factor = data.jumping_factor * (
                                            prev_cov_det / cov_det)**(
                                                1. /
                                                (2 * len(parameter_names)))
                                        data.out.write(
                                            '# After %d accepted steps: rescaled jumping factor from %f to %f, due to updated covariance matrix \n'
                                            % (int(acc), data.jumping_factor,
                                               new_jumping_factor))
                                        if not command_line.silent:
                                            print 'After %d accepted steps: rescaled jumping factor from %f to %f, due to updated covariance matrix \n' % (
                                                int(acc), data.jumping_factor,
                                                new_jumping_factor)
                                        data.jumping_factor = new_jumping_factor
                                    else:
                                        data.jumping_factor = starting_jumping_factor
                                    jumping_factor_rescale += 1
                            # End of second part of superupdate routine

                            # Write to chains file when the covmat was updated
                                data.out.write(
                                    '# After %d accepted steps: update proposal with max(R-1) = %f and jumping factor = %f \n'
                                    % (int(acc), max(R_minus_one),
                                       data.jumping_factor))
                                if not command_line.silent:
                                    print 'After %d accepted steps: update proposal with max(R-1) = %f and jumping factor = %f \n' % (
                                        int(acc), max(R_minus_one),
                                        data.jumping_factor)
                                try:
                                    if stop - after - update:
                                        k = command_line.N
                                        print 'Covariance matrix updated - stopping run'
                                except:
                                    pass

                            previous = (sigma_eig, U, C, Cholesky)
                    except:
                        pass

                    command_line.quiet = True

                    # Start of second part of adaptive routine
                    # Stop updating the covmat after t0 steps in adaptive
                    if command_line.adaptive and k > 1:
                        command_line.update = 0
                        data.jumping_factor = start_jumping_factor
                        # Test if there are still enough steps left before the adaption of the jumping factor starts
                        if k > 0.5 * command_line.adaptive_ts:
                            command_line.adaptive_ts += k
        # Set the mean for the recursion formula to the last accepted point
                        for elem in parameter_names:
                            mean[parameter_names.index(
                                elem
                            )] = data.mcmc_parameters[elem]['last_accepted']
                    # End of second part of adaptive routine

            # slave chain behavior
            else:
                # Start of slave superupdate routine
                if command_line.superupdate:
                    # If acceptance rate deviates too much from the target acceptance
                    # rate we want to resume adapting the jumping factor. This line
                    # will force the slave chains to check if the jumping factor
                    # has been updated
                    if abs(np.mean(backup_ar) - command_line.superupdate_ar
                           ) > 5. * command_line.superupdate_ar_tol:
                        stop_c = False

        # Update the jumping factor every 5 steps in superupdate
                    if not k % 5 and k > command_line.superupdate and command_line.superupdate and (
                            not stop_c or
                        (stop_c and k % command_line.update)):
                        try:
                            jump_file = open(
                                command_line.folder + '/jumping_factor.txt',
                                'r')
                            # If there is a # in the file, the master has stopped adapting c
                            for line in jump_file:
                                if line.find('#') == -1:
                                    jump_file.seek(0)
                                    jump_value = jump_file.read()
                                    data.jumping_factor = float(jump_value)
                                else:
                                    jump_file.seek(0)
                                    jump_value = jump_file.read().replace(
                                        '# ', '')
                                    #if not stop_c or (stop_c and not float(jump_value) == data.jumping_factor):
                                    if not float(
                                            jump_value) == data.jumping_factor:
                                        data.jumping_factor = float(jump_value)
                                        stop_c = True
                                        data.out.write(
                                            '# After %d accepted steps: stop adapting the jumping factor at a value of %f with a local acceptance rate %f \n'
                                            % (int(acc), data.jumping_factor,
                                               np.mean(backup_ar)))
                                        if not command_line.silent:
                                            print 'After %d accepted steps: stop adapting the jumping factor at a value of %f with a local acceptance rate of %f \n' % (
                                                int(acc), data.jumping_factor,
                                                np.mean(backup_ar))
                            jump_file.close()
                        except:
                            if not command_line.silent:
                                print 'Reading jumping_factor file failed'
                            pass
                # End of slave superupdate routine

                # Start of slave update routine
                if not (k - 1) % (command_line.update / 10):
                    try:
                        sigma_eig, U, C = sampler.get_covariance_matrix(
                            cosmo, data, command_line)
                        if command_line.jumping == 'fast':
                            Cholesky = la.cholesky(C).T
                        # Test here whether the covariance matrix has really changed
                        # We should in principle test all terms, but testing the first one should suffice
                        if not C[0, 0] == previous[2][0, 0] and not k == 1:
                            if command_line.superupdate:
                                # If the covmat was updated, the master has resumed adapting c
                                stop_c = False
                            data.out.write(
                                '# After %d accepted steps: update proposal \n'
                                % int(acc))
                            if not command_line.silent:
                                print 'After %d accepted steps: update proposal \n' % int(
                                    acc)
                            try:
                                if stop_after_update:
                                    k = command_line.N
                                    print 'Covariance matrix updated - stopping run'
                            except:
                                pass
                        previous = (sigma_eig, U, C, Cholesky)

                    except:
                        pass
                # End of slave update routine
            # End of update routine

    # Pick a new position ('current' flag in mcmc_parameters), and compute
    # its likelihood. If get_new_position returns True, it means it did not
    # encounter any boundary problem. Otherwise, just increase the
    # multiplicity of the point and start the loop again
        if get_new_position(data, sigma_eig, U, k, Cholesky, Rotation) is True:
            newloglike = sampler.compute_lkl(cosmo, data)
        else:  # reject step
            rej += 1
            if command_line.superupdate:
                ar[k % len(
                    ar
                )] = 0  # Local acceptance rate of last SU*(N_slow + f_fast * N_fast) steps
            elif command_line.adaptive:
                ar[k % len(ar)] = 0  # Local acceptance rate of last 100 steps
            N += 1
            k += 1
            continue

    # Harmless trick to avoid exponentiating large numbers. This decides
    # whether or not the system should move.
        if (newloglike != data.boundary_loglike):
            if (newloglike >= loglike):
                alpha = 1.
            else:
                alpha = np.exp(newloglike - loglike)
        else:
            alpha = -1

        if ((alpha == 1.) or (rd.uniform(0, 1) < alpha)):  # accept step

            # Print out the last accepted step (WARNING: this is NOT the one we
            # just computed ('current' flag), but really the previous one.)
            # with its proper multiplicity (number of times the system stayed
            # there).
            io_mp.print_vector(outputs, N, loglike, data)

            # Report the 'current' point to the 'last_accepted'
            sampler.accept_step(data)
            loglike = newloglike
            if loglike > max_loglike:
                max_loglike = loglike
            acc += 1.0
            N = 1  # Reset the multiplicity
            if command_line.superupdate:
                ar[k % len(
                    ar
                )] = 1  # Local acceptance rate of last SU*(N_slow + f_fast * N_fast) steps
            elif command_line.adaptive:
                ar[k % len(ar)] = 1  # Local acceptance rate of last 100 steps
        else:  # reject step
            rej += 1.0
            N += 1  # Increase multiplicity of last accepted point
            if command_line.superupdate:
                ar[k % len(
                    ar
                )] = 0  # Local acceptance rate of last SU*(N_slow + f_fast * N_fast) steps
            elif command_line.adaptive:
                ar[k % len(ar)] = 0  # Local acceptance rate of last 100 steps

    # Store a.r. for last 5 x SU*(N_slow + f_fast * N_fast) steps
        if command_line.superupdate:
            backup_ar[k % len(backup_ar)] = ar[k % len(ar)]

    # Regularly (option to set in parameter file), close and reopen the
    # buffer to force to write on file.
        if acc % data.write_step == 0:
            io_mp.refresh_file(data)
            # Update the outputs list
            outputs[0] = data.out
        k += 1  # One iteration done
    # END OF WHILE LOOP

    # If at this moment, the multiplicity is higher than 1, it means the
    # current point is not yet accepted, but it also mean that we did not print
    # out the last_accepted one yet. So we do.
    if N > 1:
        io_mp.print_vector(outputs, N - 1, loglike, data)

    # Print out some information on the finished chain
    rate = acc / (acc + rej)
    sys.stdout.write('\n#  {0} steps done, acceptance rate: {1}\n'.format(
        command_line.N, rate))

    # In case the acceptance rate is too low, or too high, print a warning
    if rate < 0.05:
        warnings.warn("The acceptance rate is below 0.05. You might want to "
                      "set the jumping factor to a lower value than the "
                      "default (2.4), with the option `-f 1.5` for instance.")
    elif rate > 0.6:
        warnings.warn("The acceptance rate is above 0.6, which means you might"
                      " have difficulties exploring the entire parameter space"
                      ". Try analysing these chains, and use the output "
                      "covariance matrix to decrease the acceptance rate to a "
                      "value between 0.2 and 0.4 (roughly).")
    # For a restart, erase the starting point to keep only the new, longer
    # chain.
    if command_line.restart is not None:
        os.remove(command_line.restart)
        sys.stdout.write(
            '    deleting starting point of the chain {0}\n'.format(
                command_line.restart))

    return
Beispiel #25
0
    def group_parameters_in_blocks(self):
        """
        Regroup mcmc parameters by blocks of same speed

        This method divides all varying parameters from :attr:`mcmc_parameters`
        into as many categories as there are likelihoods, plus one (the slow
        block of cosmological parameters).

        It creates the attribute :attr:`block_parameters`, to be used in the
        module :mod:`mcmc`. Its first entry is the number of varying
        cosmological parameters, and every following entry is the cumulative
        number of varying parameters up to and including that block.

        As side effects, it sets ``varying_nuisance_parameters`` on every
        likelihood of :attr:`lkl`, fills :attr:`over_sampling` with ones when
        it is empty, and finally calls :meth:`assign_over_sampling_indices`.

        .. note::

            It does not compute by any mean the real speed of each parameter,
            instead, every parameter belonging to the same likelihood will
            be considered as fast as its neighbour.

        .. warning::

            It assumes that the nuisance parameters are already written
            sequentially, and grouped together (not necessarily in the order
            described in :attr:`experiments`). If you mix up the different
            nuisance parameters in the .param file, this routine will not
            work as intended. It also assumes that the cosmological
            parameters are written at the beginning of the file.

        :raises io_mp.ConfigurationError: if a varying nuisance parameter is
            listed by no likelihood, or if a non-empty :attr:`over_sampling`
            does not have exactly one entry per block.

        """
        # First obvious block is all cosmological parameters
        blocks = [len(self.get_mcmc_parameters(['varying', 'cosmo']))]
        # Then, store all nuisance parameters
        nuisance = self.get_mcmc_parameters(['varying', 'nuisance'])

        # Keep track of the nuisance parameters already taken into account,
        # so that a parameter shared by several likelihoods is only counted
        # for the first likelihood (in iteration order) that lists it.
        used_nuisance = []
        # `.values()` instead of `.itervalues()`: identical iteration under
        # Python 2, and it also exists under Python 3.
        for likelihood in self.lkl.values():
            count = 0
            for elem in nuisance:
                if elem in likelihood.nuisance and elem not in used_nuisance:
                    count += 1
                    used_nuisance.append(elem)
            likelihood.varying_nuisance_parameters = count

        # Then circle through them
        index = 0
        while index < len(nuisance):
            elem = nuisance[index]
            found = False
            # `elem` is deliberately not refreshed inside this sweep: every
            # likelihood listing it appends its own block and advances the
            # index by the number of parameters it owns.
            for likelihood in self.lkl.values():
                # The bound check matters because a previous likelihood of
                # this same sweep may already have pushed the index to the
                # end of the list.
                if elem in likelihood.nuisance and index < len(nuisance):
                    found = True
                    # Blocks are stored as cumulative parameter counts
                    blocks.append(
                        likelihood.varying_nuisance_parameters + blocks[-1])
                    index += likelihood.varying_nuisance_parameters
            if not found:
                # If the loop reaches this part, it means this nuisance
                # parameter was associated with no likelihood: this should not
                # happen
                raise io_mp.ConfigurationError(
                    "nuisance parameter %s " % elem +
                    "is associated to no likelihood")
        # Store the result
        self.block_parameters = blocks

        # Setting a default value for the over_sampling array
        if not self.over_sampling:
            self.over_sampling = [1] * len(self.block_parameters)
        # Test that the over_sampling list has the same size as
        # block_parameters. This is an explicit comparison rather than an
        # `assert`, because asserts are removed when Python runs with -O and
        # the check would then silently disappear.
        elif len(self.block_parameters) != len(self.over_sampling):
            raise io_mp.ConfigurationError(
                "The length of the over_sampling field should be"
                " equal to the number of blocks (one for cosmological "
                "parameters, plus one for each likelihood with "
                "nuisance parameters)")

        # Create a list of indices corresponding of the oversampling strategy
        self.assign_over_sampling_indices()
Beispiel #26
0
def chain(cosmo, data, command_line):
    """
    Run a Markov chain of fixed length.

    Main function of this module, this is the actual Markov chain procedure.
    After having selected a starting point in parameter space defining the
    first **last accepted** one, it will, for a given amount of steps:

    + choose randomly a new point following the *proposal density*,
    + compute the cosmological *observables* through the cosmological module,
    + compute the value of the *likelihoods* of the desired experiments at
      this point,
    + *accept/reject* this point given its likelihood compared to the one of
      the last accepted one.

    Every time the code accepts :code:`data.write_step` number of points
    (quantity defined in the input parameter file), it will write the result to
    disk (flushing the buffer by forcing to exit the output file, and reopening
    it again).

    .. note::

        to use the code to set a fiducial file for certain fixed parameters,
        you can use two solutions. The first one is to put all input 1-sigma
        proposal density to zero (this method still works, but is not
        recommended anymore). The second one consists in using the flag
        "-f 0", to force a step of zero amplitude.

    Parameters
    ----------
    cosmo
        Cosmological module instance, handed over to
        :code:`sampler.compute_lkl`.
    data
        Object holding the run state. This function reads its :code:`out`,
        :code:`jumping_factor`, :code:`boundary_loglike` and
        :code:`write_step` attributes.
    command_line
        Parsed options. This function reads :code:`silent`, :code:`jumping`,
        :code:`restart`, :code:`bf` and :code:`N`.

    Returns
    -------
    tuple or None
        :code:`(1, loglike)` when a single point is computed (no varying
        parameter, or jumping factor equal to 0), :code:`None` otherwise.

    Raises
    ------
    io_mp.ConfigurationError
        If no valid starting position is found after 100 tries.

    """

    ## Initialisation
    # In case command_line.silent has been asked, outputs should only contain
    # data.out. Otherwise, it will also contain sys.stdout
    outputs = [data.out]
    if not command_line.silent:
        outputs.append(sys.stdout)

    # Recover the covariance matrix according to the input, if the varying set
    # of parameters is non-zero
    if (data.get_mcmc_parameters(['varying']) != []):
        sigma_eig, U, C = sampler.get_covariance_matrix(data, command_line)
        if data.jumping_factor == 0:
            warnings.warn(
                "The jumping factor has been set to 0. The above covariance " +
                "matrix will not be used.")

    # In case of a fiducial run (all parameters fixed), simply run once and
    # print out the likelihood. This should not be used any more (one has to
    # modify the log.param, which is never a good idea. Instead, force the code
    # to use a jumping factor of 0 with the option "-f 0".
    else:
        warnings.warn(
            "You are running with no varying parameters... I will compute " +
            "only one point and exit")
        data.update_cosmo_arguments()  # this fills in the fixed parameters
        loglike = sampler.compute_lkl(cosmo, data)
        io_mp.print_vector(outputs, 1, loglike, data)
        return 1, loglike

    # In the fast-slow method, one need the Cholesky decomposition of the
    # covariance matrix. Return the Cholesky decomposition as a lower
    # triangular matrix
    Cholesky = None
    Inverse_Cholesky = None
    Rotation = None
    if command_line.jumping == 'fast':
        Cholesky = la.cholesky(C).T
        Inverse_Cholesky = np.linalg.inv(Cholesky)
        Rotation = np.identity(len(sigma_eig))

    # If restart wanted, pick initial value for arguments
    if command_line.restart is not None:
        sampler.read_args_from_chain(data, command_line.restart)

    # If restart from best fit file, read first point (overwrite settings of
    # read_args_from_chain)
    if command_line.bf is not None:
        sampler.read_args_from_bestfit(data, command_line.bf)

    # Pick a position (from last accepted point if restart, from the mean value
    # else), with a 100 tries. The else clause only runs when the loop was
    # never left through break, i.e. when every try failed.
    for i in range(100):
        if get_new_position(data, sigma_eig, U, i, Cholesky, Inverse_Cholesky,
                            Rotation) is True:
            break
    else:
        raise io_mp.ConfigurationError(
            "You should probably check your prior boundaries... because " +
            "no valid starting position was found after 100 tries")

    # Compute the starting Likelihood
    loglike = sampler.compute_lkl(cosmo, data)

    # Choose this step as the last accepted value
    # (accept_step), and modify accordingly the max_loglike
    sampler.accept_step(data)
    max_loglike = loglike

    # If the jumping factor is 0, the likelihood associated with this point is
    # displayed, and the code exits.
    if data.jumping_factor == 0:
        io_mp.print_vector(outputs, 1, loglike, data)
        return 1, loglike

    acc, rej = 0.0, 0.0  # acceptance and rejection number count
    N = 1  # number of time the system stayed in the current position

    # Print on screen the computed parameters
    io_mp.print_parameters(sys.stdout, data)

    k = 1
    # Main loop, that goes on while the expected amount of steps
    # (command_line.N) is not taken.
    while k <= command_line.N:

        # Pick a new position ('current' flag in mcmc_parameters), and compute
        # its likelihood. If get_new_position returns True, it means it did not
        # encounter any boundary problem. Otherwise, just increase the
        # multiplicity of the point and start the loop again
        if get_new_position(data, sigma_eig, U, k, Cholesky, Inverse_Cholesky,
                            Rotation) is True:
            newloglike = sampler.compute_lkl(cosmo, data)
        else:  # reject step
            rej += 1
            N += 1
            k += 1
            continue

        # Harmless trick to avoid exponentiating large numbers. This decides
        # whether or not the system should move. A likelihood equal to
        # data.boundary_loglike gets alpha = -1, which can never be accepted
        # below since rd.uniform(0, 1) is never negative.
        if (newloglike != data.boundary_loglike):
            if (newloglike >= loglike):
                alpha = 1.
            else:
                alpha = np.exp(newloglike - loglike)
        else:
            alpha = -1

        if ((alpha == 1.) or (rd.uniform(0, 1) < alpha)):  # accept step

            # Print out the last accepted step (WARNING: this is NOT the one we
            # just computed ('current' flag), but really the previous one.)
            # with its proper multiplicity (number of times the system stayed
            # there).
            io_mp.print_vector(outputs, N, loglike, data)

            # Report the 'current' point to the 'last_accepted'
            sampler.accept_step(data)
            loglike = newloglike
            if loglike > max_loglike:
                max_loglike = loglike
            acc += 1.0
            N = 1  # Reset the multiplicity

        else:  # reject step
            rej += 1.0
            N += 1  # Increase multiplicity of last accepted point

        # Regularly (option to set in parameter file), close and reopen the
        # buffer to force to write on file.
        # NOTE(review): this test sits outside the accept branch, so it is
        # also true on every rejected step for as long as acc stays at a
        # multiple of data.write_step (including acc == 0).
        if acc % data.write_step == 0:
            io_mp.refresh_file(data)
            # Update the outputs list
            outputs[0] = data.out
        k += 1  # One iteration done
    # END OF WHILE LOOP

    # If at this moment, the multiplicity is higher than 1, it means the
    # current point is not yet accepted, but it also mean that we did not print
    # out the last_accepted one yet. So we do.
    if N > 1:
        io_mp.print_vector(outputs, N - 1, loglike, data)

    # If the main loop never ran (command_line.N smaller than 1), there is no
    # acceptance rate to compute: acc + rej is zero and the division below
    # would raise ZeroDivisionError. Nothing was written to the chain either,
    # so the restart file is left untouched.
    attempted = acc + rej
    if attempted == 0:
        sys.stdout.write('\n#  {0} steps done, no step was attempted\n'.format(
            command_line.N))
        return

    # Print out some information on the finished chain
    rate = acc / attempted
    sys.stdout.write('\n#  {0} steps done, acceptance rate: {1}\n'.format(
        command_line.N, rate))

    # In case the acceptance rate is too low, or too high, print a warning
    if rate < 0.05:
        warnings.warn("The acceptance rate is below 0.05. You might want to "
                      "set the jumping factor to a lower value than the "
                      "default (2.4), with the option `-f 1.5` for instance.")
    elif rate > 0.6:
        warnings.warn("The acceptance rate is above 0.6, which means you might"
                      " have difficulties exploring the entire parameter space"
                      ". Try analysing these chains, and use the output "
                      "covariance matrix to decrease the acceptance rate to a "
                      "value between 0.2 and 0.4 (roughly).")

    # For a restart, erase the starting point to keep only the new, longer
    # chain.
    if command_line.restart is not None:
        os.remove(command_line.restart)
        sys.stdout.write(
            '    deleting starting point of the chain {0}\n'.format(
                command_line.restart))

    return
Beispiel #27
0
    def __init__(self, path, data, command_line):
        """
        Set up the likelihood: cosmological requirements, grids and fiducial
        values.

        After the base ``Likelihood`` initialisation, this requests the matter
        power spectrum (and the transfer functions when a bias correction
        needs them) through ``need_cosmo_arguments``, builds the redshift, k
        and mu grids, sets the galaxy counts per redshift bin, and loads the
        fiducial quantities when the fiducial file exists.

        Parameters
        ----------
        path, data, command_line
            Forwarded unchanged to ``Likelihood.__init__``; ``data`` is also
            passed to every ``need_cosmo_arguments`` call.

        Raises
        ------
        io_mp.ConfigurationError
            If transfer functions are requested while two mutually exclusive
            bias options are both enabled, or if the scale-dependent bias
            table cannot be parsed from the fiducial file.

        """

        Likelihood.__init__(self, path, data, command_line)

        #TS; new k_cut
        self.need_cosmo_arguments(data, {'output': 'mPk'})
        self.need_cosmo_arguments(data, {'z_max_pk': self.zmax})
        self.need_cosmo_arguments(
            data, {'P_k_max_1/Mpc': 1.5 * self.k_cut(self.zmax)})

        # TS;Compute non-linear power spectrum if requested
        if self.use_halofit:
            self.need_cosmo_arguments(data, {'non linear': 'halofit'})
            print("Using halofit")

        # Look for the fiducial file only once: the answer decides both
        # whether transfer functions are needed below and whether the fiducial
        # values get loaded at the end of this method.
        fid_file_path = os.path.join(self.data_directory, self.fiducial_file)
        self.fid_values_exist = os.path.exists(fid_file_path)

        # TB; request transfer functions for bias correction
        if self.use_neutrino_bias or self.use_cdm_plus_baryons_pk or (
                not self.fid_values_exist and
                (self.use_fiducial_neutrino_bias
                 or self.use_fiducial_cdm_plus_baryons_pk)):
            self.need_cosmo_arguments(data, {'output': 'mPk, dTk'})
            if self.use_neutrino_bias and self.use_cdm_plus_baryons_pk:
                raise io_mp.ConfigurationError(
                    "Check settings for euclid_pk in the .data file: "
                    "you should not set both 'use_neutrino_bias' "
                    "and 'use_cdm_plus_baryons_pk' to True")
            if (self.use_fiducial_neutrino_bias
                    and self.use_fiducial_cdm_plus_baryons_pk):
                raise io_mp.ConfigurationError(
                    "Check settings for euclid_pk in the .data file: "
                    "you should not set both 'use_fiducial_neutrino_bias' "
                    "and 'use_fiducial_cdm_plus_baryons_pk' to True")

        #################
        # find number of galaxies for each mean redshift value
        #################

        # Deduce the dz step from the number of bins and the edge values of z
        self.dz = (self.zmax - self.zmin) / (self.nbin - 1.)

        # Compute the number of galaxies for each \bar z
        # For this, one needs dn/dz TODO
        # then n_g(\bar z) = int_{\bar z - dz/2}^{\bar z + dz/2} dn/dz dz

        # self.z_mean will contain the central values
        self.z_mean = np.linspace(self.zmin, self.zmax, num=self.nbin)

        # Store the z edge values
        self.z_edges = np.linspace(self.zmin - self.dz / 2.,
                                   self.zmax + self.dz / 2,
                                   num=self.nbin + 1)

        # Store the total vector z, with edges + mean
        self.z = np.linspace(self.zmin - self.dz / 2.,
                             self.zmax + self.dz / 2.,
                             num=2 * self.nbin + 1)

        # Define the k values for the integration (from kmin to kmax), at which
        # the spectrum will be computed (and stored for the fiducial model)
        # k_size is deeply arbitrary here, TODO
        self.k_fid = np.logspace(log10(self.kmin),
                                 log10(self.k_cut(self.zmax)),
                                 num=self.k_size)

        # TS; Define the mu scale
        self.mu_fid = np.linspace(-1, 1, self.mu_size)

        ################
        # Noise spectrum
        ################
        # TS; original settings (deleted unused settings), fsky

        # NOTE(review): this table always has 16 entries, whatever the value
        # of self.nbin -- confirm that nbin is meant to be 16 here.
        self.n_g = np.array([
            6844.945, 7129.45, 7249.912, 7261.722, 7203.825, 7103.047,
            6977.571, 6839.546, 6696.957, 5496.988, 4459.240, 3577.143,
            2838.767, 2229.282, 1732.706, 1333.091
        ])
        # presumably 41253. is the full-sky area in square degrees, turning a
        # density per square degree into a count -- TODO confirm
        self.n_g = self.n_g * self.fsky * 41253. * self.efficiency

        # TS; Ntot output (disabled debug printout, kept for reference)
        # print("\nEuclid: Number of detected galaxies in each redshift bin:")
        # for index_z in xrange(self.nbin):
        #     print("z-bin[" + str(self.z_mean[index_z]-self.dz/2.) + ","
        #           + str(self.z_mean[index_z]+self.dz/2.)
        #           + "]: \tN = %.4g" % (self.n_g[index_z])
        #           + " ,\t b = %.4g" % (self.b[index_z]))
        # N_tot = np.sum(self.n_g)
        # print("Total number of detected galaxies: N = %.4g\n" % (N_tot))

        # If the file exists, initialize the fiducial values, the spectrum will
        # be read first, with k_size values of k and 2*nbin+1 values of z.
        # Then, H_fid and D_A fid will be read (each with 2*nbin+1 values),
        # followed by sigma_r, V and b (nbin values each) and sigma_NL.
        self.pk_nl_fid = np.zeros((self.k_size, 2 * self.nbin + 1), 'float64')
        if self.use_linear_rsd:
            self.pk_lin_fid = np.zeros((self.k_size, 2 * self.nbin + 1),
                                       'float64')
        self.H_fid = np.zeros(2 * self.nbin + 1, 'float64')
        self.D_A_fid = np.zeros(2 * self.nbin + 1, 'float64')
        self.sigma_r_fid = np.zeros(self.nbin, 'float64')
        self.V_fid = np.zeros(self.nbin, 'float64')
        self.b_fid = np.zeros(self.nbin, 'float64')

        if self.fid_values_exist:
            with open(fid_file_path, 'r') as fid_file:
                line = fid_file.readline()
                # Skip the header: first the lines containing a '#', then the
                # lines made of a single newline character
                while line.find('#') != -1:
                    line = fid_file.readline()
                while (line.find('\n') != -1 and len(line) == 1):
                    line = fid_file.readline()
                # One line per (k, z) pair, the z index running fastest
                for index_k in range(self.k_size):
                    for index_z in range(2 * self.nbin + 1):
                        if self.use_linear_rsd:
                            # Two columns: non-linear, then linear spectrum
                            columns = line.split()
                            self.pk_nl_fid[index_k,
                                           index_z] = float(columns[0])
                            self.pk_lin_fid[index_k,
                                            index_z] = float(columns[1])
                        else:
                            self.pk_nl_fid[index_k, index_z] = float(line)
                        line = fid_file.readline()
                for index_z in range(2 * self.nbin + 1):
                    columns = line.split()
                    self.H_fid[index_z] = float(columns[0])
                    self.D_A_fid[index_z] = float(columns[1])
                    line = fid_file.readline()
                for index_z in range(self.nbin):
                    columns = line.split()
                    self.sigma_r_fid[index_z] = float(columns[0])
                    #TS; load fiducial volume
                    self.V_fid[index_z] = float(columns[1])
                    #TS; load fiducial bias
                    self.b_fid[index_z] = float(columns[2])
                    line = fid_file.readline()
                #TS; load fiducial sigma_NL
                self.sigma_NL_fid = float(line)
                #TB; load scale-dependent bias, if needed
                if (self.use_fiducial_neutrino_bias
                        or self.use_fiducial_cdm_plus_baryons_pk):
                    self.b_array_fid = np.zeros([self.k_size, self.nbin],
                                                dtype='float64')
                    try:
                        for index_k in range(self.k_size):
                            entries = fid_file.readline().split()
                            # Converting entry by entry makes a line that is
                            # too short fail with IndexError instead of being
                            # silently broadcast by numpy
                            for index_z in range(self.nbin):
                                entries[index_z] = float(entries[index_z])

                            self.b_array_fid[index_k, :] = np.array(
                                entries, dtype='float64')
                    # IndexError: missing or too short line; ValueError: entry
                    # that is not a number, or line with too many entries
                    except (IndexError, ValueError):
                        raise io_mp.ConfigurationError(
                            'There appears to be a problem with the fiducial file. Probably '
                            'it was created with a different number of nuisance parameters, '
                            'different k_size, nbin, or without use_neutrino_bias=True'
                        )
        # Else the file will be created in the loglkl() function.
        return