Code example #1
File: cbcBayesMCMC2pos.py Project: lscsoft/lalsuite
def reassign_metadata(new_posterior, original_hdf5):

	# Make sure the output file has the same metadata as the
	# original input HDF5 file

	base_file = read_samples(original_hdf5)

	mcmc_diagnostics_params = ['nLocalTemps','randomSeed']

	meta_dict = {}

	for colname, column in base_file.columns.items():
		meta_dict[colname] = column.meta

	for colname, column in new_posterior.columns.items():
		if colname in mcmc_diagnostics_params:
			column.meta = {'vary': OUTPUT}
			# these parameters are fixed within a run,
			# but don't have to be equal between runs
			# (OUTPUT and FIXED are the 'vary' flags defined in the full script)
		elif colname in meta_dict:
			column.meta = meta_dict[colname]
		elif 'cos'+colname in meta_dict:
			column.meta = meta_dict['cos'+colname]
		elif 'sin'+colname in meta_dict:
			column.meta = meta_dict['sin'+colname]
		elif 'log'+colname in meta_dict:
			column.meta = meta_dict['log'+colname]
		elif colname.startswith('chain_'):
			column.meta = {'vary': OUTPUT}
			# same argument as with mcmc_diagnostics_params
		else:
			column.meta = {'vary': FIXED}

	return new_posterior
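A minimal usage sketch (the file names are hypothetical, and it assumes write_samples is available from lalinference.io alongside read_samples):

from lalinference.io import read_samples, write_samples

# hypothetical input/output files
pos = read_samples('combined_posterior.hdf5')
pos = reassign_metadata(pos, 'original_chain.hdf5')  # copy over the 'vary' metadata
write_samples(pos, 'posterior_with_metadata.hdf5',
              path='lalinference/lalinference_mcmc/posterior_samples')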
Code example #2
def read_nested_from_hdf5(nested_path_list, strict_versions=True):
    headers = None
    input_arrays = []
    metadata = {}
    log_noise_evidences = []
    log_max_likelihoods = []
    nlive = []
    from lalinference.io import read_samples, extract_metadata

    for path in nested_path_list:
        if not os.path.isfile(path):
            print('Unable to open %s, skipping file' % (path))
            continue
        try:
            tab = read_samples(path, tablename=nested_dset_name)
            input_arrays.append(tab)
        except KeyError:
            print('Unable to read table from %s, skipping' % (path))
            continue

        # N.B.: extract_metadata appends to metadata, log_noise_evidences,
        # log_max_likelihoods and nlive, in addition to returning run_identifier
        run_identifier = extract_metadata(path, metadata, log_noise_evidences,
                                          log_max_likelihoods, nlive,
                                          nested_dset_name, True,
                                          strict_versions)

    if len(input_arrays) == 0:
        print('No nested samples could be read from %s' % str(nested_path_list))
        raise IOError

    # for metadata which is in a list, take the average.
    for level in metadata:
        for key in metadata[level]:
            if isinstance(metadata[level][key], list) and all(
                    isinstance(x, (int, float)) for x in metadata[level][key]):
                metadata[level][key] = mean(metadata[level][key])
            elif isinstance(metadata[level][key], list) and all(
                    isinstance(x, str) for x in metadata[level][key]):
                print(
                    "Warning: only printing the first of the %d entries found for metadata %s/%s. You can find the whole list in the headers of individual hdf5 output files\n"
                    % (len(metadata[level][key]), level, key))
                metadata[level][key] = metadata[level][key][0]
    log_noise_evidence = reduce(logaddexp, log_noise_evidences) - log(
        len(log_noise_evidences))
    log_max_likelihood = max(log_max_likelihoods)

    return input_arrays, log_noise_evidence, log_max_likelihood, metadata, nlive, run_identifier
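A short usage sketch for combining several nested-sampling runs (the file names are hypothetical):

# read two runs and pool their samples and evidences
paths = ['run0_nest.hdf5', 'run1_nest.hdf5']
(arrays, log_noise_evidence, log_max_likelihood,
 metadata, nlive, run_id) = read_nested_from_hdf5(paths)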
Code example #3
# convert nested samples to posterior samples
outpost = os.path.join(outdir, "{}_post.hdf".format(label))
runcmd = " ".join([n2p, "-p", outpost, outfile])
with sp.Popen(
        runcmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE,
        shell=True,
        bufsize=1,
        universal_newlines=True,
) as p:
    for line in p.stdout:
        print(line, end="")

# get posterior samples
post = read_samples(outpost,
                    tablename=LALInferenceHDF5PosteriorSamplesDatasetName)
lp = len(post["H0"])
postsamples = np.zeros((lp, len(priors)))
for i, p in enumerate(priors.keys()):
    postsamples[:, i] = post[p.upper()]

# get evidence
hdf = h5py.File(outpost, "r")
a = hdf["lalinference"]["lalinference_nest"]
evsig = a.attrs["log_evidence"]
evnoise = a.attrs["log_noise_evidence"]
hdf.close()

# run bilby via the pe interface
runner = pe(
    data_file=hetfile,
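When the live output streamed by the Popen block is not needed, the same conversion can be run more compactly with subprocess.run (a sketch; runcmd is the nested-to-posterior command assembled above):

import subprocess as sp

proc = sp.run(runcmd, shell=True, capture_output=True, text=True)
print(proc.stdout, end="")
if proc.returncode != 0:
    raise RuntimeError(proc.stderr)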
Code example #4
def read_nested_from_hdf5(nested_path_list, strict_versions=True):
    headers = None
    input_arrays = []
    metadata = {}
    log_noise_evidences = []
    log_max_likelihoods = []
    nlive = []
    from lalinference.io import read_samples

    def update_metadata(level, attrs, collision='raise'):
        """Update the sub-dictionary 'level' of 'metadata' with the values
        from 'attrs', while enforcing that existing values are equal to those
        with which the dict is updated.
        """
        if level not in metadata:
            metadata[level] = {}
        for key in attrs:
            if key in metadata[level]:
                if collision == 'raise':
                    if attrs[key] != metadata[level][key]:
                        if key == 'version' and not strict_versions:
                            continue
                        else:
                            raise ValueError(
                                'Metadata mismatch on level %r for key %r:\n\t%r != %r'
                                % (level, key, attrs[key], metadata[level][key]))
                elif collision == 'append':
                    if isinstance(metadata[level][key], list):
                        metadata[level][key].append(attrs[key])
                    else:
                        metadata[level][key] = [metadata[level][key], attrs[key]]
                elif collision == 'ignore':
                    pass
                else:
                    raise ValueError('Invalid value for collision: %r' % collision)
            else:
                metadata[level][key] = attrs[key]
        return

    for path in nested_path_list:
        if not os.path.isfile(path):
            print('Unable to open %s, skipping file' % (path))
            continue
        try:
            tab = read_samples(path,
                               path='lalinference/lalinference_nest/nested_samples')
            input_arrays.append(tab)
        except KeyError:
            print('Unable to read table from %s, skipping' % (path))
            continue

        with h5py.File(path, 'r') as hdf:
            # walk down the groups until the actual data is reached, storing
            # metadata for each step.
            current_level = '/lalinference'
            group = hdf[current_level]
            update_metadata(current_level, group.attrs)

            if len(hdf[current_level].keys()) != 1:
                raise KeyError('Multiple run-identifiers found: %r'
                               % list(hdf[current_level].keys()))
            # we ensured above that there is only one identifier in the group.
            run_identifier = list(hdf[current_level].keys())[0]

            current_level = '/lalinference/' + run_identifier
            group = hdf[current_level]
            update_metadata(current_level, group.attrs, collision='append')

            # store the noise evidence and max likelihood separately for later use
            log_noise_evidences.append(group.attrs['log_noise_evidence'])
            log_max_likelihoods.append(group.attrs['log_max_likelihood'])
            nlive.append(group.attrs['number_live_points'])

            # storing the metadata under the posterior_group name simplifies
            # writing it into the output hdf file.
            current_level = '/lalinference/' + run_identifier + '/' + nested_dset_name
            current_level_posterior = '/lalinference/' + run_identifier + '/' + posterior_dset_name
            group = hdf[current_level]
            update_metadata(current_level_posterior, group.attrs, collision='ignore')

    # for metadata which is in a list, take the average.
    for level in metadata:
        for key in metadata[level]:
            if isinstance(metadata[level][key], list) and all(
                    isinstance(x, (int, float)) for x in metadata[level][key]):
                metadata[level][key] = mean(metadata[level][key])

    log_noise_evidence = reduce(logaddexp, log_noise_evidences) - log(len(log_noise_evidences))
    log_max_likelihood = max(log_max_likelihoods)

    return input_arrays, log_noise_evidence, log_max_likelihood, metadata, nlive, run_identifier
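The three collision modes decide how attributes repeated across input files are merged; a minimal illustration of the semantics (values are hypothetical, and since update_metadata is a closure these calls would run inside read_nested_from_hdf5):

update_metadata('/lalinference', {'version': '6.0'})       # first write
update_metadata('/lalinference', {'version': '6.0'})       # same value: passes under 'raise'
update_metadata('/run0', {'seed': 1}, collision='append')
update_metadata('/run0', {'seed': 2, 'foo': 3}, collision='append')
# metadata['/run0']['seed'] is now [1, 2]; 'foo' was simply added
update_metadata('/run0', {'seed': 9}, collision='ignore')  # existing value kept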
Code example #5
args = parser.parse_args()

# Late imports.
import re
from astropy.table import Table
from astropy.utils.misc import NumpyRNGContext
from matplotlib import pyplot as plt
import numpy as np
from lalinference import io
import lalinference.plot
from lalinference.bayestar.postprocess import find_injection_moc
from scipy.interpolate import interp1d

# Read input.
skymap = io.read_sky_map(args.skymap.name, moc=True)
chain = io.read_samples(args.samples.name)

# If required, downselect to a smaller number of posterior samples.
if args.max_points is not None:
    chain = Table(np.random.permutation(chain)[:args.max_points])

# Calculate P-P plot.
contours = np.asarray(args.contour)
result = find_injection_moc(skymap,
                            chain['ra'],
                            chain['dec'],
                            chain['dist'],
                            contours=1e-2 * contours)


def fmt(x, sigfigs, force_scientific=False):
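NumpyRNGContext is imported above but the downselection is unseeded; wrapping the permutation in it makes the P-P plot reproducible (a sketch, with an arbitrary illustrative seed):

with NumpyRNGContext(12345):  # fix the seed so the subsample is reproducible
    chain = Table(np.random.permutation(chain)[:args.max_points])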
Code example #6
File: comparitors.py Project: duncanmmacleod/cwinpy
def comparisons(label, outdir, grid, priors, cred=0.9):
    """
    Perform comparisons of the evidence, parameter values, confidence
    intervals, and Kolmogorov-Smirnov test between samples produced with
    lalapps_pulsar_parameter_estimation_nested and cwinpy.
    """

    lppenfile = os.path.join(outdir, "{}_post.hdf".format(label))

    # get posterior samples
    post = read_samples(lppenfile,
                        tablename=LALInferenceHDF5PosteriorSamplesDatasetName)

    # get the evidence values and the uncertainty on ln(evidence)
    with h5py.File(lppenfile, "r") as hdf:
        attrs = hdf["lalinference"]["lalinference_nest"].attrs
        info = attrs["information_nats"]
        nlive = attrs["number_live_points"]
        evsig = attrs["log_evidence"]
        evnoise = attrs["log_noise_evidence"]
    everr = np.sqrt(info / nlive)  # the uncertainty on ln(evidence)

    # read in cwinpy results
    result = read_in_result(outdir=outdir, label=label)

    # comparison file
    comparefile = os.path.join(outdir, "{}_compare.txt".format(label))

    # get grid-based evidence
    if grid is not None:
        grid_evidence = grid.log_evidence

    # set values to output
    values = 64 * [None]
    values[0:4] = evsig, evnoise, (evsig - evnoise), everr
    values[4:8] = (
        result.log_evidence,
        result.log_noise_evidence,
        result.log_bayes_factor,
        result.log_evidence_err,
    )
    if grid is not None:
        values[8] = "{0:.3f}".format(grid_evidence)
        values[9] = "{0:.3f}".format(grid_evidence - result.log_noise_evidence)
    else:
        values[8:10] = ("N/A", "N/A")  # no values supplied

    # output parameter means, standard deviations, and credible intervals
    idx = 10
    for method in ["lalapps", "cwinpy"]:
        values[idx + 9] = int(cred * 100)
        for p in priors.keys():
            samples = post[
                p.upper()] if method == "lalapps" else result.posterior[p]

            # convert iota to cos(iota)
            if p == "iota":
                samples = np.cos(samples)

            mean = samples.mean()
            std = samples.std()
            low, high = credible_interval(samples, ci=cred)
            if p == "h0":
                exponent = int(np.floor(np.log10(mean)))
                values[idx] = mean / 10**exponent
                values[idx + 1] = std / 10**exponent
                values[idx + 2] = exponent
                values[idx + 10] = low / 10**exponent
                values[idx + 11] = high / 10**exponent
                values[idx + 12] = exponent
                idx += 3
            else:
                values[idx] = mean
                values[idx + 1] = std
                values[idx + 10] = low
                values[idx + 11] = high
                idx += 2
        idx += 10

    # output parameter maximum a-posteriori points
    maxidx = (result.posterior["log_likelihood"] +
              result.posterior["log_prior"]).idxmax()
    maxidxlppen = (post["logL"] + post["logPrior"]).argmax()
    for method in ["lalapps", "cwinpy"]:
        for p in priors.keys():
            maxpval = (post[p.upper()][maxidxlppen]
                       if method == "lalapps" else result.posterior[p][maxidx])
            if p == "h0":
                exponent = int(np.floor(np.log10(maxpval)))
                values[idx] = maxpval / 10**exponent
                values[idx + 1] = exponent
                idx += 2
            else:
                values[idx] = maxpval
                idx += 1
        if result.use_ratio:
            # convert likelihood ratio back to likelihood
            values[idx] = (post["logL"][maxidxlppen] if method == "lalapps"
                           else (result.posterior["log_likelihood"][maxidx] +
                                 result.log_noise_evidence))
        else:
            values[idx] = (post["logL"][maxidxlppen] if method == "lalapps"
                           else result.posterior["log_likelihood"][maxidx])
        idx += 1

    # calculate the Kolmogorov-Smirnov test for each 1d marginalised
    # distribution, and the Jensen-Shannon divergence, between the two codes.
    # Output the combined p-value of the KS test statistic over all
    # parameters, and the maximum Jensen-Shannon divergence over all
    # parameters.
    values[idx] = np.inf  # placeholder, overwritten below
    pvalues = []
    jsvalues = []
    for p in priors.keys():
        _, pvalue = ks_2samp(post[p.upper()], result.posterior[p])
        pvalues.append(pvalue)

        # calculate J-S divergence
        bins = np.linspace(
            np.min([np.min(post[p.upper()]),
                    np.min(result.posterior[p])]),
            np.max([np.max(post[p.upper()]),
                    np.max(result.posterior[p])]),
            100,
        )

        hp, _ = np.histogram(post[p.upper()], bins=bins, density=True)
        hq, _ = np.histogram(result.posterior[p], bins=bins, density=True)
        jsvalues.append(jensenshannon(hp, hq)**2)

    values[idx] = combine_pvalues(pvalues)[1]
    idx += 1
    values[idx] = np.max(jsvalues)

    values[idx + 1] = cwinpy.__version__
    values[idx + 2] = bilby.__version__

    with open(comparefile, "w") as fp:
        fp.write(FILETEXT.format(*values))
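credible_interval is defined elsewhere in comparitors.py; a minimal equal-tailed sketch of the behaviour assumed above (the real helper may instead compute a highest-density interval):

def credible_interval(samples, ci=0.9):
    # cut (1 - ci)/2 of the probability mass from each tail
    low, high = np.percentile(samples, [50 * (1 - ci), 50 * (1 + ci)])
    return low, high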
Code example #7
progress.update(-1, 'Preparing projection')

if opts.align_to is None or opts.input.name == opts.align_to.name:
    prob2, mu2, sigma2, norm2 = prob, mu, sigma, norm
else:
    (prob2, mu2, sigma2, norm2), _ = io.read_sky_map(
        opts.align_to.name, distances=True)
if opts.max_distance is None:
    max_distance = 2.5 * marginal_ppf(0.5, prob2, mu2, sigma2, norm2)
else:
    max_distance = opts.max_distance
R = np.ascontiguousarray(principal_axes(prob2, mu2, sigma2))

if opts.chain:
    chain = io.read_samples(opts.chain.name)
    chain = np.dot(R.T, (hp.ang2vec(
        0.5 * np.pi - chain['dec'], chain['ra'])
        * np.atleast_2d(chain['dist']).T).T)

fig = plt.figure(frameon=False)
n = 1 if opts.projection else 2
gs = gridspec.GridSpec(
    n, n, left=0.01, right=0.99, bottom=0.01, top=0.99,
    wspace=0.05, hspace=0.05)

imgwidth = int(opts.dpi * opts.figure_width / n)
s = np.linspace(-max_distance, max_distance, imgwidth)
xx, yy = np.meshgrid(s, s)
dtheta = 0.5 * np.pi / nside / 4
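principal_axes returns the rotation R into the sky map's principal frame, which the chain samples are re-expressed in above; a quick illustrative sanity check:

# a rotation matrix satisfies R @ R.T == I
assert np.allclose(R @ R.T, np.eye(3), atol=1e-8)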
Code example #8
progress.update(-1, 'Preparing projection')

if opts.align_to is None or opts.input.name == opts.align_to.name:
    prob2, mu2, sigma2, norm2 = prob, mu, sigma, norm
else:
    (prob2, mu2, sigma2, norm2), _ = io.read_sky_map(
        opts.align_to.name, distances=True)
if opts.max_distance is None:
    mean, std = parameters_to_marginal_moments(prob2, mu2, sigma2)
    max_distance = mean + 2.5 * std
else:
    max_distance = opts.max_distance
R = np.ascontiguousarray(principal_axes(prob2, mu2, sigma2))

if opts.chain:
    chain = io.read_samples(opts.chain.name)
    chain = np.dot(R.T, (hp.ang2vec(
        0.5 * np.pi - chain['dec'], chain['ra'])
        * np.atleast_2d(chain['dist']).T).T)

fig = plt.figure(frameon=False)
n = 1 if opts.projection else 2
gs = gridspec.GridSpec(
    n, n, left=0.01, right=0.99, bottom=0.01, top=0.99,
    wspace=0.05, hspace=0.05)

imgwidth = int(opts.dpi * opts.figure_width / n)
s = np.linspace(-max_distance, max_distance, imgwidth)
xx, yy = np.meshgrid(s, s)
dtheta = 0.5 * np.pi / nside / 4
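Code examples #7 and #8 differ only in how the default field of view is chosen when opts.max_distance is None; the two heuristics can be compared side by side (a sketch reusing the functions each script imports):

fov_median = 2.5 * marginal_ppf(0.5, prob2, mu2, sigma2, norm2)  # 2.5 x the median distance
mean, std = parameters_to_marginal_moments(prob2, mu2, sigma2)
fov_moments = mean + 2.5 * std                                   # mean + 2.5 sigma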
Code example #9
# Late imports.
import re
from astropy.table import Table
from astropy.utils.misc import NumpyRNGContext
from matplotlib import pyplot as plt
import numpy as np
from lalinference import io
import lalinference.plot
from lalinference.bayestar.postprocess import find_injection_moc
from scipy.interpolate import interp1d


# Read input.
skymap = io.read_sky_map(args.skymap.name, moc=True)
chain = io.read_samples(args.samples.name)

# If required, downselect to a smaller number of posterior samples.
if args.max_points is not None:
    chain = Table(np.random.permutation(chain)[:args.max_points])

# Calculate P-P plot.
contours = np.asarray(args.contour)
result = find_injection_moc(skymap, chain['ra'], chain['dec'], chain['dist'],
                            contours=1e-2 * contours)


def fmt(x, sigfigs, force_scientific=False):
    """Round and format a number in scientific notation."""
    places = sigfigs - int(np.floor(np.log10(x)))
    x_rounded = np.around(x, places)
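The listing cuts off inside fmt; a minimal completion consistent with its docstring (illustrative, not the verbatim original):

    # continue fmt: format the rounded value, optionally in scientific notation
    if force_scientific:
        mantissa, exponent = '{:.{}e}'.format(x_rounded, sigfigs - 1).split('e')
        return r'{} \times 10^{{{}}}'.format(mantissa, int(exponent))
    return '{:.{}g}'.format(x_rounded, sigfigs)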