Example #1
def load_args(ini="injection_study_gwpop_pipe.ini"):
    """Parse the gwpopulation_pipe ini, point the run at the test
    directories, and write the quote-stripped complete config file."""
    parser = gwpopulation_pipe.main.create_parser()
    args, _ = parser.parse_known_args([ini])
    args.dat_samples_regex = "small_test_dir/*.dat"
    args.run_dir = "small_test_dir/test_out"
    args.log_dir = "small_test_dir/test_out/logs"

    complete_ini_file = f"{args.run_dir}/{args.label}_config_complete.ini"
    args.ini_file = complete_ini_file
    gwpopulation_pipe.main.make_dag(args)
    gwpopulation_pipe.main.make_submit_files(args)
    parser.write_to_file(
        filename=complete_ini_file,
        args=args,
        overwrite=True,
        include_description=False,
    )
    with open(complete_ini_file, "r") as ff:
        content = ff.readlines()
    for ii, line in enumerate(content):
        content[ii] = gwpopulation_pipe.main.strip_quotes(line)
    with open(complete_ini_file, "w") as ff:
        ff.writelines(content)

    logger.info(args)

    return args
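
# A hypothetical round trip: the quote-stripped complete ini written above
# can be re-parsed with the same parser to recover the arguments.
args = load_args()
reparsed, _ = gwpopulation_pipe.main.create_parser().parse_known_args(
    [args.ini_file])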
Example #2
    def posterior_predictive_resample(self, samples):
        """
        Resample the original single event posteriors to use the PPD from each
        of the other events as the prior.

        Note: the rate parameter may not be handled consistently here.

        Parameters
        ----------
        samples: pd.DataFrame, dict, list
            The samples to do the weighting over, typically the posterior from
            some run.

        Returns
        -------
        new_samples: dict
            Dictionary containing the weighted posterior samples for each of
            the events.
        """
        if isinstance(samples, pd.DataFrame):
            samples = [dict(samples.iloc[ii]) for ii in range(len(samples))]
        elif isinstance(samples, dict):
            samples = [samples]
        weights = xp.zeros((self.n_posteriors, self.samples_per_posterior))
        event_weights = xp.zeros(self.n_posteriors)
        for sample in tqdm(samples):
            self.parameters.update(sample.copy())
            self.parameters, added_keys = self.conversion_function(
                self.parameters)
            new_weights = self.hyper_prior.prob(
                self.data) / self.sampling_prior
            event_weights += xp.mean(new_weights, axis=-1)
            new_weights = (new_weights.T / xp.sum(new_weights, axis=-1)).T
            weights += new_weights
            if added_keys is not None:
                for key in added_keys:
                    self.parameters.pop(key)
        weights = (weights.T / xp.sum(weights, axis=-1)).T
        new_idxs = xp.empty_like(weights, dtype=int)
        for ii in range(self.n_posteriors):
            new_idxs[ii] = xp.asarray(
                np.random.choice(
                    range(self.samples_per_posterior),
                    size=self.samples_per_posterior,
                    replace=True,
                    p=to_numpy(weights[ii]),
                ))
        new_samples = {
            key: xp.vstack([
                self.data[key][ii, new_idxs[ii]]
                for ii in range(self.n_posteriors)
            ])
            for key in self.data
        }
        event_weights = list(event_weights)
        weight_string = " ".join(
            [f"{float(weight):.1f}" for weight in event_weights])
        logger.info(
            f"Resampling done, sum of weights for events are {weight_string}")
        return new_samples
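
# A minimal, self-contained sketch (hypothetical data, numpy only) of the
# importance resampling performed above: weight each event's posterior
# samples by population_prob / sampling_prior, normalise per event, then
# redraw sample indices with those weights.
import numpy as np

rng = np.random.default_rng(42)
n_events, n_samples = 3, 1000
masses = rng.uniform(5, 50, (n_events, n_samples))  # per-event posterior samples
sampling_prior = np.full((n_events, n_samples), 1 / 45)  # uniform on [5, 50]
population_prob = masses ** -2.3  # unnormalised power-law population model

weights = population_prob / sampling_prior
weights = (weights.T / weights.sum(axis=-1)).T  # normalise per event
new_idxs = np.array([
    rng.choice(n_samples, size=n_samples, replace=True, p=weights[ii])
    for ii in range(n_events)
])
reweighted = np.take_along_axis(masses, new_idxs, axis=-1)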
Example #3
def run_data_collection(args):
    posts, events = gwpopulation_pipe.data_collection.gather_posteriors(
        args=args)
    logger.info(
        f"Using {len(posts)} events, final event list is: {', '.join(events)}."
    )
    posterior_file = f"{args.data_label}.pkl"
    filename = os.path.join(args.run_dir, "posteriors_list", posterior_file)
    logger.info(f"Saving posteriors to {filename}")
    pd.to_pickle(posts, filename)
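
# Hypothetical round trip, mirroring how run_data_analysis below reads this
# file: the pickle holds the list of per-event posterior DataFrames.
posterior_file = os.path.join(
    "test_out", "posteriors_list", "posteriors.pkl")  # placeholder path
posts = pd.read_pickle(posterior_file)
print(len(posts), type(posts[0]))  # number of events, pandas.DataFrame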
Example #4
    def __init__(self,
                 posteriors,
                 hyper_prior,
                 sampling_prior=None,
                 ln_evidences=None,
                 max_samples=1e100,
                 selection_function=lambda args: 1,
                 conversion_function=lambda args: (args, None),
                 cupy=True):
        """
        Parameters
        ----------
        posteriors: list
            A list of pandas DataFrames containing sets of posterior samples.
            Each set may have a different size.
            These can contain a `prior` column containing the original prior
            values.
        hyper_prior: `bilby.hyper.model.Model`
            The population model, this can alternatively be a function.
        sampling_prior: `bilby.hyper.model.Model` *DEPRECATED*
            The sampling prior, this can alternatively be a function.
        ln_evidences: list, optional
            Log evidences for single runs to ensure proper normalisation
            of the hyperparameter likelihood. If not provided, the original
            evidences will be set to 0. This produces a Bayes factor between
            the sampling prior and the hyperparameterised model.
        selection_function: func
            Function which evaluates your population selection function.
        conversion_function: func
            Function which converts a dictionary of sampled parameters to a
            dictionary of parameters of the population model.
        max_samples: int, optional
            Maximum number of samples to use from each set.
        cupy: bool
            If True and a compatible CUDA environment is available,
            cupy will be used for performance.
            Note: this requires setting up your hyper_prior properly.
        """
        if cupy and not CUPY_LOADED:
            logger.warning('Cannot import cupy, falling back to numpy.')

        self.samples_per_posterior = max_samples
        self.data = self.resample_posteriors(posteriors,
                                             max_samples=max_samples)

        if not isinstance(hyper_prior, Model):
            hyper_prior = Model([hyper_prior])
        self.hyper_prior = hyper_prior
        Likelihood.__init__(self, hyper_prior.parameters)

        if sampling_prior is not None:
            logger.warning('Passing a sampling_prior is deprecated. This '
                           'should be passed as a column in the posteriors.')
            if not isinstance(sampling_prior, Model):
                sampling_prior = Model([sampling_prior])
            self.sampling_prior = sampling_prior.prob(self.data)
        elif 'prior' in self.data:
            self.sampling_prior = self.data.pop('prior')
        else:
            logger.info('No prior values provided, defaulting to 1.')
            self.sampling_prior = 1

        if ln_evidences is not None:
            self.total_noise_evidence = np.sum(ln_evidences)
        else:
            self.total_noise_evidence = np.nan

        self.conversion_function = conversion_function
        self.selection_function = selection_function

        self.n_posteriors = len(posteriors)
        self.samples_factor = -self.n_posteriors * np.log(
            self.samples_per_posterior)
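
# A minimal sketch (numpy only, assumed standard form) of how samples_factor
# enters a Monte Carlo hyperparameter log-likelihood: each per-event integral
# is the mean over posterior samples of hyper_prior / sampling_prior, and
# factoring the 1/n out of every mean contributes
# -n_posteriors * log(samples_per_posterior) exactly once.
import numpy as np

def log_likelihood_sketch(weights):
    # weights: (n_posteriors, samples_per_posterior) array of
    # hyper_prior.prob(data) / sampling_prior values
    n_posteriors, samples_per_posterior = weights.shape
    samples_factor = -n_posteriors * np.log(samples_per_posterior)
    return np.sum(np.log(np.sum(weights, axis=-1))) + samples_factor

weights = np.random.default_rng(0).uniform(0.5, 2.0, (4, 100))
assert np.isclose(log_likelihood_sketch(weights),
                  np.sum(np.log(np.mean(weights, axis=-1))))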
Example #5
    def __init__(
            self,
            posteriors,
            hyper_prior,
            sampling_prior=None,
            ln_evidences=None,
            max_samples=1e100,
            selection_function=lambda args: 1,
            conversion_function=lambda args: (args, None),
            cupy=True,
    ):
        """
        Parameters
        ----------
        posteriors: list
            A list of pandas DataFrames containing sets of posterior samples.
            Each set may have a different size.
            These can contain a `prior` column containing the original prior
            values.
        hyper_prior: `bilby.hyper.model.Model`
            The population model, this can alternatively be a function.
        sampling_prior: array-like *DEPRECATED*
            The sampling prior, this can alternatively be a function.
            THIS WILL BE REMOVED IN THE NEXT RELEASE.
        ln_evidences: list, optional
            Log evidences for single runs to ensure proper normalisation
            of the hyperparameter likelihood. If not provided, the original
            evidences will be set to 0. This produces a Bayes factor between
            the sampling prior and the hyperparameterised model.
        selection_function: func
            Function which evaluates your population selection function.
        conversion_function: func
            Function which converts a dictionary of sampled parameters to a
            dictionary of parameters of the population model.
        max_samples: int, optional
            Maximum number of samples to use from each set.
        cupy: bool
            If True and a compatible CUDA environment is available,
            cupy will be used for performance.
            Note: this requires setting up your hyper_prior properly.
        """
        if cupy and not CUPY_LOADED:
            logger.warning("Cannot import cupy, falling back to numpy.")

        self.samples_per_posterior = max_samples
        self.data = self.resample_posteriors(posteriors,
                                             max_samples=max_samples)

        if isinstance(hyper_prior, types.FunctionType):
            hyper_prior = Model([hyper_prior])
        elif not (hasattr(hyper_prior, 'parameters')
                  and callable(getattr(hyper_prior, 'prob', None))):
            raise AttributeError(
                "hyper_prior must either be a function, "
                "or a class with attribute 'parameters' and method 'prob'")
        self.hyper_prior = hyper_prior
        Likelihood.__init__(self, hyper_prior.parameters)

        if sampling_prior is not None:
            raise ValueError(
                "Passing a sampling_prior is deprecated and will be removed "
                "in the next release. This should be passed as a 'prior' "
                "column in the posteriors.")
        elif "prior" in self.data:
            self.sampling_prior = self.data.pop("prior")
        else:
            logger.info("No prior values provided, defaulting to 1.")
            self.sampling_prior = 1

        if ln_evidences is not None:
            self.total_noise_evidence = np.sum(ln_evidences)
        else:
            self.total_noise_evidence = np.nan

        self.conversion_function = conversion_function
        self.selection_function = selection_function

        self.n_posteriors = len(posteriors)
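
# A hypothetical illustration of the two accepted hyper_prior forms: a plain
# function (wrapped in Model([...]) automatically) or any object exposing a
# `parameters` attribute and a callable `prob` method. The class and model
# names here are assumptions for the sketch.
def power_law_mass(dataset, alpha):
    # dataset: dict of sample arrays; alpha: hyperparameter of the model
    return dataset["mass_1"] ** -alpha

likelihood = HyperparameterLikelihood(  # assumed name of the class above
    posteriors=posteriors,          # list of DataFrames with a 'prior' column
    hyper_prior=power_law_mass,     # or an object with .parameters and .prob
    ln_evidences=ln_evidences,
)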
Example #6
def plot_results_page(results_dir: str, df: pd.DataFrame):
    """
    Given a directory of Bibly result.json and corner.png


    Makes pages of
        * injection and their PEs' summary table
        * injected mass distribution
        * PE snr and lnBF distribution
        * pp-test

    Combines the pages into a net summary page


    :param results_dir:
    :param df:
    :return:
    """

    save_dir = results_dir

    # plotting separate summary graphs and tables
    mass_scatter_path = plot_mass_scatter(df,
                                          filename=os.path.join(
                                              save_dir, "mass_scatter.html"))

    mass_distribution_path = plot_mass_distribution(df,
                                                    filename=os.path.join(
                                                        save_dir,
                                                        "mass_distribution"))
    mass_distribution_is_image = is_file_image(mass_distribution_path)

    data_table_path = plot_data_table(df,
                                      filename=os.path.join(
                                          save_dir, "summary_table.html"))
    analysis_stats_path = plot_analysis_statistics_data(
        df, filename=os.path.join(save_dir, "analysis_histograms.html"))

    pp_test_path = plot_pp_test(results_dir)
    pp_test_is_img = is_file_image(pp_test_path)

    hyperpe_z_normal = bilby.core.result.read_in_result(
        os.path.join(
            save_dir,
            "hyper_pe/normalMassDistribution_result.json")).log_evidence
    hyperpe_z_uniform = bilby.core.result.read_in_result(
        os.path.join(
            save_dir,
            "hyper_pe/uniformMassDistribution_result.json")).log_evidence

    # building summary page
    sections = [
        SectionTemplate(
            title="Injected Masses",
            html_path=mass_scatter_path,
            height="500",
            width="90%",
        ),
        SectionTemplate(
            title="",
            html_path=mass_distribution_path,
            height="500",
            width="90%",
            is_img=mass_distribution_is_image,
        ),
        SectionTemplate(
            title="Summary Table",
            html_path=data_table_path,
            height="500",
            text=
            f"{len(df)} injections. Click on the Injection Numbers to go to the corresponding corner plot.",
        ),
        SectionTemplate(title="PE Statistics",
                        html_path=analysis_stats_path,
                        height="500"),
        SectionTemplate(
            title="P-P test",
            html_path=pp_test_path,
            width="50%",
            height="50%",
            is_img=pp_test_is_img,
        ),
        SectionTemplate(
            title="Duty Cycle",
            html_path="hyper_pe/DutyCycle_corner.png",
            width="50%",
            height="50%",
            is_img=True,
        ),
        SectionTemplate(
            title="Mass Distribution: Normal distribution",
            html_path="hyper_pe/normalMassDistribution_corner.png",
            width="50%",
            height="50%",
            is_img=True,
        ),
        SectionTemplate(
            title="Mass Distribution: Uniform distribution",
            html_path="hyper_pe/uniformMassDistribution_corner.png",
            width="50%",
            height="50%",
            is_img=True,
            text=
            f"Log BF (uniform - normal): {hyperpe_z_uniform-hyperpe_z_normal}",
        ),
    ]
    summary_page = SummaryTemplate(title="IMBH Injection PE Summary",
                                   sections=sections)

    report_file_name = os.path.join(save_dir, "summary_report.html")
    with open(report_file_name, "w") as report_file:
        report_file.write(summary_page.render())

    logger.info("File saved at " + report_file_name)
Example #7
def run_data_analysis(args):
    parser = gwpopulation_pipe.data_analysis.create_parser()
    args, unknown_args = parser.parse_known_args(
        "/home/avi.vajpeyi/projects/agn_phenomenological_model/simulated_events/small_test_dir/test_out/simulated_pop_config_complete.ini --prior /home/avi.vajpeyi/projects/agn_phenomenological_model/population_inference/priors/mass_c_iid_mag_agn_tilt_powerlaw_redshift.prior --label simulated_pop_mass_c_iid_mag_agn_tilt_powerlaw_redshift --models SmoothedMassDistribution --models iid_spin_magnitude --models agn_spin_orientation --models gwpopulation.models.redshift.PowerLawRedshift --vt-models SmoothedMassDistribution --vt-models gwpopulation.models.redshift.PowerLawRedshift"
        .split())
    posterior_file = os.path.join(args.run_dir, "posteriors_list",
                                  f"{args.data_label}.pkl")
    posteriors = pd.read_pickle(posterior_file)
    for ii, post in enumerate(posteriors):
        posteriors[ii] = post[post["redshift"] < args.max_redshift]
    gwpopulation_pipe.data_analysis.vt_helper.N_EVENTS = len(posteriors)
    gwpopulation_pipe.data_analysis.vt_helper.max_redshift = args.max_redshift
    logger.info(f"Loaded {len(posteriors)} posteriors")
    event_ids = list()
    with open(
            os.path.join(args.run_dir, "posteriors_list",
                         f"{args.data_label}_posterior_files.txt"),
            "r",
    ) as ff:
        for line in ff.readlines():
            # the event id precedes the ":" on each line
            event_ids.append(line.split(":")[0])

    logger.info(f"VT Models = {args.vt_models}")
    logger.info(f"Tilt Models = {args.tilt_models}")

    hyper_prior = gwpopulation_pipe.data_analysis.load_prior(args)
    model = gwpopulation_pipe.data_analysis.load_model(args)
    selection = gwpopulation_pipe.data_analysis.load_vt(args)

    likelihood = gwpopulation_pipe.data_analysis.create_likelihood(
        args, posteriors, model, selection)
    likelihood.input_parameters.update(hyper_prior.sample())
    likelihood.log_likelihood_ratio()

    if args.injection_file is not None:
        injections = pd.read_json(args.injection_file)
        injection_parameters = dict(injections.iloc[args.injection_index])
    else:
        injection_parameters = None

    logger.info("Starting sampling")
    result = gwpopulation_pipe.data_analysis.run_sampler(
        likelihood=likelihood,
        priors=hyper_prior,
        label=args.label,
        sampler=args.sampler_name,
        outdir=os.path.join(args.run_dir, "result"),
        injection_parameters=injection_parameters,
        **gwpopulation_pipe.data_analysis.get_sampler_kwargs(args),
    )
    result.prior = args.prior
    result.models = args.models
    result.event_ids = event_ids

    logger.info("Computing rate posterior")
    gwpopulation_pipe.data_analysis.compute_rate_posterior(
        posterior=result.posterior, selection=selection)
    result.save_to_file(extension="json", overwrite=True)
    logger.info("Resampling single event posteriors")
    gwpopulation_pipe.data_analysis.resample_single_event_posteriors(
        likelihood, result, save=True)
    result.plot_corner(parameters=result.search_parameter_keys +
                       ["log_10_rate"])
Example #8
def main():
    args = load_args()
    logger.info("BEGINNING COLLECTION")
    run_data_collection(args)
    logger.info("BEGINNING ANALYSIS")
    run_data_analysis(args)
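
# Standard script entry point (an assumed convention for this module) so
# the collection and analysis pipeline above can be run directly.
if __name__ == "__main__":
    main()
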
def plot_interferometer_waveform_posterior(res,
                                           interferometer,
                                           level=0.9,
                                           n_samples=None,
                                           start_time=None,
                                           end_time=None,
                                           outdir='.',
                                           signals_to_plot=None):
    """
    Plot the posterior for the waveform in the frequency domain and
    whitened time domain.

    The interferometer strain data and ASD are overlaid on the posterior.

    Any signals provided in signals_to_plot are overlaid as well.

    Parameters
    ==========
    res: bilby.core.result.Result
        result object containing the posterior samples and waveform metadata
    interferometer: bilby.gw.detector.Interferometer
        detector to use, the strain data will be overlaid on the posterior
    level: float, optional
        symmetric confidence interval to show, default is 90%
    n_samples: int, optional
        number of samples to use to calculate the median/interval,
        default is all
    start_time: float, optional
        time relative to merger at which to begin the time domain plot;
        the merger time is defined as the mean of the geocenter time
        posterior. Default is -0.4.
    end_time: float, optional
        time relative to merger at which to end the time domain plot;
        the merger time is defined as the mean of the geocenter time
        posterior. Default is 0.2.
    outdir: str, optional
        directory to save the figure in, default is the current directory
    signals_to_plot: list, optional
        list of dicts with keys 'params', 'label' and 'color' describing
        additional signals to overlay

    Returns
    =======
    None, the figure is saved to outdir.

    Notes
    -----
    To reduce the memory footprint we decimate the frequency domain
    waveforms to have ~4000 entries. This should be sufficient for decent
    resolution.
    """

    DATA_COLOR = "#ff7f0e"
    WAVEFORM_COLOR = "#1f77b4"
    INJECTION_COLOR = "#000000"

    if signals_to_plot is None:
        signals_to_plot = []

    if not isinstance(interferometer, bilby.gw.detector.Interferometer):
        raise TypeError('interferometer must be a bilby Interferometer')

    logger.info("Generating waveform figure for {}".format(
        interferometer.name))

    if n_samples is None:
        samples = res.posterior
    else:
        samples = res.posterior.sample(n_samples, replace=False)

    if start_time is None:
        start_time = -0.4
    start_time = np.mean(samples.geocent_time) + start_time
    if end_time is None:
        end_time = 0.2
    end_time = np.mean(samples.geocent_time) + end_time

    time_idxs = ((interferometer.time_array >= start_time) &
                 (interferometer.time_array <= end_time))
    frequency_idxs = np.where(interferometer.frequency_mask)[0]
    logger.debug("Frequency mask contains {} values".format(
        len(frequency_idxs)))
    frequency_idxs = frequency_idxs[::max(1, len(frequency_idxs) // 4000)]
    logger.debug("Downsampling frequency mask to {} values".format(
        len(frequency_idxs)))
    plot_times = interferometer.time_array[time_idxs]
    plot_times -= interferometer.strain_data.start_time
    start_time -= interferometer.strain_data.start_time
    end_time -= interferometer.strain_data.start_time
    plot_frequencies = interferometer.frequency_array[frequency_idxs]

    waveform_arguments = res.waveform_arguments
    waveform_arguments['waveform_approximant'] = "IMRPhenomPv2"

    waveform_generator = res.waveform_generator_class(
        duration=res.duration,
        sampling_frequency=res.sampling_frequency,
        start_time=res.start_time,
        frequency_domain_source_model=res.frequency_domain_source_model,
        parameter_conversion=res.parameter_conversion,
        waveform_arguments=waveform_arguments)

    old_font_size = rcParams["font.size"]
    rcParams["font.size"] = 20
    fig, axs = plt.subplots(2,
                            1,
                            gridspec_kw=dict(height_ratios=[1.5, 1]),
                            figsize=(16, 12.5))

    axs[0].loglog(plot_frequencies,
                  asd_from_freq_series(
                      interferometer.frequency_domain_strain[frequency_idxs],
                      1 / interferometer.strain_data.duration),
                  color=DATA_COLOR,
                  label='Data',
                  alpha=0.3)
    axs[0].loglog(
        plot_frequencies,
        interferometer.amplitude_spectral_density_array[frequency_idxs],
        color=DATA_COLOR,
        label='ASD')
    axs[1].plot(
        plot_times,
        infft(interferometer.whitened_frequency_domain_strain *
              np.sqrt(2. / interferometer.sampling_frequency),
              sampling_frequency=interferometer.strain_data.sampling_frequency)
        [time_idxs],
        color=DATA_COLOR,
        alpha=0.3)
    logger.debug('Plotted interferometer data.')

    fd_waveforms = list()
    td_waveforms = list()
    for _, params in tqdm(samples.iterrows(),
                          desc="Processing Samples",
                          total=len(samples)):
        try:
            params = dict(params)
            wf_pols = waveform_generator.frequency_domain_strain(params)
            fd_waveform = interferometer.get_detector_response(wf_pols, params)
            td_waveform = infft(
                fd_waveform * np.sqrt(2. / interferometer.sampling_frequency) /
                interferometer.amplitude_spectral_density_array,
                res.sampling_frequency)[time_idxs]
        except Exception as e:
            # skip samples for which waveform generation fails so the
            # frequency and time domain lists stay in sync
            logger.debug(f"ERROR: {e}\nparams: {params}")
            continue
        fd_waveforms.append(fd_waveform[frequency_idxs])
        td_waveforms.append(td_waveform)
    fd_waveforms = asd_from_freq_series(
        fd_waveforms, 1 / interferometer.strain_data.duration)
    td_waveforms = np.array(td_waveforms)

    delta = (1 + level) / 2
    upper_percentile = delta * 100
    lower_percentile = (1 - delta) * 100
    logger.debug('Plotting posterior between the {} and {} percentiles'.format(
        lower_percentile, upper_percentile))

    lower_limit = np.mean(fd_waveforms, axis=0)[0] / 1e3
    axs[0].loglog(plot_frequencies,
                  np.mean(fd_waveforms, axis=0),
                  color=WAVEFORM_COLOR,
                  label='Mean reconstructed')
    axs[0].fill_between(plot_frequencies,
                        np.percentile(fd_waveforms, lower_percentile, axis=0),
                        np.percentile(fd_waveforms, upper_percentile, axis=0),
                        color=WAVEFORM_COLOR,
                        label='{}\% credible interval'.format(
                            int(upper_percentile - lower_percentile)),
                        alpha=0.3)
    axs[1].plot(plot_times,
                np.mean(td_waveforms, axis=0),
                color=WAVEFORM_COLOR)
    axs[1].fill_between(plot_times,
                        np.percentile(td_waveforms, lower_percentile, axis=0),
                        np.percentile(td_waveforms, upper_percentile, axis=0),
                        color=WAVEFORM_COLOR,
                        alpha=0.3)

    if len(signals_to_plot) > 0:
        for d in signals_to_plot:
            params = d['params']
            label = d['label']
            col = d['color']
            try:
                hf_inj = waveform_generator.frequency_domain_strain(params)
                hf_inj_det = interferometer.get_detector_response(
                    hf_inj, params)
                ht_inj_det = infft(
                    hf_inj_det *
                    np.sqrt(2. / interferometer.sampling_frequency) /
                    interferometer.amplitude_spectral_density_array,
                    res.sampling_frequency)[time_idxs]

                axs[0].loglog(plot_frequencies,
                              asd_from_freq_series(
                                  hf_inj_det[frequency_idxs],
                                  1 / interferometer.strain_data.duration),
                              label=label,
                              linestyle=':',
                              color=col)
                axs[1].plot(plot_times, ht_inj_det, linestyle=':', color=col)
                logger.debug('Plotted injection.')
            except IndexError as e:
                logger.info(
                    'Failed to plot injection with message {}.'.format(e))

    f_domain_x_label = "$f [\\mathrm{Hz}]$"
    f_domain_y_label = "$\\mathrm{ASD} \\left[\\mathrm{Hz}^{-1/2}\\right]$"
    t_domain_x_label = "$t - {} [s]$".format(
        interferometer.strain_data.start_time)
    t_domain_y_label = "Whitened Strain"

    axs[0].set_xlim(interferometer.minimum_frequency,
                    interferometer.maximum_frequency)
    axs[1].set_xlim(start_time, end_time)
    axs[0].set_ylim(lower_limit)
    axs[0].set_xlabel(f_domain_x_label)
    axs[0].set_ylabel(f_domain_y_label)
    axs[1].set_xlabel(t_domain_x_label)
    axs[1].set_ylabel(t_domain_y_label)
    axs[0].legend(loc='lower left', ncol=2)

    filename = f"{outdir}/{res.label}_{interferometer.name}_waveform.png"

    plt.tight_layout()
    fig.savefig(fname=filename, dpi=600)
    plt.close()
    logger.info("Waveform figure saved to {}".format(filename))
    rcParams["font.size"] = old_font_size