Example #1
from numpy.testing import assert_array_equal

from pisa.core.distribution_maker import DistributionMaker


def assert_correct_scaling(pipeline_cfg, fixed_errors=False):
    """Run the pipeline and assert that scaling the weights by a factor of
    two is correctly propagated to the output map."""
    dmaker = DistributionMaker([pipeline_cfg])
    out = dmaker.get_outputs(return_sum=True)[0]
    dmaker.pipelines[0].params.weight_scale = 2.0
    out2 = dmaker.get_outputs(return_sum=True)[0]
    if fixed_errors:
        # This case is special: we expect the nominal counts to be multiplied,
        # but the errors to stay fixed (applies to set_variance errors)
        assert_array_equal(out.nominal_values * 2.0, out2.nominal_values)
        assert_array_equal(out.std_devs, out2.std_devs)
    else:
        assert out * 2.0 == out2
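
A minimal usage sketch, assuming hypothetical pipeline configs: the first must expose a free `weight_scale` parameter, and the second is assumed to use set_variance errors:

# Hypothetical usage; both config paths are placeholders.
assert_correct_scaling("my_pipeline.cfg")
assert_correct_scaling("my_pipeline_set_variance.cfg", fixed_errors=True)
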
Example #2
def test_kde_bootstrapping(verbosity=Levels.WARN):
    """Unit test for the kde stage."""

    set_verbosity(verbosity)

    test_cfg = deepcopy(TEST_CONFIGS.pipe_cfg)
    test_cfg[("data", "toy_event_generator")] = deepcopy(
        TEST_CONFIGS.event_generator_cfg
    )
    test_cfg[("aeff", "weight")] = deepcopy(TEST_CONFIGS.aeff_cfg)
    test_cfg[("utils", "kde")] = deepcopy(TEST_CONFIGS.kde_cfg)

    # get map, but without the linearization
    test_cfg[("utils", "kde")]["linearize_log_dims"] = False
    dmaker = DistributionMaker([test_cfg])
    map_baseline_no_linearization = dmaker.get_outputs(return_sum=True)[0]

    # get a baseline (with linearization, which we will use from here on out)
    test_cfg[("utils", "kde")]["linearize_log_dims"] = True
    dmaker = DistributionMaker([test_cfg])
    map_baseline = dmaker.get_outputs(return_sum=True)[0]
    logging.debug(f"Baseline KDE'd map:\n{map_baseline}")

    # assert that linearization makes a difference at all
    total_no_lin = np.sum(map_baseline_no_linearization.nominal_values)
    total_with_lin = np.sum(map_baseline.nominal_values)
    assert total_no_lin != total_with_lin
    # but also that the difference isn't huge (< 5% difference in total bin count)
    # --> This will fail if one forgets *not* to take the log when linearization
    #     is turned off, for example. In that case, most bins will be empty,
    #     because the binning would be linear while the KDE would be in log space.
    assert np.abs(total_no_lin / total_with_lin - 1.0) < 0.05
    # Make sure that different seeds produce different maps, and that the same seed will
    # produce the same map.
    # We enable bootstrapping now, without re-loading everything, to save time.
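    # Requesting ("weights", "errors") as the output key makes the output maps
    # carry std_devs alongside the nominal bin counts.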
    dmaker.pipelines[0].output_key = ("weights", "errors")
    dmaker.pipelines[0].stages[-1].bootstrap = True

    map_seed0 = dmaker.get_outputs(return_sum=True)[0]
    dmaker.pipelines[0].stages[-1].bootstrap_seed = 1
    map_seed1 = dmaker.get_outputs(return_sum=True)[0]

    logging.debug(f"Map with seed 0 is:\n{map_seed0}")
    logging.debug(f"Map with seed 1 is:\n{map_seed1}")

    assert not map_seed0 == map_seed1

    dmaker.pipelines[0].stages[-1].bootstrap_seed = 0
    map_seed0_reprod = dmaker.get_outputs(return_sum=True)[0]

    assert map_seed0 == map_seed0_reprod

    logging.info("<< PASS : kde_bootstrapping >>")
Example #3
def test_kde_bootstrapping(verbosity=Levels.WARN):
    """Unit test for the kde stage."""

    set_verbosity(verbosity)

    example_cfg = parse_pipeline_config("settings/pipeline/example.cfg")

    # We have to remove containers with too few events, otherwise the KDE fails simply
    # because too few distinct events are in one of the PID channels after bootstrapping.
    example_cfg[("data", "simple_data_loader")]["output_names"] = [
        "numu_cc",
        "numubar_cc",
    ]

    kde_stage_cfg = OrderedDict()
    kde_stage_cfg["apply_mode"] = example_cfg[("utils", "hist")]["apply_mode"]
    kde_stage_cfg["calc_mode"] = "events"
    kde_stage_cfg["bootstrap"] = False
    kde_stage_cfg["bootstrap_seed"] = 0
    kde_stage_cfg["bootstrap_niter"] = 5

    kde_pipe_cfg = deepcopy(example_cfg)

    # Replace histogram stage with KDE stage
    del kde_pipe_cfg[("utils", "hist")]
    kde_pipe_cfg[("utils", "kde")] = kde_stage_cfg

    # no errors in the baseline, since bootstrapping is not enabled
    kde_pipe_cfg["pipeline"]["output_key"] = "weights"

    # get a baseline
    dmaker = DistributionMaker([kde_pipe_cfg])
    map_baseline = dmaker.get_outputs(return_sum=True)[0]
    logging.debug(f"Baseline KDE'd map:\n{map_baseline}")

    # Make sure that different seeds produce different maps, and that the same seed will
    # produce the same map.
    # We enable bootstrapping now, without re-loading everything, to save time.
    dmaker.pipelines[0].output_key = ("weights", "errors")
    dmaker.pipelines[0].stages[-1].bootstrap = True

    map_seed0 = dmaker.get_outputs(return_sum=True)[0]
    dmaker.pipelines[0].stages[-1].bootstrap_seed = 1
    map_seed1 = dmaker.get_outputs(return_sum=True)[0]

    logging.debug(f"Map with seed 0 is:\n{map_seed0}")
    logging.debug(f"Map with seed 1 is:\n{map_seed1}")

    assert not map_seed0 == map_seed1

    dmaker.pipelines[0].stages[-1].bootstrap_seed = 0
    map_seed0_reprod = dmaker.get_outputs(return_sum=True)[0]

    assert map_seed0 == map_seed0_reprod

    logging.info("<< PASS : kde_bootstrapping >>")
Example #4
def serve(config, ref, port=DFLT_PORT):
    """Instantiate PISA objects and run server for processing requests.

    Parameters
    ----------
    config : str or iterable thereof
        Resource path(s) to pipeline config(s)

    ref : str
        Resource path to reference map

    port : int or str, optional
        TCP port on which the server listens; defaults to DFLT_PORT

    """
    # Instantiate the objects here to save having to do this repeatedly
    dist_maker = DistributionMaker(config)
    ref = MapSet.from_json(ref)

    # Define server as a closure such that it captures the above-instantiated objects
    class MyTCPHandler(SocketServer.BaseRequestHandler):
        """
        The request handler class for our server.

        It is instantiated once per connection to the server, and must override
        the handle() method to implement communication to the client.

        See SocketServer.BaseRequestHandler for documentation of args.
        """
        def handle(self):
            try:
                param_values = receive_obj(self.request)
            except ConnectionClosed:
                return
            dist_maker._set_rescaled_free_params(param_values)  # pylint: disable=protected-access
            test_map = dist_maker.get_outputs(return_sum=True)[0]
            llh = test_map.llh(
                expected_values=ref,
                # return the sum of llh over all bins (not per-bin llh's)
                binned=False,
            )
            send_obj(llh, self.request)

    server = SocketServer.TCPServer((DFLT_HOST, int(port)), MyTCPHandler)
    print("llh server started on {}:{}".format(DFLT_HOST, port))
    server.serve_forever()
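
A hedged client-side sketch for this server, reusing the module's send_obj/receive_obj helpers; the parameter payload and connection details are assumptions, not values from the source:

# Hypothetical client; assumes send_obj/receive_obj from the same module, and
# that param_values holds rescaled free-param values as expected by
# DistributionMaker._set_rescaled_free_params.
import socket

param_values = [0.5, 0.5, 0.5]  # placeholder: one value per free param
sock = socket.create_connection((DFLT_HOST, int(DFLT_PORT)))
send_obj(param_values, sock)
llh = receive_obj(sock)
print("llh =", llh)
sock.close()
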
Example #5
    def __init__(self, pipelines, label=None, shared_params=None):
        self.label = label
        self._source_code_hash = None

        if shared_params is None:
            self.shared_params = []
        else:
            self.shared_params = shared_params

        if isinstance(pipelines,
                      (str, PISAConfigParser, OrderedDict, Pipeline)):
            pipelines = [pipelines]

        self._distribution_makers, self.det_names = [], []
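        # Group the pipelines by detector name; each group is wrapped in its
        # own DistributionMaker below.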
        for pipeline in pipelines:
            if not isinstance(pipeline, Pipeline):
                pipeline = Pipeline(pipeline)

            name = pipeline._detector_name
            if name in self.det_names:
                self._distribution_makers[self.det_names.index(name)].append(
                    pipeline)
            else:
                self._distribution_makers.append([pipeline])
                self.det_names.append(name)

        if None in self.det_names and len(self.det_names) > 1:
            raise NameError(
                'At least one of the used pipelines has no detector_name.')

        for i, pipelines in enumerate(self._distribution_makers):
            self._distribution_makers[i] = DistributionMaker(
                pipelines=pipelines)

        for sp in self.shared_params:
            n = 0
            for distribution_maker in self._distribution_makers:
                if sp in distribution_maker.params.free.names:
                    n += 1
            if n < 2:
                raise NameError(
                    'Shared param %s is a free param in fewer than 2'
                    ' detectors.' % sp)
Example #6
def create_mc_template(toymc_params, config_file=None, seed=None):
    '''
    Create MC template out of a pisa pipeline
    '''
    if seed is not None:
        np.random.seed(seed)

    Config = parse_pipeline_config(config_file)

    new_n_events_data = Param(name='n_events_data',
                              value=toymc_params.n_data,
                              prior=None,
                              range=None,
                              is_fixed=True)
    new_sig_frac = Param(name='signal_fraction',
                         value=toymc_params.signal_fraction,
                         prior=None,
                         range=None,
                         is_fixed=True)
    new_stats_factor = Param(name='stats_factor',
                             value=toymc_params.stats_factor,
                             prior=None,
                             range=None,
                             is_fixed=True)

    # These should match the values of the config file, but we override them just in case we need to change these later
    new_mu = Param(name='mu',
                   value=toymc_params.mu,
                   prior=None,
                   range=[0, 100],
                   is_fixed=False)
    new_sigma = Param(name='sigma',
                      value=toymc_params.sigma,
                      prior=None,
                      range=None,
                      is_fixed=True)
    Config[('data', 'pi_simple_signal')]['params'].update(ParamSet([
        new_n_events_data, new_sig_frac, new_stats_factor, new_mu, new_sigma
    ]))

    MCtemplate = DistributionMaker(Config)

    return MCtemplate
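
A hedged usage sketch; the config path is a placeholder and `toymc_params` is mocked up with only the attributes accessed above:

# Hypothetical usage; "toy_pipeline.cfg" is a placeholder config path.
from types import SimpleNamespace

toymc_params = SimpleNamespace(
    n_data=10000, signal_fraction=0.1, stats_factor=10.0, mu=20.0, sigma=3.0,
)
template_maker = create_mc_template(
    toymc_params, config_file="toy_pipeline.cfg", seed=0,
)
template = template_maker.get_outputs(return_sum=True)[0]
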
Example #7
def scan_allsyst(template_settings, steps, hypo_param_selections, outdir,
                 minimizer_settings, metric, debug_mode):
    """Scan (separately) all systematics (i.e., non-fixed params).

    Parameters
    ----------
    template_settings
    steps
    hypo_param_selections
    outdir
    minimizer_settings
    metric
    debug_mode

    Returns
    -------
    results : dict
        Keys are param names, values are the scan results

    """
    outdir = expanduser(expandvars(outdir))
    mkdir(outdir, warn=False)

    hypo_maker = DistributionMaker(template_settings)

    hypo_maker.select_params(hypo_param_selections)
    data_dist = hypo_maker.get_outputs(return_sum=True)

    minimizer_settings = from_file(minimizer_settings)

    analysis = Analysis()

    results = OrderedDict()  # pylint: disable=redefined-outer-name
    for param in hypo_maker.params:
        if param.is_fixed:
            continue

        logging.info('Scanning %s', param.name)
        nominal_value = param.value

        outfile = join(
            outdir,
            '{:s}_{:d}_steps_{:s}_scan.json'.format(param.name, steps, metric))
        if isfile(outfile):
            raise IOError(
                '`outfile` "{}" exists, not overwriting.'.format(outfile))

        results[param.name] = analysis.scan(
            data_dist=data_dist,
            hypo_maker=hypo_maker,
            hypo_param_selections=hypo_param_selections,
            metric=metric,
            param_names=param.name,
            steps=steps,
            only_points=None,
            outer=True,
            profile=False,
            minimizer_settings=minimizer_settings,
            outfile=outfile,
            debug_mode=debug_mode)

        to_file(results[param.name], outfile)
        param.value = nominal_value

        logging.info('Done scanning param "%s"', param.name)

    logging.info('Done.')

    return results
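
A hedged invocation sketch; the minimizer settings path and output directory are placeholders, not values from the source:

# Hypothetical call; paths below are placeholders.
all_results = scan_allsyst(
    template_settings="settings/pipeline/example.cfg",
    steps=10,
    hypo_param_selections=None,
    outdir="/tmp/scan_allsyst",
    minimizer_settings="settings/minimizer/example.json",
    metric="chi2",
    debug_mode=None,
)
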
Example #8
def test_bootstrap():
    """Unit test for the bootstrap stage."""

    from pisa.core.distribution_maker import DistributionMaker
    from pisa.utils.config_parser import parse_pipeline_config

    example_cfg = parse_pipeline_config("settings/pipeline/example.cfg")

    # We need to insert the bootstrap stage right after the data loading stage
    bootstrap_pipe_cfg = insert_bootstrap_after_data_loader(example_cfg, seed=0)

    logging.debug("bootstrapped pipeline stage order:")
    logging.debug(list(bootstrap_pipe_cfg.keys()))

    # get a baseline
    dmaker = DistributionMaker([example_cfg])
    map_baseline = dmaker.get_outputs(return_sum=True)[0]

    # Make sure that different seeds produce different maps, and that the same seed will
    # produce the same map.
    dmaker = DistributionMaker([bootstrap_pipe_cfg])
    map_seed0 = dmaker.get_outputs(return_sum=True)[0]

    # find key of bootstrap stage
    bootstrap_idx = 0
    for i, stage in enumerate(dmaker.pipelines[0].stages):
        if stage.__class__.__name__ == "bootstrap":
            bootstrap_idx = i

    # without re-loading the entire pipeline, we set the seed and call the setup function
    # to save time for the test
    dmaker.pipelines[0].stages[bootstrap_idx].seed = 1
    dmaker.pipelines[0].stages[bootstrap_idx].setup()

    map_seed1 = dmaker.get_outputs(return_sum=True)[0]

    assert not map_seed0 == map_seed1

    dmaker.pipelines[0].stages[bootstrap_idx].seed = 0
    dmaker.pipelines[0].stages[bootstrap_idx].setup()
    map_seed0_reprod = dmaker.get_outputs(return_sum=True)[0]

    assert map_seed0 == map_seed0_reprod

    # Quantify the variance of the resulting maps. They should be about the size of the
    # expectation from sum of weights-squared.

    nominal_values = []
    for i in range(100):
        dmaker.pipelines[0].stages[bootstrap_idx].seed = i
        dmaker.pipelines[0].stages[bootstrap_idx].setup()
        map_bootstrap = dmaker.get_outputs(return_sum=True)[0]
        nominal_values.append(map_bootstrap.nominal_values)

    nominal_values = np.stack(nominal_values)
    with np.errstate(divide="ignore", invalid="ignore"):
        # calculate the ratio between the bootstrap nominal and the baseline nominal
        bs_nom_ratios = np.mean(nominal_values, axis=0) / map_baseline.nominal_values
        # and the standard deviation ratio as well
        bs_std_ratios = np.std(nominal_values, axis=0) / map_baseline.std_devs
        # assert that both nominal and standard deviation match the expectation from
        # baseline up to a small error
        assert np.abs(np.nanmean(bs_nom_ratios) - 1.0) < 0.01
        # the standard deviations are a little harder to match in 100 samples
        assert np.abs(np.nanmean(bs_std_ratios) - 1.0) < 0.02

    logging.info("<< PASS : bootstrap >>")
Example #9
def systematics_tests(return_outputs=False):
    """Load the HypoTesting class and use it to do a systematic study
    in Asimov.

    This will take some input pipeline configuration and then turn each one of
    the systematics off in turn, doing a new hypothesis test each time. The
    user will have the option to fix this systematic to either the baseline or
    some shifted value (+/- 1 sigma, or appropriate). One also has the ability
    in the case of the latter to still fit with this systematically incorrect
    hypothesis."""
    # NOTE: import here to avoid circular refs
    from pisa.scripts.analysis import parse_args
    init_args_d = parse_args(description=systematics_tests.__doc__,
                             command=systematics_tests)

    # NOTE: Removing extraneous args that won't get passed to instantiate the
    # HypoTesting object via dictionary's `pop()` method.
    inject_wrong = init_args_d.pop('inject_wrong')
    fit_wrong = init_args_d.pop('fit_wrong')
    only_syst = init_args_d.pop('only_syst')
    do_baseline = not init_args_d.pop('skip_baseline')
    if fit_wrong:
        if not inject_wrong:
            raise ValueError('You have specified to fit the systematically'
                             ' wrong hypothesis but have not specified to'
                             ' actually generate a systematically wrong'
                             ' hypothesis. If you want to flag "fit_wrong"'
                             ' please also flag "inject_wrong"')
        else:
            logging.info('Injecting a systematically wrong hypothesis while'
                         ' also allowing the minimiser to attempt to correct'
                         ' for it.')
    else:
        if inject_wrong:
            logging.info('Injecting a systematically wrong hypothesis but'
                         ' NOT allowing the minimiser to attempt to correct'
                         ' for it. Hypothesis maker will be FIXED at the'
                         ' baseline value.')
        else:
            logging.info('A standard N-1 test will be performed where each'
                         ' systematic is fixed to the baseline value'
                         ' one-by-one.')

    # Normalize and convert `pipeline` filenames; store to `*_maker`
    # (which is the argument naming convention that HypoTesting's init accepts).
    # For this test, pipeline is required so we don't need the try arguments
    # or the checks on it being None
    filenames = init_args_d.pop('pipeline')
    filenames = sorted([normcheckpath(fname) for fname in filenames])
    init_args_d['h0_maker'] = filenames
    # However, we do need them for the selections, since they can be different
    for maker in ['h0', 'h1', 'data']:
        ps_name = maker + '_param_selections'
        ps_str = init_args_d[ps_name]
        if ps_str is None:
            ps_list = None
        else:
            ps_list = [x.strip().lower() for x in ps_str.split(',')]
        init_args_d[ps_name] = ps_list

    init_args_d['data_maker'] = init_args_d['h0_maker']
    init_args_d['h1_maker'] = init_args_d['h0_maker']
    init_args_d['h0_maker'] = DistributionMaker(init_args_d['h0_maker'])
    init_args_d['h1_maker'] = DistributionMaker(init_args_d['h1_maker'])
    init_args_d['h1_maker'].select_params(init_args_d['h1_param_selections'])
    init_args_d['data_maker'] = DistributionMaker(init_args_d['data_maker'])
    if init_args_d['data_param_selections'] is None:
        init_args_d['data_param_selections'] = \
            init_args_d['h0_param_selections']
        init_args_d['data_name'] = init_args_d['h0_name']
    init_args_d['data_maker'].select_params(
        init_args_d['data_param_selections'])

    if only_syst is not None:
        for syst in only_syst:
            if syst not in init_args_d['h0_maker'].params.free.names:
                raise ValueError(
                    'Systematic test requested to be performed on systematic'
                    ' %s but it does not appear in the free parameters of the'
                    ' pipeline passed to the script - %s.' %
                    (syst, init_args_d['h0_maker'].params.free.names))
        logging.info(
            'Performing chosen systematic test on just the following'
            ' systematics - %s.', only_syst)

    # Instantiate the analysis object
    hypo_testing = HypoTesting(**init_args_d)
    # Everything is set up so do the tests
    outputs = hypo_testing.asimov_syst_tests(  # pylint: disable=redefined-outer-name
        inject_wrong=inject_wrong,
        fit_wrong=fit_wrong,
        only_syst=only_syst,
        do_baseline=do_baseline,
        h0_name=init_args_d['h0_name'],
        h1_name=init_args_d['h1_name'],
        data_name=init_args_d['data_name'])

    if return_outputs:
        return outputs
Example #10
    set_verbosity(args.v)

    if os.path.isfile(args.outfile):
        print("Output file ", args.outfile,
              " already exists; delete or move it.")
    else:
        if args.blind:
            assert (args.function == 'fit')
            assert (args.pseudo_data == 'data')

        if args.data_settings is None:
            data_settings = args.template_settings
        else:
            data_settings = args.data_settings

        data_maker = DistributionMaker(data_settings)
        template_maker = DistributionMaker(args.template_settings)

        if args.fix_param != '':
            template_maker.params.fix(args.fix_param)
        if args.set_param is not None:
            for one_set_param in args.set_param:
                p_name, value = one_set_param.split("=")
                print("set_parm ", p_name, " to  ", value)
                value = parse_quantity(value)
                value = value.n * value.units
                prm = template_maker.params[p_name]
                print("old", p_name, ".value for template= ", prm.value)
                prm.value = value
                template_maker.update_params(prm)
                print("new ", p_name, ".value for template= ", prm.value)
def main():
    args = parse_args()
    init_args_d = vars(args)

    # NOTE: Removing extraneous args that won't get passed to instantiate the
    # HypoTesting object via dictionary's `pop()` method.

    set_verbosity(init_args_d.pop('v'))

    detector = init_args_d.pop('detector')
    selection = init_args_d.pop('selection')
    atype = init_args_d.pop('atype')
    return_total = not init_args_d.pop('return_bits')

    # Normalize and convert `*_pipeline` filenames; store to `*_maker`
    # (which is the argument naming convention that HypoTesting's init accepts).
    for maker in ['h0', 'h1']:
        filenames = init_args_d.pop(maker + '_pipeline')
        if filenames is not None:
            filenames = sorted(
                [normcheckpath(fname) for fname in filenames]
            )
        init_args_d[maker + '_maker'] = filenames

        ps_name = maker + '_param_selections'
        ps_str = init_args_d[ps_name]
        if ps_str is None:
            ps_list = None
        else:
            ps_list = [x.strip().lower() for x in ps_str.split(',')]
        init_args_d[ps_name] = ps_list

    # Add dummies for the arguments we don't care about for making these plots
    init_args_d['minimizer_settings'] = {}
    init_args_d['data_is_data'] = None
    init_args_d['fluctuate_data'] = None
    init_args_d['fluctuate_fid'] = None
    init_args_d['metric'] = 'chi2'

    if init_args_d['h1_maker'] is None:
        init_args_d['h1_maker'] = init_args_d['h0_maker']

    init_args_d['h0_maker'] = DistributionMaker(init_args_d['h0_maker'])
    init_args_d['h1_maker'] = DistributionMaker(init_args_d['h1_maker'])
    init_args_d['h1_maker'].select_params(init_args_d['h1_param_selections'])

    # Instantiate the analysis object
    hypo_testing = HypoTesting(**init_args_d)

    h0_maker = hypo_testing.h0_maker
    h0_maker.select_params(init_args_d['h0_param_selections'])
    for h0_pipeline in h0_maker.pipelines:
        # Need a special case where PID is a separate stage
        if 'pid' in h0_pipeline.stage_names:
            if return_total:
                raise ValueError(
                    "PID is a separate stage but you have requested"
                    " return_total in the arguments to this script."
                )
            return_h0_sum = False
        else:
            return_h0_sum = return_total
    h0_maps = h0_maker.get_outputs(return_sum=return_h0_sum)

    # Assume just a singular pipeline used here.
    # Not sure how else to deal with PID as a separate stage.
    if not return_h0_sum:
        h0_maps = h0_maps[0]

    h1_maker = hypo_testing.h1_maker
    h1_maker.select_params(init_args_d['h1_param_selections'])
    for h1_pipeline in h1_maker.pipelines:
        # Need a special case where PID is a separate stage
        if 'pid' in h1_pipeline.stage_names:
            if return_total:
                raise ValueError(
                    "PID is a separate stage but you have requested"
                    " return_total in the arguments to this script."
                )
            return_h1_sum = False
        else:
            return_h1_sum = return_total
    h1_maps = h1_maker.get_outputs(return_sum=return_h1_sum)

    # Assume just a singular pipeline used here.
    # Not sure how else to deal with PID as a separate stage.
    if not return_h1_sum:
        h1_maps = h1_maps[0]

    if sorted(h0_maps.names) != sorted(h1_maps.names):
        raise ValueError(
            "The output names of your h0 and h1 pipelines "
            "do not agree - %s and %s."%(
                sorted(h0_maps.names), sorted(h1_maps.names)
            )
        )

    det_sel = []
    if detector.strip() != '':
        det_sel.append(detector.strip())
    if selection.strip() != '':
        det_sel.append(selection.strip())
    det_sel_label = ' '.join(det_sel)

    det_sel_plot_label = det_sel_label
    if det_sel_plot_label != '':
        det_sel_plot_label += ', '

    det_sel_file_label = det_sel_label
    if det_sel_file_label != '':
        det_sel_file_label += '_'
    det_sel_file_label = det_sel_file_label.replace(' ', '_')

    # Need a special case where PID is a separate stage
    if fnmatch(''.join(h0_maps.names), '*_tr*ck*'):

        h0_trck_map = h0_maps.combine_wildcard('*_tr*ck')
        h1_trck_map = h1_maps.combine_wildcard('*_tr*ck')
        h0_cscd_map = h0_maps.combine_wildcard('*_c*sc*d*')
        h1_cscd_map = h1_maps.combine_wildcard('*_c*sc*d*')

        plot_asymmetry(
            h0_map=h0_trck_map,
            h1_map=h1_trck_map,
            h0_name='%s' % args.h0_name,
            h1_name='%s' % args.h1_name,
            fulltitle='%sevents identified as track' % det_sel_plot_label,
            savename='%strck' % det_sel_file_label,
            outdir=args.logdir,
            atype=atype
        )

        plot_asymmetry(
            h0_map=h0_cscd_map,
            h1_map=h1_cscd_map,
            h0_name='%s' % args.h0_name,
            h1_name='%s' % args.h1_name,
            fulltitle=('%sevents identified as cascade'
                       % det_sel_plot_label),
            savename='%scscd' % det_sel_file_label,
            outdir=args.logdir,
            atype=atype
        )

    # Otherwise, PID is assumed to be a binning dimension
    elif 'pid' in h0_maps[h0_maps.names[0]].binning.names:

        for map_name in h0_maps.names:
            h0_map = h0_maps[map_name]
            h0_map.set_errors(error_hist=None)

            h1_map = h1_maps[map_name]
            h1_map.set_errors(error_hist=None)

            pid_names = h0_map.binning['pid'].bin_names
            if pid_names != h1_map.binning['pid'].bin_names:
                raise ValueError(
                    "h0 and h1 maps must have same PID bin names"
                    " in order to make the asymmetry plots"
                )
            if pid_names is None:
                logging.warning(
                    "There are no names given for the PID bins, thus "
                    "they will just be numbered in both the the plot "
                    "save names and titles."
                )
                pid_names = [
                    x for x in range(0, h0_map.binning['pid'].num_bins)
                ]

            for pid_name in pid_names:

                h0_to_plot = h0_map.split(
                    dim='pid',
                    bin=pid_name
                )

                h1_to_plot = h1_map.split(
                    dim='pid',
                    bin=pid_name
                )

                if isinstance(pid_name, int):
                    pid_name = 'PID Bin %i' % (pid_name)

                plot_asymmetry(
                    h0_map=h0_to_plot,
                    h1_map=h1_to_plot,
                    h0_name='%s' % args.h0_name,
                    h1_name='%s' % args.h1_name,
                    fulltitle=('%sevents identified as %s'
                               % (det_sel_plot_label, pid_name)),
                    savename=('%s_%s%s' % (map_name,
                                           det_sel_file_label,
                                           pid_name)),
                    outdir=args.logdir,
                    atype=atype
                )

    else:

        for map_name in h0_maps.names:

            h0_map = h0_maps[map_name]
            h0_map.set_errors(error_hist=None)

            h1_map = h1_maps[map_name]
            h1_map.set_errors(error_hist=None)

            plot_asymmetry(
                h0_map=h0_map,
                h1_map=h1_map,
                h0_name='%s' % args.h0_name,
                h1_name='%s' % args.h1_name,
                fulltitle=('%sevents'%(det_sel_plot_label)),
                savename=('%s_%s' % (map_name, det_sel_file_label)),
                outdir=args.logdir,
                atype=atype
            )
Example #12
def compare(outdir,
            ref,
            ref_label,
            test,
            test_label,
            asymm_max=None,
            asymm_min=None,
            combine=None,
            diff_max=None,
            diff_min=None,
            fract_diff_max=None,
            fract_diff_min=None,
            json=False,
            pdf=False,
            png=False,
            ref_abs=False,
            ref_param_selections=None,
            sum=None,
            test_abs=False,
            test_param_selections=None):
    """Compare two entities. The result each entity specification is
    formatted into a MapSet and stored to disk, so that e.g. re-running
    a DistributionMaker is unnecessary to reproduce the results.

    Parameters
    ----------
    outdir : string
        Store output plots to this directory

    ref : string or array of strings
        Pipeline settings config file that generates reference output,
        or a stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    ref_abs : bool
        Use the absolute value of the reference plot for comparisons

    ref_label : string
        Label for reference

    ref_param_selections : None or string
        Param selections to apply to ref pipeline config(s). Not
        applicable if ref specifies stored map or map sets

    test : string or array of strings
        Pipeline settings config file that generates test output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    test_abs : bool
        Use the absolute value of the test plot for comparisons

    test_label : string
        Label for test

    test_param_selections : None or string
        Param selections to apply to test pipeline config(s). Not
        applicable if test specifies stored map or map sets

    combine : None or string or array of strings
        Combine by wildcard string, where string globbing (a la command
        line) uses asterisk for any number of wildcard characters. Use
        single quotes such that asterisks do not get expanded by the
        shell. Multiple combine strings supported

    sum : None or int
        Sum over (and hence remove) the specified axis or axes. I.e.,
        project the map onto remaining (unspecified) axis or axes

    json : bool
        Save output maps in compressed json (json.bz2) format

    pdf : bool
        Save plots in PDF format. If neither this nor png is
        specified, no plots are produced

    png : bool
        Save plots in PNG format. If neither this nor pdf is specified,
        no plots are produced

    diff_min : None or float
        Difference plot vmin; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    diff_max : None or float
        Difference plot vmax; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    fract_diff_min : None or float
        Fractional difference plot vmin; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    fract_diff_max : None or float
        Fractional difference plot vmax; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    asymm_min : None or float
        Asymmetry plot vmin; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    asymm_max : None or float
        Asymmetry plot vmax; if you specify only one of
        asymm_min or asymm_max, symmetric limits are automatically used
        (min = -max)

    Returns
    -------
    summary_stats : dict
        Dictionary containing a summary for each Map processed

    diff : MapSet
        MapSet of the difference
        - (Test - Ref)

    fract_diff : MapSet
        MapSet of the fractional difference
        - (Test - Ref) / Ref

    asymm : MapSet
        MapSet of the asymmetric fraction difference or pull
        - (Test - Ref) / sqrt(Ref)

    """
    ref_plot_label = ref_label
    if ref_abs and not ref_label.startswith('abs'):
        ref_plot_label = 'abs(%s)' % ref_plot_label
    test_plot_label = test_label
    if test_abs and not test_label.startswith('abs'):
        test_plot_label = 'abs(%s)' % test_plot_label

    plot_formats = []
    if pdf:
        plot_formats.append('pdf')
    if png:
        plot_formats.append('png')

    # If only one of the min/max limits is given, mirror it so the limits are
    # symmetric about zero (min = -max)
    diff_symm = True
    if diff_min is not None and diff_max is None:
        diff_max = -diff_min
        diff_symm = False
    if diff_max is not None and diff_min is None:
        diff_min = -diff_max
        diff_symm = False

    fract_diff_symm = True
    if fract_diff_min is not None and fract_diff_max is None:
        fract_diff_max = -fract_diff_min
        fract_diff_symm = False
    if fract_diff_max is not None and fract_diff_min is None:
        fract_diff_min = -fract_diff_max
        fract_diff_symm = False

    asymm_symm = True
    if asymm_max is not None and asymm_min is None:
        asymm_min = -asymm_max
        asymm_symm = False
    if asymm_min is not None and asymm_max is None:
        asymm_max = -asymm_min
        asymm_symm = False

    outdir = os.path.expanduser(os.path.expandvars(outdir))
    mkdir(outdir)

    # Get the reference distribution(s) into the form of a test MapSet
    p_ref = None
    ref_source = None
    if isinstance(ref, Map):
        p_ref = MapSet(ref)
        ref_source = MAP_SOURCE_STR
    elif isinstance(ref, MapSet):
        p_ref = ref
        ref_source = MAPSET_SOURCE_STR
    elif isinstance(ref, Pipeline):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = PIPELINE_SOURCE_STR
    elif isinstance(ref, DistributionMaker):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(ref) == 1:
            try:
                ref_pipeline = Pipeline(config=ref[0])
            except Exception:
                pass
            else:
                ref_source = PIPELINE_SOURCE_STR
                if ref_param_selections is not None:
                    ref_pipeline.select_params(ref_param_selections)
                p_ref = ref_pipeline.get_outputs()
        else:
            try:
                ref_dmaker = DistributionMaker(pipelines=ref)
            except Exception:
                pass
            else:
                ref_source = DISTRIBUTIONMAKER_SOURCE_STR
                if ref_param_selections is not None:
                    ref_dmaker.select_params(ref_param_selections)
                p_ref = ref_dmaker.get_outputs()

    if p_ref is None:
        try:
            p_ref = [Map.from_json(f) for f in ref]
        except Exception:
            pass
        else:
            ref_source = MAP_SOURCE_STR
            p_ref = MapSet(p_ref)

    if p_ref is None:
        assert ref_param_selections is None
        assert len(ref) == 1, 'Can only handle one MapSet'
        try:
            p_ref = MapSet.from_json(ref[0])
        except Exception:
            pass
        else:
            ref_source = MAPSET_SOURCE_STR

    if p_ref is None:
        raise ValueError(
            'Could not instantiate the reference Pipeline, DistributionMaker,'
            ' Map, or MapSet from ref value(s) %s' % ref)
    ref = p_ref

    logging.info('Reference map(s) derived from a ' + ref_source)

    # Get the test distribution(s) into the form of a test MapSet
    p_test = None
    test_source = None
    if isinstance(test, Map):
        p_test = MapSet(test)
        test_source = MAP_SOURCE_STR
    elif isinstance(test, MapSet):
        p_test = test
        test_source = MAPSET_SOURCE_STR
    elif isinstance(test, Pipeline):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = PIPELINE_SOURCE_STR
    elif isinstance(test, DistributionMaker):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(test) == 1:
            try:
                test_pipeline = Pipeline(config=test[0])
            except Exception:
                pass
            else:
                test_source = PIPELINE_SOURCE_STR
                if test_param_selections is not None:
                    test_pipeline.select_params(test_param_selections)
                p_test = test_pipeline.get_outputs()
        else:
            try:
                test_dmaker = DistributionMaker(pipelines=test)
            except Exception:
                pass
            else:
                test_source = DISTRIBUTIONMAKER_SOURCE_STR
                if test_param_selections is not None:
                    test_dmaker.select_params(test_param_selections)
                p_test = test_dmaker.get_outputs()

    if p_test is None:
        try:
            p_test = [Map.from_json(f) for f in test]
        except Exception:
            pass
        else:
            test_source = MAP_SOURCE_STR
            p_test = MapSet(p_test)

    if p_test is None:
        assert test_param_selections is None
        assert len(test) == 1, 'Can only handle one MapSet'
        try:
            p_test = MapSet.from_json(test[0])
        except Exception:
            pass
        else:
            test_source = MAPSET_SOURCE_STR

    if p_test is None:
        raise ValueError(
            'Could not instantiate the test Pipeline, DistributionMaker, Map,'
            ' or MapSet from test value(s) %s' % test)
    test = p_test

    logging.info('Test map(s) derived from a ' + test_source)

    if combine is not None:
        ref = ref.combine_wildcard(combine)
        test = test.combine_wildcard(combine)
        if isinstance(ref, Map):
            ref = MapSet([ref])
        if isinstance(test, Map):
            test = MapSet([test])

    if sum is not None:
        ref = ref.sum(sum)
        test = test.sum(sum)

    # Set the MapSet names according to args passed by user
    ref.name = ref_label
    test.name = test_label

    # Save to disk the maps being plotted (excluding optional absolute-value
    # operations)
    if json:
        refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label)
        to_file(ref, refmaps_path)

        testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label)
        to_file(test, testmaps_path)

    if set(test.names) != set(ref.names):
        raise ValueError('Test map names %s do not match ref map names %s.' %
                         (sorted(test.names), sorted(ref.names)))

    # Aliases to save keystrokes
    def masked(x):
        return np.ma.masked_invalid(x.nominal_values)

    def zero_to_nan(m):
        """Return a copy of Map `m` with near-zero bins set to NaN."""
        newmap = deepcopy(m)
        mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON)
        newmap.hist[mask] = np.nan
        return newmap

    reordered_test = []
    new_ref = []
    diff_maps = []
    fract_diff_maps = []
    asymm_maps = []
    summary_stats = {}
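    # For each ref map: align the matching test map's dimensions, then build
    # difference, fractional-difference, and asymmetry maps plus summary stats.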
    for ref_map in ref:
        test_map = test[ref_map.name].reorder_dimensions(ref_map.binning)
        if ref_abs:
            ref_map = abs(ref_map)
        if test_abs:
            test_map = abs(test_map)

        diff_map = test_map - ref_map
        fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map)
        asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5)
        abs_fract_diff_map = np.abs(fract_diff_map)

        new_ref.append(ref_map)
        reordered_test.append(test_map)
        diff_maps.append(diff_map)
        fract_diff_maps.append(fract_diff_map)
        asymm_maps.append(asymm_map)

        min_ref = np.min(masked(ref_map))
        max_ref = np.max(masked(ref_map))

        min_test = np.min(masked(test_map))
        max_test = np.max(masked(test_map))

        total_ref = np.sum(masked(ref_map))
        total_test = np.sum(masked(test_map))

        mean_ref = np.mean(masked(ref_map))
        mean_test = np.mean(masked(test_map))

        max_abs_fract_diff = np.max(masked(abs_fract_diff_map))
        mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map))
        median_abs_fract_diff = np.median(masked(abs_fract_diff_map))

        mean_fract_diff = np.mean(masked(fract_diff_map))
        min_fract_diff = np.min(masked(fract_diff_map))
        max_fract_diff = np.max(masked(fract_diff_map))
        std_fract_diff = np.std(masked(fract_diff_map))

        mean_diff = np.mean(masked(diff_map))
        min_diff = np.min(masked(diff_map))
        max_diff = np.max(masked(diff_map))
        std_diff = np.std(masked(diff_map))

        median_diff = np.nanmedian(masked(diff_map))
        mad_diff = np.nanmedian(masked(np.abs(diff_map)))
        median_fract_diff = np.nanmedian(masked(fract_diff_map))
        mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map)))

        min_asymm = np.min(masked(asymm_map))
        max_asymm = np.max(masked(asymm_map))

        total_asymm = np.sqrt(np.sum(masked(asymm_map)**2))

        summary_stats[test_map.name] = OrderedDict([
            ('min_ref', min_ref),
            ('max_ref', max_ref),
            ('total_ref', total_ref),
            ('mean_ref', mean_ref),
            ('min_test', min_test),
            ('max_test', max_test),
            ('total_test', total_test),
            ('mean_test', mean_test),
            ('max_abs_fract_diff', max_abs_fract_diff),
            ('mean_abs_fract_diff', mean_abs_fract_diff),
            ('median_abs_fract_diff', median_abs_fract_diff),
            ('min_fract_diff', min_fract_diff),
            ('max_fract_diff', max_fract_diff),
            ('mean_fract_diff', mean_fract_diff),
            ('std_fract_diff', std_fract_diff),
            ('median_fract_diff', median_fract_diff),
            ('mad_fract_diff', mad_fract_diff),
            ('min_diff', min_diff),
            ('max_diff', max_diff),
            ('mean_diff', mean_diff),
            ('std_diff', std_diff),
            ('median_diff', median_diff),
            ('mad_diff', mad_diff),
            ('min_asymm', min_asymm),
            ('max_asymm', max_asymm),
            ('total_asymm', total_asymm),
        ])

        logging.info('Map %s...', ref_map.name)
        logging.info('  Ref map(s):')
        logging.info('    min   :' + ('%.2f' % min_ref).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_ref).rjust(12))
        logging.info('    total :' + ('%.2f' % total_ref).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_ref).rjust(12))
        logging.info('  Test map(s):')
        logging.info('    min   :' + ('%.2f' % min_test).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_test).rjust(12))
        logging.info('    total :' + ('%.2f' % total_test).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_test).rjust(12))
        logging.info('  Absolute fract. diff., abs((Test - Ref) / Ref):')
        logging.info('    max   : %.4e', max_abs_fract_diff)
        logging.info('    mean  : %.4e', mean_abs_fract_diff)
        logging.info('    median: %.4e', median_abs_fract_diff)
        logging.info('  Fractional difference, (Test - Ref) / Ref:')
        logging.info('    min   : %.4e', min_fract_diff)
        logging.info('    max   : %.4e', max_fract_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_fract_diff,
                     std_fract_diff)
        logging.info('    median: %.4e +/- %.4e', median_fract_diff,
                     mad_fract_diff)
        logging.info('  Difference, Test - Ref:')
        logging.info('    min   : %.4e', min_diff)
        logging.info('    max   : %.4e', max_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_diff, std_diff)
        logging.info('    median: %.4e +/- %.4e', median_diff, mad_diff)
        logging.info('  Asymmetry, (Test - Ref) / sqrt(Ref)')
        logging.info('    min   : %.4e', min_asymm)
        logging.info('    max   : %.4e', max_asymm)
        logging.info('    total : %.4e (sum in quadrature)', total_asymm)
        logging.info('')

    ref = MapSet(new_ref)
    test = MapSet(reordered_test)
    diff = MapSet(diff_maps)
    fract_diff = MapSet(fract_diff_maps)
    asymm = MapSet(asymm_maps)

    if json:
        diff.to_json(
            os.path.join(
                outdir,
                'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))
        fract_diff.to_json(
            os.path.join(
                outdir, 'fract_diff__%s___%s.json.bz2' %
                (test_plot_label, ref_plot_label)))
        asymm.to_json(
            os.path.join(
                outdir,
                'asymm__%s___%s.json.bz2' % (test_plot_label, ref_plot_label)))
        to_file(
            summary_stats,
            os.path.join(
                outdir,
                'stats__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))

    for plot_format in plot_formats:
        # Plot the raw distributions
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=False,
                          ratio=False)
        plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label)
        plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label)

        # Plot the difference (test - ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=diff_symm,
                          ratio=False)
        plotter.label = '%s - %s' % (test_plot_label, ref_plot_label)
        plotter.plot_2d_array(
            test - ref,
            fname='diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=diff_min, vmax=diff_max
        )

        # Plot the fractional difference (test - ref)/ref
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=fract_diff_symm,
                          ratio=True)
        plotter.label = ('(%s-%s)/%s' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r) for r in ref]),
            fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=fract_diff_min, vmax=fract_diff_max
        )

        # Plot the asymmetry (test - ref)/sqrt(ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=asymm_symm,
                          ratio=True)
        plotter.label = (r'$(%s - %s)/\sqrt{%s}$' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]),
            fname='asymm__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=asymm_min, vmax=asymm_max
        )

    return summary_stats, diff, fract_diff, asymm
Example #13
def profile_scan(data_settings, template_settings, param_names, steps,
                 only_points, no_outer, data_param_selections,
                 hypo_param_selections, profile, outfile, minimizer_settings,
                 metric, debug_mode):
    """Perform a profile scan.

    Parameters
    ----------
    data_settings
    template_settings
    param_names
    steps
    only_points
    no_outer
    data_param_selections
    hypo_param_selections
    profile
    outfile
    minimizer_settings
    metric
    debug_mode

    Returns
    -------
    results
    analysis

    """
    outfile = expanduser(expandvars(outfile))
    if isfile(outfile):
        raise IOError('`outfile` "{}" already exists!'.format(outfile))

    minimizer_settings = from_file(minimizer_settings)

    hypo_maker = DistributionMaker(template_settings)

    if data_settings is None:
        if (data_param_selections is None
                or data_param_selections == hypo_param_selections):
            data_maker = hypo_maker
        else:
            data_maker = deepcopy(hypo_maker)
            data_maker.select_params(data_param_selections)
    else:
        data_maker = DistributionMaker(data_settings)
        data_maker.select_params(data_param_selections)

    data_dist = data_maker.get_outputs(return_sum=True)

    analysis = Analysis()
    results = analysis.scan(data_dist=data_dist,
                            hypo_maker=hypo_maker,
                            hypo_param_selections=hypo_param_selections,
                            metric=metric,
                            param_names=param_names,
                            steps=steps,
                            only_points=only_points,
                            outer=not no_outer,
                            profile=profile,
                            minimizer_settings=minimizer_settings,
                            outfile=outfile,
                            debug_mode=debug_mode)
    to_file(results, outfile)
    logging.info("Done.")

    return results, analysis
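
A hedged invocation sketch; the minimizer settings path, output file, and the scanned parameter name "theta23" are placeholders, not values from the source:

# Hypothetical call; paths and "theta23" are placeholders.
results, analysis = profile_scan(
    data_settings=None,           # data distribution comes from the hypo maker
    template_settings="settings/pipeline/example.cfg",
    param_names="theta23",
    steps=20,
    only_points=None,
    no_outer=False,
    data_param_selections=None,
    hypo_param_selections=None,
    profile=True,
    outfile="theta23_scan.json",
    minimizer_settings="settings/minimizer/example.json",
    metric="chi2",
    debug_mode=None,
)
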
Example #14
def create_hypersurfaces(fit_cfg):
    """Generate and store mapsets for different discrete systematics sets
    (with a single set characterised by a dedicated pipeline configuration)

    Parameters
    ----------
    fit_cfg : string
        Path to a fit config file

    Returns
    -------
    hypersurfaces : OrderedDict
        Container with the fitted hypersurface for each map type

    """

    #
    # Parse fit config file
    #

    parsed_fit_cfg, sys_list, sys_func_list, units_list, combine_regex = parse_fit_config(fit_cfg)


    #
    # Create the hypersurface params
    #

    # Loop over the param names and functional forms and create the params
    #TODO Add option to support initial param guesses
    params = [ HypersurfaceParam(name=param_name, func_name=param_func_name) for param_name, param_func_name in zip(sys_list, sys_func_list) ]


    #
    # Parse the definition of each dataset
    #

    fit_cfg_txt_buf = StringIO()
    parsed_fit_cfg.write(fit_cfg_txt_buf)
    fit_cfg_txt = fit_cfg_txt_buf.getvalue()

    nominal_pipeline_cfg = None
    nominal_param_values = None
    sys_pipeline_cfgs = []
    sys_param_values = []

    # Loop over config
    for section in parsed_fit_cfg.sections():

        no_ws_section = section.strip()

        section_prefix = no_ws_section.split(":")[0].strip()
        is_nominal = section_prefix == NOMINAL_SET_PFX
        is_dataset = is_nominal or section_prefix == SYS_SET_PFX

        if is_dataset:

            # Parse the list of systematics parameter values from the section name
            sys_param_point = tuple(float(x) for x in section.split(":")[1].split(","))

            if len(sys_param_point) != len(sys_list):
                raise ValueError(
                    "Section heading [{}] specifies {:d} systematic"
                    " parameter values, but there are {:d} systematics".format(
                        section, len(sys_param_point), len(sys_list)
                    )
                )

            # Parse the config file
            parsed_pipeline_cfg, pipeline_cfg_path = load_and_modify_pipeline_cfg(
                fit_cfg=parsed_fit_cfg, section=section
            )

            # Store
            if is_nominal:
                assert nominal_pipeline_cfg is None, "Found multiple nominal dataset definitions"
                nominal_pipeline_cfg = parsed_pipeline_cfg
                nominal_param_values = sys_param_point
            else:
                sys_pipeline_cfgs.append(parsed_pipeline_cfg)
                sys_param_values.append(sys_param_point)

        # In this loop, nothing to do for general & apply_to_all_sets sections
        elif no_ws_section in (GENERAL_SECTION_NAME, APPLY_ALL_SECTION_NAME):
            pass

        # Do not allow any other sections in the config
        else:
            raise ValueError("Invalid section in fit config file: [%s]" % section)

    # Check found stuff
    assert nominal_pipeline_cfg is not None, "No nominal dataset definition found"
    assert len(sys_pipeline_cfgs) > 0, "No systematics dataset definitions found"

    # Re-format params into a dict, including the param names
    nominal_param_values = { name:val for name, val in zip(sys_list,nominal_param_values) }
    sys_param_values = [ { name:val for name, val in zip(sys_list,s) } for s in sys_param_values ]


    #
    # Create mapsets
    #

    # Get the nominal mapset
    nominal_dist_maker = DistributionMaker(nominal_pipeline_cfg)
    nominal_mapset = nominal_dist_maker.get_outputs(return_sum=False)[0]

    # Get the systematics mapsets
    sys_mapsets = []
    for sys_pipeline_cfg in sys_pipeline_cfgs:
        sys_dist_maker = DistributionMaker(sys_pipeline_cfg)
        sys_mapset = sys_dist_maker.get_outputs(return_sum=False)[0]
        sys_mapsets.append(sys_mapset)

    # Combine maps according to the provided regex, if one was provided
    if combine_regex:
        logging.info(
            "Combining maps according to regular expression(s) %s", combine_regex
        )
        nominal_mapset = nominal_mapset.combine_re(combine_regex)
        sys_mapsets = [ s.combine_re(combine_regex) for s in sys_mapsets ]


    #
    # Fit the hypersurface
    #

    hypersurfaces = OrderedDict()

    # Fit one per map, so loop over them
    for map_name in nominal_mapset.names:

        # Create the hypersurface
        hypersurface = Hypersurface(
            params=params,
            initial_intercept=1.,  # Initial value for the intercept
        )

        # Get just the requested map
        nominal_map = nominal_mapset[map_name]
        sys_maps = [ s[map_name] for s in sys_mapsets ]

        # Perform fit
        hypersurface.fit(
            nominal_map=nominal_map,
            nominal_param_values=nominal_param_values,
            sys_maps=sys_maps,
            sys_param_values=sys_param_values,
            norm=True,
        )

        # Store the result
        hypersurfaces[map_name] = hypersurface

    # Done
    return hypersurfaces
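
A hedged usage sketch; the fit config path is a placeholder:

# Hypothetical usage; "discrete_sys_fit.cfg" is a placeholder fit config.
hypersurfaces = create_hypersurfaces("discrete_sys_fit.cfg")
for map_name, hypersurface in hypersurfaces.items():
    logging.info("Fitted hypersurface for %s: %s", map_name, hypersurface)
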
def main():
    args = parse_args()
    init_args_d = vars(args)

    # NOTE: Removing extraneous args that won't get passed to instantiate the
    # HypoTesting object via dictionary's `pop()` method.

    set_verbosity(init_args_d.pop('v'))

    detector = init_args_d.pop('detector')
    selection = init_args_d.pop('selection')

    # Normalize and convert `*_pipeline` filenames; store to `*_maker`
    # (which is argument naming convention that HypoTesting init accepts).
    filenames = init_args_d.pop('pipeline')
    if filenames is not None:
        filenames = sorted([normcheckpath(fname) for fname in filenames])
    ps_str = init_args_d['param_selections']
    if ps_str is None:
        ps_list = None
    else:
        ps_list = [x.strip().lower() for x in ps_str.split(',')]

    data_maker = DistributionMaker(filenames)
    data_maker.select_params(ps_list)

    return_sum = True
    for data_pipeline in data_maker.pipelines:
        # Need a special case where PID is a separate stage
        if 'pid' in data_pipeline.stage_names:
            raise ValueError("Special case for separate PID stage currently "
                             "not implemented.")
    baseline_maps = data_maker.get_outputs(return_sum=return_sum)

    det_sel = []
    if detector.strip() != '':
        det_sel.append(detector.strip())
    if selection.strip() != '':
        det_sel.append(selection.strip())
    det_sel_label = ' '.join(det_sel)

    det_sel_plot_label = det_sel_label
    if det_sel_plot_label != '':
        det_sel_plot_label += ', '

    det_sel_file_label = det_sel_label
    if det_sel_file_label != '':
        det_sel_file_label += '_'
    det_sel_file_label = det_sel_file_label.replace(' ', '_')

    for data_param in data_maker.params.free:
        # Calculate a shifted value based on the prior if possible
        if hasattr(data_param, 'prior') and (data_param.prior is not None):
            # Gaussian priors are easy - just do 1 sigma
            if data_param.prior.kind == 'gaussian':
                data_param.value = \
                    data_param.value + data_param.prior.stddev
                shift_label = r"$1\sigma$"
            # Else do 10%, or +/- 1 if the baseline is zero
            else:
                if data_param.value != 0.0:
                    data_param.value = 1.1 * data_param.value
                    shift_label = r"10%"
                else:
                    data_param.value = 1.0
                    shift_label = r"1"
        # For no prior also do 10%, or +/- 1 if the baseline is zero
        else:
            if data_param.value != 0.0:
                data_param.value = 1.1 * data_param.value
                shift_label = r"10%"
            else:
                data_param.value = 1.0
                shift_label = r"1"
        up_maps = data_maker.get_outputs(return_sum=return_sum)
        data_maker.params.reset_free()

        if hasattr(data_param, 'prior') and (data_param.prior is not None):
            if data_param.prior.kind == 'gaussian':
                data_param.value = \
                    data_param.value - data_param.prior.stddev
            else:
                if data_param.value != 0.0:
                    data_param.value = 0.9 * data_param.value
                else:
                    data_param.value = -1.0
        else:
            if data_param.value != 0.0:
                data_param.value = 0.9 * data_param.value
            else:
                data_param.value = -1.0
        down_maps = data_maker.get_outputs(return_sum=return_sum)
        data_maker.params.reset_free()

        baseline_map = baseline_maps['total']
        baseline_map.set_errors(error_hist=None)
        up_map = up_maps['total']
        up_map.set_errors(error_hist=None)
        down_map = down_maps['total']
        down_map.set_errors(error_hist=None)

        pid_names = baseline_map.binning['pid'].bin_names
        if pid_names is None:
            logging.warning('There are no names given for the PID bins, thus '
                            'they will just be numbered in both the plot '
                            'save names and titles.')
            pid_names = list(range(baseline_map.binning['pid'].num_bins))

        gridspec_kw = dict(left=0.04, right=0.966, wspace=0.32)
        fig, axes = plt.subplots(nrows=2,
                                 ncols=len(pid_names),
                                 gridspec_kw=gridspec_kw,
                                 sharex=False,
                                 sharey=False,
                                 figsize=(7 * len(pid_names), 14))

        for i, pid_name in enumerate(pid_names):

            baseline = baseline_map.split(dim='pid', bin=pid_name)
            up_to_plot = up_map.split(dim='pid', bin=pid_name)
            up_to_plot = (up_to_plot - baseline) / baseline * 100.0
            down_to_plot = down_map.split(dim='pid', bin=pid_name)
            down_to_plot = (down_to_plot - baseline) / baseline * 100.0

            if isinstance(pid_name, int):
                pid_name = 'PID Bin %i' % (pid_name)
            else:
                pid_name += ' Channel'

            up_to_plot.plot(fig=fig,
                            ax=axes[0][i],
                            title="%s " % (pid_name) + "\n" + " %s + %s" %
                            (tex_axis_label(data_param.name), shift_label),
                            titlesize=30,
                            cmap=plt.cm.seismic,
                            clabel='% Change from Baseline',
                            clabelsize=30,
                            xlabelsize=24,
                            ylabelsize=24,
                            symm=True)
            down_to_plot.plot(fig=fig,
                              ax=axes[1][i],
                              title="%s " % (pid_name) + "\n" + " %s - %s" %
                              (tex_axis_label(data_param.name), shift_label),
                              titlesize=30,
                              cmap=plt.cm.seismic,
                              clabel='% Change from Baseline',
                              clabelsize=30,
                              xlabelsize=24,
                              ylabelsize=24,
                              symm=True)

        fig.subplots_adjust(hspace=0.4)
        savename = det_sel_file_label
        if savename != '' and savename[-1] != '_':
            savename += '_'
        savename += '%s_variation.png' % (data_param.name)
        mkdir(args.logdir, warn=False)
        fig.savefig(os.path.join(args.logdir, savename), bbox_inches='tight')
        plt.close(fig.number)
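
The up/down shifting convention above (1 sigma for Gaussian priors; otherwise
10%, or +/- 1 when the baseline value is zero) can be captured in a small
helper. A minimal sketch (the helper name is illustrative, not part of the
original script):

def shifted_value(param, direction=1):
    """Return (shifted_value, shift_label) following the convention above."""
    prior = getattr(param, 'prior', None)
    if prior is not None and prior.kind == 'gaussian':
        return param.value + direction * prior.stddev, r"$1\sigma$"
    if param.value != 0.0:
        return (1.0 + direction * 0.1) * param.value, r"10%"
    return direction * 1.0, r"1"
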
Example #16
0
def inj_param_scan(return_outputs=False):
    """Load the HypoTesting class and use it to do an Asimov test across the
    space of one of the injected parameters.

    The user will define the parameter and pass a numpy-interpretable string to
    set the range of values. For example, one could scan over the space of
    theta23 by using a string such as `"numpy.linspace(0.35, 0.65, 31)"` which
    will then be evaluated to figure out a space of theta23 to inject and run
    Asimov tests.
    """
    # NOTE: import here to avoid circular refs
    from pisa.scripts.analysis import parse_args
    init_args_d = parse_args(description=inj_param_scan.__doc__,
                             command=inj_param_scan)

    # Normalize and convert `*_pipeline` filenames; store to `*_maker`
    # (which is the argument naming convention that HypoTesting's init
    # accepts). For this test the pipeline is required, so we don't need the
    # try/except logic or the checks for it being None.
    filenames = init_args_d.pop('pipeline')
    filenames = sorted([normcheckpath(fname) for fname in filenames])
    init_args_d['h0_maker'] = filenames
    # However, we do need them for the selections, since they can be different
    for maker in ['h0', 'h1', 'data']:
        ps_name = maker + '_param_selections'
        ps_str = init_args_d[ps_name]
        if ps_str is None:
            ps_list = None
        else:
            ps_list = [x.strip().lower() for x in ps_str.split(',')]
        init_args_d[ps_name] = ps_list

    init_args_d['data_maker'] = init_args_d['h0_maker']
    init_args_d['h1_maker'] = init_args_d['h0_maker']
    init_args_d['h0_maker'] = DistributionMaker(init_args_d['h0_maker'])
    init_args_d['h1_maker'] = DistributionMaker(init_args_d['h1_maker'])
    init_args_d['h1_maker'].select_params(init_args_d['h1_param_selections'])
    init_args_d['data_maker'] = DistributionMaker(init_args_d['data_maker'])
    if init_args_d['data_param_selections'] is None:
        init_args_d['data_param_selections'] = \
            init_args_d['h0_param_selections']
        init_args_d['data_name'] = init_args_d['h0_name']
    init_args_d['data_maker'].select_params(
        init_args_d['data_param_selections'])

    # Remove the final parameters that shouldn't be passed to HypoTesting
    param_name = init_args_d.pop('param_name')
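    # `inj_vals` arrives as a numpy-interpretable string (see the docstring),
    # e.g. "numpy.linspace(0.35, 0.65, 31)", and is evaluated into an array here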
    inj_vals = eval(init_args_d.pop('inj_vals'))
    inj_units = init_args_d.pop('inj_units')
    force_prior = init_args_d.pop('use_inj_prior')

    # Instantiate the analysis object
    hypo_testing = HypoTesting(**init_args_d)

    logging.info('Scanning over %s between %.4f and %.4f with %i vals',
                 param_name, min(inj_vals), max(inj_vals), len(inj_vals))
    # Modify parameters if necessary
    if param_name == 'sin2theta23':
        requested_vals = inj_vals
        inj_vals = np.arcsin(np.sqrt(inj_vals))
        logging.info(
            'Converting to theta23 values. Equivalent range is %.4f to %.4f'
            ' radians, or %.4f to %.4f degrees', min(inj_vals), max(inj_vals),
            min(inj_vals) * 180 / np.pi,
            max(inj_vals) * 180 / np.pi)
        test_name = 'theta23'
        inj_units = 'radians'

    elif param_name == 'deltam31':
        raise ValueError('Need to implement a test where it ensures the sign '
                         'of the requested values matches those in truth and '
                         'the hypo makers (else it makes no sense). For now, '
                         'please select deltam3l instead.')

    elif param_name == 'deltam3l':
        # Ensure all values have the same sign, else it doesn't make any sense
        if not np.all(np.sign(inj_vals) == np.sign(inj_vals[0])):
            raise ValueError("Not all requested values to inject have the same"
                             " sign. This doesn't make any sense given that"
                             " you have requested to inject different values"
                             " of deltam3l.")
        logging.info('Parameter requested was deltam3l - will convert assuming'
                     ' that this is always the larger of the two splittings,'
                     ' i.e. deltam3l = deltam31 for deltam3l > 0 and deltam3l '
                     '= deltam32 for deltam3l < 0.')
        inj_sign = np.sign(inj_vals)[0]
        requested_vals = inj_vals
        test_name = 'deltam31'
        deltam21_val = hypo_testing.data_maker.params['deltam21'].value.to(
            inj_units).magnitude
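        # Mirror each point into the other ordering: deltam32 = deltam31 -
        # deltam21, with the overall sign flipped (and vice versa for
        # negative, i.e. inverted-ordering, inputs)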
        if inj_sign == 1:
            no_inj_vals = requested_vals
            io_inj_vals = (requested_vals - deltam21_val) * -1.0
        else:
            io_inj_vals = requested_vals
            no_inj_vals = (requested_vals * -1.0) + deltam21_val
        inj_vals = []
        for no_inj_val, io_inj_val in zip(no_inj_vals, io_inj_vals):
            o_vals = {}
            o_vals['nh'] = no_inj_val
            o_vals['ih'] = io_inj_val
            inj_vals.append(o_vals)

    else:
        test_name = param_name
        requested_vals = inj_vals

    unit_inj_vals = []
    for inj_val in inj_vals:
        if isinstance(inj_val, dict):
            o_vals = {}
            for ivkey in inj_val.keys():
                o_vals[ivkey] = inj_val[ivkey] * ureg(inj_units)
            unit_inj_vals.append(o_vals)
        else:
            unit_inj_vals.append(inj_val * ureg(inj_units))
    inj_vals = unit_inj_vals

    # Extend the ranges of the distribution makers so that they reflect the
    # range of the scan. This is a pain if there are different values depending
    # on the ordering. Need to extend the ranges of both values in the
    # hypothesis maker since the hypotheses may minimise over the ordering,
    # and could then go out of range.

    # Also, some parameters CANNOT go negative or else things won't work.
    # To account for this, check if the parameter's lower bound was positive
    # and, if so, enforce that it remains positive now.
    if isinstance(inj_vals[0], dict):
        # Calculate ranges for both parameters
        norangediff = max(no_inj_vals) - min(no_inj_vals)
        norangediff = norangediff * ureg(inj_units)
        norangetuple = (min(no_inj_vals) * ureg(inj_units) - 0.5 * norangediff,
                        max(no_inj_vals) * ureg(inj_units) + 0.5 * norangediff)
        iorangediff = max(io_inj_vals) - min(io_inj_vals)
        iorangediff = iorangediff * ureg(inj_units)
        iorangetuple = (min(io_inj_vals) * ureg(inj_units) - 0.5 * iorangediff,
                        max(io_inj_vals) * ureg(inj_units) + 0.5 * iorangediff)
        # Do it for both hierarchies
        for hierarchy, rangetuple in zip(['nh', 'ih'],
                                         [norangetuple, iorangetuple]):
            hypo_testing.set_param_ranges(selection=hierarchy,
                                          test_name=test_name,
                                          rangetuple=rangetuple,
                                          inj_units=inj_units)
        # Select the proper params again
        hypo_testing.h0_maker.select_params(init_args_d['h0_param_selections'])
        hypo_testing.h1_maker.select_params(init_args_d['h1_param_selections'])
    # Otherwise it's way simpler...
    else:
        rangediff = max(inj_vals) - min(inj_vals)
        rangetuple = (min(inj_vals) - 0.5 * rangediff,
                      max(inj_vals) + 0.5 * rangediff)
        hypo_testing.set_param_ranges(selection=None,
                                      test_name=test_name,
                                      rangetuple=rangetuple,
                                      inj_units=inj_units)

    if hypo_testing.data_maker.params[test_name].prior is not None:
        if hypo_testing.data_maker.params[test_name].prior.kind != 'uniform':
            if force_prior:
                logging.warning(
                    'Parameter to be scanned, %s, has a %s prior that you have'
                    ' requested to be left on. This will likely make the'
                    ' results wrong.', test_name,
                    hypo_testing.data_maker.params[test_name].prior.kind)
            else:
                logging.info(
                    'Parameter to be scanned, %s, has a %s prior. This will be'
                    ' changed to a uniform prior (i.e. no prior) for this'
                    ' test.', test_name,
                    hypo_testing.data_maker.params[test_name].prior.kind)
                uniformprior = Prior(kind='uniform')
                hypo_testing.h0_maker.params[test_name].prior = uniformprior
                hypo_testing.h1_maker.params[test_name].prior = uniformprior
    else:
        if force_prior:
            raise ValueError('Parameter to be scanned, %s, does not have a'
                             ' prior but you have requested to force one to be'
                             ' left on. Something is potentially wrong.' %
                             test_name)
        else:
            logging.info(
                'Parameter to be scanned, %s, does not have a prior.'
                ' So nothing needs to be done.', test_name)

    # Everything is set up. Now do the scan.
    outputs = hypo_testing.asimov_inj_param_scan(  # pylint: disable=redefined-outer-name
        param_name=param_name,
        test_name=test_name,
        inj_vals=inj_vals,
        requested_vals=requested_vals,
        h0_name=init_args_d['h0_name'],
        h1_name=init_args_d['h1_name'],
        data_name=init_args_d['data_name'])

    if return_outputs:
        return outputs
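
For reference, the sin2theta23 -> theta23 conversion above is just
theta23 = arcsin(sqrt(sin2theta23)); a quick standalone check:

import numpy as np

sin2_vals = np.linspace(0.35, 0.65, 31)
theta23_rad = np.arcsin(np.sqrt(sin2_vals))
# round-trip back to sin^2(theta23) to confirm the conversion
assert np.allclose(np.sin(theta23_rad) ** 2, sin2_vals)
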
Example #17
0
def test_pi_resample():
    """Unit test for the resampling stage."""
    from collections import OrderedDict
    from copy import deepcopy

    import numpy as np

    from pisa.core.distribution_maker import DistributionMaker
    from pisa.core.map import Map
    from pisa.utils.config_parser import parse_pipeline_config
    from pisa.utils.log import set_verbosity, logging
    from pisa.utils.comparisons import ALLCLOSE_KW

    example_cfg = parse_pipeline_config('settings/pipeline/example.cfg')
    reco_binning = example_cfg[('utils', 'pi_hist')]['output_specs']
    coarse_binning = reco_binning.downsample(reco_energy=2, reco_coszen=2)
    assert coarse_binning.is_compat(reco_binning)

    # replace binning of output with coarse binning
    example_cfg[('utils', 'pi_hist')]['output_specs'] = coarse_binning
    # make another pipeline with an upsampling stage to the original binning
    upsample_cfg = deepcopy(example_cfg)
    pi_resample_cfg = OrderedDict()
    pi_resample_cfg['input_specs'] = coarse_binning
    pi_resample_cfg['output_specs'] = reco_binning
    pi_resample_cfg['scale_errors'] = True
    upsample_cfg[('utils', 'pi_resample')] = pi_resample_cfg

    example_maker = DistributionMaker([example_cfg])
    upsampled_maker = DistributionMaker([upsample_cfg])

    example_map = example_maker.get_outputs(return_sum=True)[0]
    example_map_upsampled = upsampled_maker.get_outputs(return_sum=True)[0]

    # First check: The upsampled map must have the same total count as the original map
    assert np.isclose(
        np.sum(example_map.nominal_values),
        np.sum(example_map_upsampled.nominal_values),
    )

    # Check consistency of modified chi-square
    # ----------------------------------------
    # When the assumption holds that events are uniformly distributed over the coarse
    # bins, the modified chi-square should not change from upscaling the maps. We test
    # this by making a fluctuated coarse map and then upsampling that map according to
    # the assumption by bin volumes. We should find that the modified chi-square between
    # the coarse map and the coarse fluctuated map is the same as the upsampled map and
    # the upsampled fluctuated map.

    # It doesn't matter precisely how we fluctuate it here, we just want any different
    # map...
    random_map_coarse = example_map.fluctuate(method='scaled_poisson',
                                              random_state=42)
    random_map_coarse.set_errors(None)

    # This bit is an entirely independent implementation of the upsampling. The count
    # in every bin is scaled according to the ratio of weighted bin volumes.
    upsampled_hist = np.zeros_like(example_map_upsampled.nominal_values)
    upsampled_errs = np.zeros_like(example_map_upsampled.nominal_values)
    up_binning = example_map_upsampled.binning

    coarse_hist = np.array(random_map_coarse.nominal_values)
    coarse_errors = np.array(random_map_coarse.std_devs)
    coarse_binning = random_map_coarse.binning

    for bin_idx in np.ndindex(upsampled_hist.shape):
        one_bin = up_binning[bin_idx]
        fine_bin_volume = one_bin.weighted_bin_volumes(
            attach_units=False).squeeze().item()
        # The following is basically an independent implementation of translate.lookup
        coarse_index = []  # index where the upsampled bin came from
        for dim in up_binning.names:
            x = one_bin[dim].weighted_centers[0].m  # midpoint of the single bin
            bins = coarse_binning[dim].bin_edges.m  # coarse bin edges in that dim
            coarse_index.append(np.digitize(x, bins) - 1)  # digitize returns 1 for bin 0
        coarse_index = tuple(coarse_index)
        coarse_bin_volume = coarse_binning.weighted_bin_volumes(
            attach_units=False)[coarse_index].squeeze().item()

        upsampled_hist[bin_idx] = coarse_hist[coarse_index]
        upsampled_hist[bin_idx] *= fine_bin_volume
        upsampled_hist[bin_idx] /= coarse_bin_volume

    # done, at last!
    random_map_upsampled = Map(name="random_upsampled",
                               hist=upsampled_hist,
                               binning=up_binning)
    random_map_upsampled.set_errors(None)

    # After ALL THIS, we get the same modified chi-square from the coarse and the
    # upsampled pair of maps. Neat, huh?
    assert np.allclose(
        random_map_coarse.mod_chi2(example_map),
        random_map_upsampled.mod_chi2(example_map_upsampled),
        **ALLCLOSE_KW,
    )
    logging.info('<< PASS : pi_resample >>')
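
The volume-ratio redistribution used in the cross-check above can be seen in
isolation: a coarse bin's count is shared among its fine daughter bins in
proportion to their (weighted) bin volumes, so the total count is conserved.
A minimal numpy-only sketch:

import numpy as np

coarse_count = 8.0
fine_volumes = np.array([1.0, 3.0])  # two fine bins inside one coarse bin
coarse_volume = fine_volumes.sum()
fine_counts = coarse_count * fine_volumes / coarse_volume  # -> [2., 6.]
assert np.isclose(fine_counts.sum(), coarse_count)  # total count preserved
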
Example #18
0
def make_discrete_sys_distributions(fit_cfg, set_params=None):
    """Generate and store mapsets for different discrete systematics sets
    (with a single set characterised by a dedicated pipeline configuration)

    Parameters
    ----------
    fit_cfg : string
        Path to a fit config file

    Returns
    -------
    input_data : OrderedDict
        Container with the processed input data including MapSets
        resulting from each input pipelines

    """
    # check optional `set_params`
    if set_params is not None:
        if not isinstance(set_params, Mapping):
            raise TypeError("`set_params` must be dict-like")
        for param_name, param_value in set_params.items():
            if not isinstance(param_name, str):
                raise TypeError(
                    "`set_params` keys must be strings (parameter name)")
            if not isinstance(param_value, ureg.Quantity):
                raise TypeError("`set_params` values must be Quantities")

    parsed_fit_cfg, sys_list, units_list, combine_regex = parse_fit_config(
        fit_cfg)
    fit_cfg_txt_buf = StringIO()
    parsed_fit_cfg.write(fit_cfg_txt_buf)
    fit_cfg_txt = fit_cfg_txt_buf.getvalue()

    # prepare the data container
    input_data = OrderedDict()
    input_data["fit_cfg_path"] = fit_cfg
    input_data["fit_cfg_txt"] = fit_cfg_txt
    input_data["param_names"] = sys_list
    input_data["param_units"] = units_list
    input_data["datasets"] = []

    # -- Load systematics sets -- #

    found_nominal = False
    sys_sets_info = OrderedDict()

    for section in parsed_fit_cfg.sections():
        no_ws_section = section.strip()

        section_pfx = no_ws_section.split(":")[0].strip()
        is_nominal = section_pfx == NOMINAL_SET_PFX
        is_sys_set = is_nominal or section_pfx == SYS_SET_PFX

        if is_nominal:
            if found_nominal:
                raise ValueError(
                    "Found multiple nominal sets in fit cfg! There must be"
                    " exactly one.")
            found_nominal = True

        if is_sys_set:
            # Parse the list of systematics parameter values from the section name
            sys_param_point = tuple(
                float(x) for x in section.split(":")[1].split(","))

            if len(sys_param_point) != len(sys_list):
                raise ValueError(
                    "Section heading [{}] specifies {:d} systematic"
                    " parameter values, but there are {:d} systematics".format(
                        section, len(sys_param_point), len(sys_list)))

            parsed_pipeline_cfg, pipeline_cfg_path = load_and_modify_pipeline_cfg(
                fit_cfg=parsed_fit_cfg, section=section)

            pipeline_cfg_txt_buf = StringIO()
            parsed_pipeline_cfg.write(pipeline_cfg_txt_buf)
            pipeline_cfg_txt = pipeline_cfg_txt_buf.getvalue()

            sys_sets_info[sys_param_point] = dict(
                is_nominal=is_nominal,
                parsed_pipeline_cfgs=[parsed_pipeline_cfg],
                pipeline_cfg_paths=[pipeline_cfg_path],
                pipeline_cfg_txts=[pipeline_cfg_txt],
            )

        # In this loop, nothing to do for general & apply_to_all_sets sections
        elif no_ws_section in (GENERAL_SECTION_NAME, APPLY_ALL_SECTION_NAME):
            pass

        # Do not allow any other sections in the config
        else:
            raise ValueError("Invalid section in fit config file: [%s]" %
                             section)

    if not found_nominal:
        raise ValueError(
            "Could not find a nominal discrete systematics set in fit cfg."
            " There must be exactly one.")

    nsets = len(sys_sets_info)
    nsys = len(sys_list)
    if nsets <= nsys:
        logging.warning(
            "Fit will either fail or be unreliable since the number of"
            " systematics sets to be fit is small (%d <= %d).",
            nsets,
            nsys,
        )

    for sys_param_point, info in sys_sets_info.items():
        point_str = " | ".join(
            ["%s=%.2f" % (p, v) for p, v in zip(sys_list, sys_param_point)])

        logging.info(
            "Generating maps for discrete systematics point: %s. Using"
            ' pipeline config(s) at "%s"',
            point_str,
            info["pipeline_cfg_paths"],
        )

        # make a dedicated distribution maker for each systematics set
        distribution_maker = DistributionMaker(info["parsed_pipeline_cfgs"])

        # update params if requested
        if set_params is not None:
            for pname, pval in set_params.items():
                if pname not in distribution_maker.params.names:
                    raise ValueError("Unknown param '%s' in `set_params`" %
                                     pname)
                if (pval.dimensionality !=
                        distribution_maker.params[pname].dimensionality):
                    raise ValueError(
                        'Incorrect units for param "%s" in `set_params`' %
                        pname)
                distribution_maker.params[pname].value = pval
                logging.info("Changed param '%s' to %s", pname, pval)

        distribution_maker_param_values = OrderedDict()
        for dmpname in sorted(distribution_maker.params.names):
            dmpval = distribution_maker.params[dmpname].value
            distribution_maker_param_values[dmpname] = dmpval

        # run the distribution maker to get the mapset
        # TODO This assumes only one pipeline, either make more general or enforce
        mapset = distribution_maker.get_outputs(return_sum=False)[0]

        if combine_regex:
            logging.info(
                "Combining maps according to regular expression(s) %s",
                combine_regex)
            mapset = mapset.combine_re(combine_regex)

        # Store the info
        dataset = OrderedDict()
        dataset["pipeline_cfg_paths"] = info["pipeline_cfg_paths"]
        dataset["pipeline_cfg_txts"] = info["pipeline_cfg_txts"]
        dataset["distribution_maker_param_values"] = distribution_maker_param_values
        dataset["param_values"] = sys_param_point
        dataset["mapset"] = mapset
        dataset["nominal"] = info["is_nominal"]
        input_data["datasets"].append(dataset)

    return input_data
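
For orientation, a fit config consumed by this function has one section per
dataset, with the systematics parameter values encoded in the section heading
after a colon (this is what the `section.split(":")` parsing above relies on).
A hedged sketch, assuming the section prefixes are `nominal_set` and `sys_set`
and with illustrative option names:

[general]
sys_list = dom_eff, hole_ice

[apply_to_all_sets]
# options applied to every dataset would go here (illustrative)

[nominal_set : 1.00, 0.25]
pipeline_cfg = settings/pipeline/example.cfg

[sys_set : 0.95, 0.25]
pipeline_cfg = settings/pipeline/example.cfg
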
Example #19
0
def create_mc_template(toymc_params,
                       config_file=None,
                       seed=None,
                       keep_same_weight=True):
    '''
    Create an MC template from a PISA pipeline.
    '''
    if seed is not None:
        np.random.seed(seed)

    Config = parse_pipeline_config(config_file)

    # Change binning
    Config[('data', 'pi_simple_signal')]['output_specs'] = toymc_params.binning
    llh_params = Config[('likelihood', 'pi_generalized_llh_params')]
    llh_params['output_specs'] = toymc_params.binning

    # If keep_same_weight is True, turn off the mean adjust and pseudo weight
    # of pi_generalized_llh
    llh_params['with_mean_adjust'] = not keep_same_weight
    llh_params['with_pseudo_weight'] = not keep_same_weight

    new_n_events_data = Param(name='n_events_data',
                              value=toymc_params.n_data,
                              prior=None,
                              range=None,
                              is_fixed=True)
    new_sig_frac = Param(name='signal_fraction',
                         value=toymc_params.signal_fraction,
                         prior=None,
                         range=None,
                         is_fixed=True)
    new_stats_factor = Param(name='stats_factor',
                             value=toymc_params.stats_factor,
                             prior=None,
                             range=None,
                             is_fixed=True)

    # These should match the values in the config file, but we override them
    # here in case we need to change them later
    new_mu = Param(name='mu',
                   value=toymc_params.mu,
                   prior=None,
                   range=[0, 100],
                   is_fixed=False)
    new_sigma = Param(name='sigma',
                      value=toymc_params.sigma,
                      prior=None,
                      range=None,
                      is_fixed=True)
    Config[('data', 'pi_simple_signal')]['params'].update(p=ParamSet([
        new_n_events_data, new_sig_frac, new_stats_factor, new_mu, new_sigma
    ]))

    MCtemplate = DistributionMaker(Config)

    return MCtemplate
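
A minimal usage sketch, assuming `toymc_params` is any object carrying the
attributes read above (the SimpleNamespace, binning object, and config path
are illustrative placeholders):

from types import SimpleNamespace

toymc_params = SimpleNamespace(
    binning=my_binning,  # a PISA binning object, assumed defined elsewhere
    n_data=10000., signal_fraction=0.1, stats_factor=10., mu=20., sigma=3.,
)
mc_template = create_mc_template(
    toymc_params, config_file='path/to/toymc_pipeline.cfg', seed=0)
maps = mc_template.get_outputs(return_sum=True)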