コード例 #1
0
ファイル: pipeline.py プロジェクト: icecube/pisa
    def __init__(self, config, profile=False):
        """Set up the pipeline from a pipeline configuration.

        Parameters
        ----------
        config : str, PISAConfigParser, or OrderedDict
            A str or PISAConfigParser is passed through
            `parse_pipeline_config`; an OrderedDict is used as given.
        profile : bool
            Stored on the instance as `_profile`.

        Raises
        ------
        TypeError
            If `config` is none of the accepted types.

        """
        parseable_types = (str, PISAConfigParser)
        if isinstance(config, parseable_types):
            config = parse_pipeline_config(config=config)
        elif not isinstance(config, OrderedDict):
            bad_type_name = type(config).__name__
            raise TypeError(
                "`config` passed is of type %s but must be string,"
                " PISAConfigParser, or OrderedDict" % bad_type_name
            )

        self.pisa_version = None

        # Everything below is read from the "pipeline" section of the config.
        pipeline_section = config['pipeline']
        self.name = pipeline_section['name']
        self.data = ContainerSet(self.name)
        self.detector_name = pipeline_section['detector_name']
        self.output_binning = pipeline_section['output_binning']
        self.output_key = pipeline_section['output_key']

        self._profile = profile

        # `_stages` and `_config` are assigned before `_init_stages` is called.
        self._stages = []
        self._config = config
        self._init_stages()
        self._source_code_hash = None
コード例 #2
0
def test_kde_bootstrapping(verbosity=Levels.WARN):
    """Unit test for the bootstrapping option of the kde stage.

    The example pipeline is rebuilt with its histogram stage replaced by a KDE
    stage. The test then asserts that two different bootstrap seeds yield
    unequal maps and that re-using a seed reproduces the same map.
    """

    set_verbosity(verbosity)

    base_cfg = parse_pipeline_config("settings/pipeline/example.cfg")

    # Containers with too few events have to go: after bootstrapping, the KDE
    # fails when too few distinct events are left in one of the PID channels.
    base_cfg[("data", "simple_data_loader")]["output_names"] = [
        "numu_cc",
        "numubar_cc",
    ]

    kde_settings = OrderedDict(
        [
            ("apply_mode", base_cfg[("utils", "hist")]["apply_mode"]),
            ("calc_mode", "events"),
            ("bootstrap", False),
            ("bootstrap_seed", 0),
            ("bootstrap_niter", 5),
        ]
    )

    # Swap the histogram stage for the KDE stage in a copy of the config.
    pipe_cfg = deepcopy(base_cfg)
    del pipe_cfg[("utils", "hist")]
    pipe_cfg[("utils", "kde")] = kde_settings

    # Bootstrapping is off for the baseline, so there are no errors to output.
    pipe_cfg["pipeline"]["output_key"] = "weights"

    dmaker = DistributionMaker([pipe_cfg])

    def summed_map():
        """Evaluate the distribution maker and return the first summed output."""
        return dmaker.get_outputs(return_sum=True)[0]

    map_baseline = summed_map()
    logging.debug(f"Baseline KDE'd map:\n{map_baseline}")

    # Switch bootstrapping on in the already-loaded pipeline (saves the time of
    # re-loading everything), then compare maps across seeds.
    pipeline = dmaker.pipelines[0]
    pipeline.output_key = ("weights", "errors")
    kde_stage = pipeline.stages[-1]
    kde_stage.bootstrap = True

    map_seed0 = summed_map()
    kde_stage.bootstrap_seed = 1
    map_seed1 = summed_map()

    logging.debug(f"Map with seed 0 is:\n{map_seed0}")
    logging.debug(f"Map with seed 1 is:\n{map_seed1}")

    # Different seeds must give different maps ...
    assert not map_seed0 == map_seed1

    # ... and going back to the first seed must give the first map again.
    kde_stage.bootstrap_seed = 0
    map_seed0_reprod = summed_map()

    assert map_seed0 == map_seed0_reprod

    logging.info("<< PASS : kde_bootstrapping >>")
コード例 #3
0
ファイル: pipeline.py プロジェクト: hignight/pisa-1
    def __init__(self, config):
        """Set up the pipeline from a pipeline configuration.

        Parameters
        ----------
        config : str, PISAConfigParser, or OrderedDict
            A str or PISAConfigParser is passed through
            `parse_pipeline_config`; an OrderedDict is used as given.

        Raises
        ------
        TypeError
            If `config` is none of the accepted types.

        """
        parseable_types = (str, PISAConfigParser)
        if isinstance(config, parseable_types):
            config = parse_pipeline_config(config=config)
        elif not isinstance(config, OrderedDict):
            bad_type_name = type(config).__name__
            raise TypeError("`config` passed is of type %s but must be string,"
                            " PISAConfigParser, or OrderedDict" %
                            bad_type_name)

        self.pisa_version = None

        self._stages = []
        # NOTE: `pop` removes 'detector_name' from `config` itself, so an
        # OrderedDict handed in by the caller is modified in place.
        detector_name = config.pop('detector_name', None)
        self._detector_name = detector_name
        self._config = config
        self._init_stages()
        self._source_code_hash = None
コード例 #4
0
ファイル: likelihoods_1D_test.py プロジェクト: Raimer/pisa
def create_mc_template(toymc_params, config_file=None, seed=None):
    '''
    Build a DistributionMaker from a pisa pipeline config, with the parameters
    of the ('data', 'pi_simple_signal') stage overridden from `toymc_params`.

    `toymc_params` must provide the attributes `n_data`, `signal_fraction`,
    `stats_factor`, `mu` and `sigma`. If `seed` is not None it is passed to
    `np.random.seed` before anything else happens.
    '''
    if seed is not None:
        np.random.seed(seed)

    pipeline_cfg = parse_pipeline_config(config_file)

    def fixed_param(name, value):
        # Fixed parameter without prior or range.
        return Param(name=name,
                     value=value,
                     prior=None,
                     range=None,
                     is_fixed=True)

    overrides = [
        fixed_param('n_events_data', toymc_params.n_data),
        fixed_param('signal_fraction', toymc_params.signal_fraction),
        fixed_param('stats_factor', toymc_params.stats_factor),
        # mu and sigma should match the values of the config file, but they
        # are overridden anyway in case they need to be changed later.
        # mu is the only parameter that is left free.
        Param(name='mu',
              value=toymc_params.mu,
              prior=None,
              range=[0, 100],
              is_fixed=False),
        fixed_param('sigma', toymc_params.sigma),
    ]

    signal_stage_cfg = pipeline_cfg[('data', 'pi_simple_signal')]
    signal_stage_cfg['params'].update(p=ParamSet(overrides))

    return DistributionMaker(pipeline_cfg)
コード例 #5
0
ファイル: bootstrap.py プロジェクト: icecube/pisa
def test_bootstrap():
    """Unit test for the bootstrap stage.

    Checks that different seeds give unequal maps, that a repeated seed
    reproduces its map, and that mean and spread over 100 seeds agree with the
    nominal values and standard deviations of the un-bootstrapped baseline.
    """

    from pisa.core.distribution_maker import DistributionMaker
    from pisa.core.map import Map
    from pisa.utils.config_parser import parse_pipeline_config
    from pisa.utils.comparisons import ALLCLOSE_KW

    from numpy.testing import assert_allclose

    base_cfg = parse_pipeline_config("settings/pipeline/example.cfg")

    # The bootstrap stage has to sit right after the data loading stage.
    bootstrap_cfg = insert_bootstrap_after_data_loader(base_cfg, seed=0)

    logging.debug("bootstrapped pipeline stage order:")
    logging.debug(list(bootstrap_cfg.keys()))

    # Baseline from the unmodified example pipeline.
    map_baseline = DistributionMaker([base_cfg]).get_outputs(return_sum=True)[0]

    dmaker = DistributionMaker([bootstrap_cfg])
    map_seed0 = dmaker.get_outputs(return_sum=True)[0]

    # Locate the bootstrap stage: the last stage whose class is named
    # "bootstrap", or index 0 if there is no such stage.
    matching = [
        pos
        for pos, stage in enumerate(dmaker.pipelines[0].stages)
        if stage.__class__.__name__ == "bootstrap"
    ]
    bootstrap_idx = matching[-1] if matching else 0

    def map_for_seed(seed):
        """Reseed the bootstrap stage, re-run its setup and return the map.

        Only the seed is changed and `setup` re-run, instead of re-loading the
        entire pipeline, to save time.
        """
        bootstrap_stage = dmaker.pipelines[0].stages[bootstrap_idx]
        bootstrap_stage.seed = seed
        bootstrap_stage.setup()
        return dmaker.get_outputs(return_sum=True)[0]

    # Different seeds must give different maps ...
    map_seed1 = map_for_seed(1)
    assert not map_seed0 == map_seed1

    # ... and the same seed must give the same map.
    map_seed0_reprod = map_for_seed(0)
    assert map_seed0 == map_seed0_reprod

    # Quantify the variance of the resulting maps. It should be about the size
    # of the expectation from the sum of weights-squared.
    nominal_values = np.stack(
        [map_for_seed(seed).nominal_values for seed in range(100)]
    )

    with np.errstate(divide="ignore", invalid="ignore"):
        # Ratio of the mean bootstrap nominal values to the baseline ones,
        # and of the bootstrap spread to the baseline standard deviations.
        mean_over_seeds = np.mean(nominal_values, axis=0)
        std_over_seeds = np.std(nominal_values, axis=0)
        bs_nom_ratios = mean_over_seeds / map_baseline.nominal_values
        bs_std_ratios = std_over_seeds / map_baseline.std_devs
        # Both must match the baseline expectation up to a small error.
        assert np.abs(np.nanmean(bs_nom_ratios) - 1.0) < 0.01
        # The standard deviations are a little harder to match in 100 samples.
        assert np.abs(np.nanmean(bs_std_ratios) - 1.0) < 0.02

    logging.info("<< PASS : bootstrap >>")
コード例 #6
0
def test_pi_resample():
    """Unit test for the resampling stage.

    Two pipelines are built from the example configuration: one whose
    histogram stage outputs a coarse (downsampled) binning, and a copy with an
    extra `pi_resample` stage that upsamples back to the original binning.
    The test asserts that the total count is unchanged by the upsampling and
    that the modified chi-square between a map and a fluctuated copy of it is
    the same before and after upsampling.
    """
    from pisa.core.distribution_maker import DistributionMaker
    from pisa.core.map import Map
    from pisa.utils.config_parser import parse_pipeline_config
    from pisa.utils.log import set_verbosity, logging
    from pisa.utils.comparisons import ALLCLOSE_KW
    from collections import OrderedDict
    from copy import deepcopy

    example_cfg = parse_pipeline_config('settings/pipeline/example.cfg')
    reco_binning = example_cfg[('utils', 'pi_hist')]['output_specs']
    coarse_binning = reco_binning.downsample(reco_energy=2, reco_coszen=2)
    assert coarse_binning.is_compat(reco_binning)

    # replace binning of output with coarse binning
    example_cfg[('utils', 'pi_hist')]['output_specs'] = coarse_binning
    # make another pipeline with an upsampling stage to the original binning
    upsample_cfg = deepcopy(example_cfg)
    pi_resample_cfg = OrderedDict()
    pi_resample_cfg['input_specs'] = coarse_binning
    pi_resample_cfg['output_specs'] = reco_binning
    pi_resample_cfg['scale_errors'] = True
    upsample_cfg[('utils', 'pi_resample')] = pi_resample_cfg

    example_maker = DistributionMaker([example_cfg])
    upsampled_maker = DistributionMaker([upsample_cfg])

    example_map = example_maker.get_outputs(return_sum=True)[0]
    example_map_upsampled = upsampled_maker.get_outputs(return_sum=True)[0]

    # First check: The upsampled map must have the same total count as the original map
    assert np.isclose(
        np.sum(example_map.nominal_values),
        np.sum(example_map_upsampled.nominal_values),
    )

    # Check consistency of modified chi-square
    # ----------------------------------------
    # When the assumption holds that events are uniformly distributed over the coarse
    # bins, the modified chi-square should not change from upscaling the maps. We test
    # this by making a fluctuated coarse map and then upsampling that map according to
    # the assumption by bin volumes. We should find that the modified chi-square between
    # the coarse map and the coarse fluctuated map is the same as the upsampled map and
    # the upsampled fluctuated map.

    # It doesn't matter precisely how we fluctuate it here, we just want any different
    # map...
    random_map_coarse = example_map.fluctuate(method='scaled_poisson',
                                              random_state=42)
    random_map_coarse.set_errors(None)

    # This bit is an entirely independent implementation of the upsampling. The count
    # in every bin is scaled according to the ratio of weighted bin volumes.
    upsampled_hist = np.zeros_like(example_map_upsampled.nominal_values)
    up_binning = example_map_upsampled.binning

    coarse_hist = np.array(random_map_coarse.nominal_values)
    # Binning of the fluctuated coarse map; kept under its own name so that it is
    # not confused with the `coarse_binning` that went into the configs above.
    random_coarse_binning = random_map_coarse.binning

    # Neither the coarse bin volumes nor the coarse bin edges depend on the fine
    # bin being processed, so they are computed once, outside of the loop.
    coarse_volumes = random_coarse_binning.weighted_bin_volumes(
        attach_units=False, )
    coarse_edges = [
        random_coarse_binning[dim].bin_edges.m  # coarse bin edges in that dim
        for dim in up_binning.names
    ]

    for bin_idx in np.ndindex(upsampled_hist.shape):
        one_bin = up_binning[bin_idx]
        fine_bin_volume = one_bin.weighted_bin_volumes(
            attach_units=False, ).squeeze().item()
        # the following is basically an independent implementation of translate.lookup
        coarse_index = []  # index where the upsampled bin came from
        for dim, bins in zip(up_binning.names, coarse_edges):
            x = one_bin[dim].weighted_centers[
                0].m  # middle point of the one bin
            coarse_index.append(np.digitize(x, bins) -
                                1)  # index 1 means bin 0
        coarse_index = tuple(coarse_index)
        coarse_bin_volume = coarse_volumes[coarse_index].squeeze().item()

        upsampled_hist[bin_idx] = coarse_hist[coarse_index]
        upsampled_hist[bin_idx] *= fine_bin_volume
        upsampled_hist[bin_idx] /= coarse_bin_volume

    # done, at last!
    random_map_upsampled = Map(name="random_upsampled",
                               hist=upsampled_hist,
                               binning=up_binning)
    random_map_upsampled.set_errors(None)

    # After ALL THIS, we get the same modified chi-square from the coarse and the
    # upsampled pair of maps. Neat, huh?
    assert np.allclose(
        random_map_coarse.mod_chi2(example_map),
        random_map_upsampled.mod_chi2(example_map_upsampled),
        **ALLCLOSE_KW,
    )
    logging.info('<< PASS : pi_resample >>')
コード例 #7
0
def _first_stage_key(config, stage_name):
    """Return the first key of `config` whose leading element is `stage_name`.

    Raises IndexError if no key matches.
    """
    matching_keys = [key for key in config.keys() if key[0] == stage_name]
    return matching_keys[0]


def main():
    """Parse the command line and run the reco/pid pipeline comparisons.

    The 5-stage and the 4-stage test configurations are loaded, the weighting
    field given via ``--weighting`` is written into their reco (and, for the
    5-stage configuration, pid) parameters, and `do_comparisons` is run on the
    pair. With ``--oversampling`` the comparison is repeated for each of a
    fixed list of oversampling factors.
    """
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--oversampling',
                        action='store_true',
                        default=False,
                        help='''Run oversampling tests i.e. use a finer binning
                        through the truth stages in addition to the standard
                        tests. You must flag this if you want it.''')
    parser.add_argument('--weighting',
                        type=str,
                        default=None,
                        help='''Name of the weighting field to use in the
                        comparisons. This must correspond to a field in the
                        events files being used.''')
    parser.add_argument('--outdir',
                        metavar='DIR',
                        type=str,
                        required=True,
                        help='''Store all output plots to this directory. If
                        they don't exist, the script will make them, including
                        all subdirectories.''')
    parser.add_argument('-v',
                        action='count',
                        default=None,
                        help='set verbosity level')
    args = parser.parse_args()
    set_verbosity(args.v)

    known_weights = [None, 'weighted_aeff']

    if args.weighting not in known_weights:
        logging.warning('''%s weighting field not known to be in events file.
            Tests may not work in this case!''' % args.weighting)

    # Want these for all tests
    pisa_standard_settings = os.path.join(
        'tests', 'settings', 'recopid_full_pipeline_5stage_test.cfg')
    pisa_standard_config = parse_pipeline_config(pisa_standard_settings)
    pisa_recopid_settings = os.path.join(
        'tests', 'settings', 'recopid_full_pipeline_4stage_test.cfg')
    pisa_recopid_config = parse_pipeline_config(pisa_recopid_settings)

    # Add weighting to pipeline according to user input
    # Need to add it to both reco and PID for standard config
    reco_k = _first_stage_key(pisa_standard_config, 'reco')
    standard_reco_params = pisa_standard_config[reco_k]['params'].params
    standard_reco_params.reco_weights_name.value = args.weighting
    pid_k = _first_stage_key(pisa_standard_config, 'pid')
    standard_pid_params = pisa_standard_config[pid_k]['params'].params
    standard_pid_params.pid_weights_name.value = args.weighting
    # Just needs adding to reco for joined recopid config
    recopid_k = _first_stage_key(pisa_recopid_config, 'reco')
    recopid_reco_params = pisa_recopid_config[recopid_k]['params'].params
    recopid_reco_params.reco_weights_name.value = args.weighting

    # Load OscFit file for comparisons
    oscfitfile = os.path.join('tests', 'data', 'oscfit',
                              'OscFit1X600Baseline.json')

    # Human-readable label for logs and filenames. The parsed argument itself
    # is left untouched; the configs above already received the raw value.
    if args.weighting is None:
        weighting_label = 'unweighted'
    else:
        weighting_label = args.weighting

    logging.info("<<<< %s reco/pid Transformations >>>>" % weighting_label)
    # Perform baseline tests
    logging.info("<< No oversampling >>")
    do_comparisons(config1=deepcopy(pisa_standard_config),
                   config2=deepcopy(pisa_recopid_config),
                   oscfitfile=oscfitfile,
                   testname1='5-stage-%s' % weighting_label,
                   testname2='4-stage-%s' % weighting_label,
                   outdir=args.outdir)

    # Perform oversampled tests
    if args.oversampling:
        oversamples = [5, 10, 20, 50]
        for oversample in oversamples:
            # Each call works on its own deep copy, so the base configs stay
            # as they are for the next oversampling factor.
            pisa_standard_oversampled_config = oversample_config(
                base_config=deepcopy(pisa_standard_config),
                oversample=oversample)
            pisa_recopid_oversampled_config = oversample_config(
                base_config=deepcopy(pisa_recopid_config),
                oversample=oversample)
            logging.info("<< Oversampling by %i >>" % (oversample))
            do_comparisons(config1=deepcopy(pisa_standard_oversampled_config),
                           config2=deepcopy(pisa_recopid_oversampled_config),
                           oscfitfile=oscfitfile,
                           testname1='5-stage-%s-Oversampled%i' %
                           (weighting_label, oversample),
                           testname2='4-stage-%s-Oversampled%i' %
                           (weighting_label, oversample),
                           outdir=args.outdir)
コード例 #8
0
def create_mc_template(toymc_params,
                       config_file=None,
                       seed=None,
                       keep_same_weight=True):
    '''
    Build a DistributionMaker from a pisa pipeline config, with binning,
    likelihood options and signal parameters overridden from `toymc_params`.

    `toymc_params` must provide the attributes `binning`, `n_data`,
    `signal_fraction`, `stats_factor`, `mu` and `sigma`. If `seed` is not None
    it is passed to `np.random.seed` before anything else happens.
    `keep_same_weight` switches the mean adjust and the pseudo weight of
    pi_generalized_llh off (True) or on (False).
    '''
    if seed is not None:
        np.random.seed(seed)

    pipeline_cfg = parse_pipeline_config(config_file)

    signal_key = ('data', 'pi_simple_signal')
    llh_key = ('likelihood', 'pi_generalized_llh_params')

    # Change binning
    pipeline_cfg[signal_key]['output_specs'] = toymc_params.binning
    pipeline_cfg[llh_key]['output_specs'] = toymc_params.binning

    # Mean adjust and pseudo weight are both on exactly when the weights are
    # NOT to be kept the same.
    adjust_weights = not keep_same_weight
    pipeline_cfg[llh_key]['with_mean_adjust'] = adjust_weights
    pipeline_cfg[llh_key]['with_pseudo_weight'] = adjust_weights

    def fixed_param(name, value):
        # Fixed parameter without prior or range.
        return Param(name=name,
                     value=value,
                     prior=None,
                     range=None,
                     is_fixed=True)

    overrides = [
        fixed_param('n_events_data', toymc_params.n_data),
        fixed_param('signal_fraction', toymc_params.signal_fraction),
        fixed_param('stats_factor', toymc_params.stats_factor),
        # mu and sigma should match the values of the config file, but they
        # are overridden anyway in case they need to be changed later.
        # mu is the only parameter that is left free.
        Param(name='mu',
              value=toymc_params.mu,
              prior=None,
              range=[0, 100],
              is_fixed=False),
        fixed_param('sigma', toymc_params.sigma),
    ]
    pipeline_cfg[signal_key]['params'].update(p=ParamSet(overrides))

    return DistributionMaker(pipeline_cfg)