Example 1
File: fit.py Project: terliuk/pisa
    def calculate_fit_coeffs(self):
        """
        Calculate the fit coefficients for each systematic, flavint, bin
        for a polynomial.
        """
        this_hash = hash_obj(
            [self.fit_binning.hash, self.weight_hash] +
            [self.params[name].value for name in self.fit_params],
            full_hash=self.full_hash
        )
        if self.fitcoeffs_hash == this_hash:
            return self._fit_coeffs

        if self.neutrinos:
            nu_params = self.nu_params
        else:
            nu_params = None
        if self.muons:
            mu_params = self.mu_params
        else:
            mu_params = None

        if self.params['cache_fit'].value:
            this_cache_hash = hash_obj(
                [self._data.metadata['name'], self._data.metadata['sample'],
                 self._data.metadata['cuts'], self.fit_binning.hash] +
                [self.params[name].value for name in self.fit_params],
                full_hash=self.full_hash
            )

            if self.fitcoeffs_cache_hash == this_cache_hash:
                fit_coeffs = deepcopy(self._cached_fc)
            elif this_cache_hash in self.disk_cache:
                logging.info('Loading fit coefficients from cache.')
                self._cached_fc = self.disk_cache[this_cache_hash]
                fit_coeffs = deepcopy(self._cached_fc)
                self.fitcoeffs_cache_hash = this_cache_hash
            else:
                fit_coeffs = self._calculate_fit_coeffs(
                    self._data, ParamSet(p for p in self.params
                                         if p.name in self.fit_params),
                    self.fit_binning, nu_params, mu_params
                )
        else:
            fit_coeffs = self._calculate_fit_coeffs(
                self._data, ParamSet(p for p in self.params
                                     if p.name in self.fit_params),
                self.fit_binning, nu_params, mu_params
            )

        if self.params['cache_fit'].value:
            if this_cache_hash not in self.disk_cache:
                logging.info('Caching fit coefficient values to disk.')
                self.disk_cache[this_cache_hash] = fit_coeffs

        self.fitcoeffs_hash = this_hash
        self._fit_coeffs = fit_coeffs
        return fit_coeffs
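
The method above layers two caches: an in-memory one keyed by `fitcoeffs_hash` and an optional disk cache keyed by a hash of the sample metadata. A minimal, self-contained sketch of the same pattern, with a plain dict standing in for PISA's disk cache and Python's built-in `hash` standing in for `hash_obj` (both stand-ins are assumptions):

from copy import deepcopy

class CachedFitter(object):
    """Toy illustration of the memory-plus-disk caching pattern above."""

    def __init__(self):
        self._mem_hash = None   # analogous to self.fitcoeffs_hash
        self._mem_value = None  # analogous to self._fit_coeffs
        self.disk_cache = {}    # stand-in for PISA's on-disk cache

    def _expensive_fit(self, params):
        # placeholder for the real _calculate_fit_coeffs
        return {name: value ** 2 for name, value in params.items()}

    def fit(self, params):
        key = hash(tuple(sorted(params.items())))  # stand-in for hash_obj
        if self._mem_hash == key:    # fastest path: unchanged since last call
            return self._mem_value
        if key in self.disk_cache:   # second path: previously computed, on disk
            result = deepcopy(self.disk_cache[key])
        else:                        # slowest path: recompute and cache
            result = self._expensive_fit(params)
            self.disk_cache[key] = result
        self._mem_hash, self._mem_value = key, result
        return result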
Example 2
class TEST_CONFIGS(object):
    """Default configurations for stages in a minimal test pipeline."""

    pipe_cfg = OrderedDict(
        pipeline={
            "name": "muons",
            "output_binning": TEST_BINNING,
            "output_key": ("weights"),
            "detector_name": None,
        }
    )
    event_generator_cfg = {
        "calc_mode": "events",
        "apply_mode": "events",
        "output_names": ["muon"],
        "params": ParamSet(
            [
                Param(name="n_events", value=1e3, **PARAM_DEFAULTS),
                Param(name="seed", value=0, **PARAM_DEFAULTS),
                Param(name="random", value=False, **PARAM_DEFAULTS),
            ]
        ),
    }
    aeff_cfg = {
        "calc_mode": "events",
        "apply_mode": "events",
        "params": ParamSet(
            [
                Param(name="livetime", value=12345 * ureg["seconds"], **PARAM_DEFAULTS),
                Param(name="weight_scale", value=1.0, **PARAM_DEFAULTS),
            ]
        ),
    }
    set_variance_cfg = {
        "calc_mode": TEST_BINNING,
        "apply_mode": TEST_BINNING,
        "divide_total_mc": True,
        # expected number of unweighted MC events including events that fall outside of
        # the analysis binning
        "expected_total_mc": 1000,
        "variance_scale": 0.1,
    }
    fix_error_cfg = {
        "calc_mode": TEST_BINNING,
        "apply_mode": TEST_BINNING,
    }
    kde_cfg = {
        "calc_mode": "events",
        "apply_mode": TEST_BINNING,
        "bootstrap": False,
        "bootstrap_seed": 0,
        "bootstrap_niter": 6,
        "linearize_log_dims": True,
        "stash_hists": False,
        "coszen_name": "true_coszen",
        "stack_pid": False,
        "oversample": 1,
    }
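
`TEST_BINNING`, `ureg`, and `PARAM_DEFAULTS` come from the surrounding test module and are not shown. Judging from the `Param` constructor calls in Examples 5 and 10, `PARAM_DEFAULTS` plausibly supplies the keyword arguments not given inline; a hypothetical definition could be:

# Hypothetical stand-in: the real PARAM_DEFAULTS lives in the test module.
PARAM_DEFAULTS = {
    "prior": None,     # no prior constraint on the test params
    "range": None,     # unconstrained range
    "is_fixed": True,  # not varied in any fit
}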
Example 3
    def params(self):
        """Returns a ParamSet including all params of all detectors. First the shared params
        (if there are some), then all the "single detector" params. If two detectors use a
        parameter with the same name (but not shared), the name of the detector is added to the
        parameter name (except for the first detector).
        """
        params = ParamSet()
        for p_name in self.shared_params:
            for distribution_maker in self:
                try:
                    params.extend(distribution_maker.params[p_name])
                    break  # shared param found, can continue with the next shared param
                except Exception:  # param not in this DistributionMaker; try the next one
                    continue

        for distribution_maker in self:
            for param in distribution_maker.params:
                if param.name in params.names and param.name in self.shared_params:
                    continue  # shared param is already in param set, can continue with the next param
                elif param.name in params.names:  # two parameters with the same name but not shared
                    # add detector name to the parameter name
                    changed_param = deepcopy(param)
                    changed_param.name = param.name + '_' + distribution_maker._detector_name
                    params.extend(changed_param)
                else:
                    params.extend(param)
        return params
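
The collision rule (same name, not shared: append the detector name) can be exercised in isolation. A sketch using the `Param` constructor as it appears in Examples 5 and 10; the detector names and the `aeff_scale` parameter are hypothetical:

from copy import deepcopy
from pisa.core.param import Param, ParamSet

# Two detectors each define their own (unshared) 'aeff_scale'.
det1 = ParamSet([Param(name='aeff_scale', value=1.0,
                       prior=None, range=None, is_fixed=True)])
det2 = ParamSet([Param(name='aeff_scale', value=1.1,
                       prior=None, range=None, is_fixed=True)])

merged = ParamSet()
for det_name, det_params in [('detector1', det1), ('detector2', det2)]:
    for param in det_params:
        if param.name in merged.names:   # collision: rename a copy
            renamed = deepcopy(param)
            renamed.name = param.name + '_' + det_name
            merged.extend(renamed)
        else:                            # first occurrence keeps its bare name
            merged.extend(param)

print(merged.names)  # expected: ['aeff_scale', 'aeff_scale_detector2']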
Example 4
    def update_params(self, params):
        """Update the params of all contained DistributionMakers; a param whose
        name carries a detector-name suffix is routed to that detector only.
        """
        for distribution_maker in self:
            distribution_maker.update_params(params)

        # if None in self.det_names: return  # no detector names

        if isinstance(params, Param):  # wrap a single Param for the loop below
            params = ParamSet(params)

        for p in params.names:  # now route params that carry a detector name
            for i, det_name in enumerate(self.det_names):
                if det_name in p:
                    cp = deepcopy(params[p])
                    cp.name = cp.name.replace('_'+det_name, "")
                    self._distribution_makers[i].update_params(cp)
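
Read together with Example 3, this completes a round trip: `params` appends the detector name on a collision, and `update_params` strips it again before routing. The name-routing step on its own (all names hypothetical):

det_names = ['detector1', 'detector2']  # order matches the DistributionMakers
p_name = 'aeff_scale_detector2'         # suffixed name as produced in Example 3

for i, det_name in enumerate(det_names):
    if det_name in p_name:
        stripped = p_name.replace('_' + det_name, '')
        print(i, stripped)              # -> 1 aeff_scale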
Example 5
def create_mc_template(toymc_params, config_file=None, seed=None):
    '''
    Create an MC template from a PISA pipeline.
    '''
    if seed is not None:
        np.random.seed(seed)

    Config = parse_pipeline_config(config_file)

    new_n_events_data = Param(name='n_events_data',
                              value=toymc_params.n_data,
                              prior=None,
                              range=None,
                              is_fixed=True)
    new_sig_frac = Param(name='signal_fraction',
                         value=toymc_params.signal_fraction,
                         prior=None,
                         range=None,
                         is_fixed=True)
    new_stats_factor = Param(name='stats_factor',
                             value=toymc_params.stats_factor,
                             prior=None,
                             range=None,
                             is_fixed=True)

    # These should match the config file values; we override them here in case
    # they need to change later
    new_mu = Param(name='mu',
                   value=toymc_params.mu,
                   prior=None,
                   range=[0, 100],
                   is_fixed=False)
    new_sigma = Param(name='sigma',
                      value=toymc_params.sigma,
                      prior=None,
                      range=None,
                      is_fixed=True)
    Config[('data', 'pi_simple_signal')]['params'].update(p=ParamSet([
        new_n_events_data, new_sig_frac, new_stats_factor, new_mu, new_sigma
    ]))

    MCtemplate = DistributionMaker(Config)

    return MCtemplate
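
A hedged usage sketch: `toymc_params` is assumed to expose the attributes referenced above (`n_data`, `signal_fraction`, `stats_factor`, `mu`, `sigma`), and the config path is hypothetical but must contain the `('data', 'pi_simple_signal')` stage:

MCtemplate = create_mc_template(toymc_params,
                                config_file='settings/pipeline/toy_signal.cfg',  # hypothetical
                                seed=0)
maps = MCtemplate.get_outputs(return_sum=True)  # assuming DistributionMaker's usual signature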
Example 6
def pipeline_cfg_from_states(state_dict):
    """Recover a pipeline cfg containing PISA objects from a raw state.

    When a pipeline configuration is stored to JSON, the PISA objects turn into
    their serialized states. This function looks through the dictionary returned by
    `from_json` and recovers the PISA objects such as `ParamSet` and `MultiDimBinning`.

    Reading and writing PISA objects nested inside dictionaries/lists to and
    from JSON should really become part of PISA's file I/O functionality, so
    that they can be recovered automatically...
    """

    # TODO: Make this a core functionality of PISA

    # This is just a mess... some objects have a `from_state` method, some take the
    # unpacked state dict as input, some take the state...

    pipeline_cfg = collections.OrderedDict()
    for stage_key in state_dict.keys():
        # need to check all of this manually... no automatic way to do it :(
        if stage_key == "pipeline":
            pipeline_cfg[stage_key] = copy.deepcopy(state_dict[stage_key])
            pipeline_cfg[stage_key]["output_key"] = tuple(
                pipeline_cfg[stage_key]["output_key"])
            binning_state = pipeline_cfg[stage_key]["output_binning"]
            pipeline_cfg[stage_key]["output_binning"] = MultiDimBinning(
                **binning_state)
            continue
        # undo what we did in `serialize_pipeline_cfg` by splitting the keys into tuples
        tuple_key = tuple(stage_key.split("__"))
        pipeline_cfg[tuple_key] = copy.deepcopy(state_dict[stage_key])
        for k in ["calc_mode", "apply_mode", "node_mode"]:
            if k in pipeline_cfg[tuple_key]:
                if isinstance(pipeline_cfg[tuple_key][k],
                              collections.abc.Mapping):
                    pipeline_cfg[tuple_key][k] = MultiDimBinning(
                        **pipeline_cfg[tuple_key][k])
        if "params" in pipeline_cfg[tuple_key].keys():
            pipeline_cfg[tuple_key]["params"] = ParamSet(
                pipeline_cfg[tuple_key]["params"])
    # if any stage takes any other arguments that we didn't think of here, they
    # won't work
    return pipeline_cfg
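
The comment about `serialize_pipeline_cfg` refers to the forward direction: tuple stage keys cannot serve as JSON keys, so they are joined into strings before writing. A minimal sketch consistent with the `"__"` split above; the body is an assumption, not necessarily PISA's actual implementation:

import collections

def serialize_pipeline_cfg(pipeline_cfg):
    """Make stage keys JSON-safe, e.g.
    ('data', 'pi_simple_signal') -> 'data__pi_simple_signal'."""
    serializable = collections.OrderedDict()
    for key, stage_cfg in pipeline_cfg.items():
        if isinstance(key, tuple):
            key = "__".join(key)
        serializable[key] = stage_cfg
    return serializable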
Example 7
    def params(self):
        """pisa.core.param.ParamSet : combined params of all pipelines"""
        params = ParamSet()
        for pipeline in self:
            params.extend(pipeline.params)
        return params
Example 8
    def params(self):
        """pisa.core.param.ParamSet : pipeline's parameters"""
        params = ParamSet()
        for stage in self:
            params.extend(stage.params)
        return params
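
Examples 7 and 8 are the same aggregation pattern at two levels: a DistributionMaker collects params from its pipelines, a Pipeline from its stages. A short usage sketch, assuming an already-constructed `pipeline` object:

params = pipeline.params  # aggregated ParamSet over all stages
print(params.names)       # every parameter name in the pipeline
print(params.free)        # only the free (is_fixed=False) params, cf. Example 9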
Example 9
def main():
    global SIGMA
    args = vars(parse_args())
    set_verbosity(args.pop('v'))
    center_zero = args.pop('center_zero')

    make_pdf = False
    if args['pdf']:
        make_pdf = True
        args['pdf'] = False

    outdir = args.pop('outdir')
    fileio.mkdir(outdir, mode=0o755)
    SIGMA *= args.pop('sigma')

    cfx_pipe = Pipeline(args.pop('cfx_pipeline'))

    signal = args.pop('signal').replace(' ', '').split(',')
    output_str = []
    for name in signal:
        if 'muons' in name or 'noise' in name:
            raise AssertionError('Are you trying to unfold muons/noise?')
        elif 'all_nu' in name:
            output_str = [str(NuFlavIntGroup(f)) for f in ALL_NUFLAVINTS]
        else:
            output_str.append(NuFlavIntGroup(name))
    output_str = [str(f) for f in output_str]
    cfx_pipe._output_names = output_str

    # Turn off stat fluctuations
    stat_param = cfx_pipe.params['stat_fluctuations']
    stat_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(stat_param)

    # Get nominal Map
    re_param = cfx_pipe.params['regularisation']
    re_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(re_param)
    nom_out = cfx_pipe.get_outputs()

    re_param.reset()
    cfx_pipe.update_params(re_param)

    params = ParamSet()
    for param in cfx_pipe.params:
        if param.name != 'dataset':
            params.extend(param)

    free = params.free
    logging.info('Free params = {0}'.format(free))
    contin = True
    for f in free:
        if 'hole_ice' not in f.name and 'dom_eff' not in f.name:
            continue
        # if 'atm_muon_scale' in f.name:
        #     contin = False
        # if contin:
        #     continue

        logging.info('Working on parameter {0}'.format(f.name))
        if f.prior.kind != 'uniform':
            # Use deltaLLH = SIGMA to define +/- sigma for non-uniform
            scan_over = np.linspace(*f.range, num=1000) * f.range[0].u
            llh = f.prior.llh(scan_over)
            dllh = np.max(llh) - llh  # LLH drop relative to the maximum

            mllh_idx = np.argmax(llh)
            if mllh_idx == 0:
                l_sig_idx = 0
            else:
                l_sig_idx = np.argmin(np.abs(dllh[:mllh_idx] - SIGMA))
            u_sig_idx = np.argmin(np.abs(dllh[mllh_idx:] - SIGMA)) + mllh_idx

            l_sigma = scan_over[l_sig_idx]
            u_sigma = scan_over[u_sig_idx]
        else:
            l_sigma = f.range[0]
            u_sigma = f.range[1]

        logging.info('Setting {0} lower sigma bound to '
                     '{1}'.format(f.name, l_sigma))
        f.value = l_sigma
        cfx_pipe.update_params(f)
        l_out = cfx_pipe.get_outputs()

        logging.info('Setting {0} upper sigma bound to '
                     '{1}'.format(f.name, u_sigma))
        f.value = u_sigma
        cfx_pipe.update_params(f)
        u_out = cfx_pipe.get_outputs()

        f.reset()
        cfx_pipe.update_params(f)

        f_outdir = outdir + '/' + f.name
        l_outdir = f_outdir + '/' + 'lower'
        u_outdir = f_outdir + '/' + 'upper'
        fileio.mkdir(f_outdir)
        fileio.mkdir(l_outdir)
        fileio.mkdir(u_outdir)

        compare(outdir=l_outdir,
                ref=MapSet([nom_out]),
                ref_label='baseline',
                test=MapSet([l_out]),
                test_label=r'-sigma',
                **args)
        compare(outdir=u_outdir,
                ref=MapSet([nom_out]),
                ref_label='baseline',
                test=MapSet([u_out]),
                test_label=r'+sigma',
                **args)

        l_in_mapset = l_outdir + '/' + 'fract_diff__-sigma___baseline.json.bz2'
        u_in_mapset = u_outdir + '/' + 'fract_diff__+sigma___baseline.json.bz2'
        l_in_map = MapSet.from_json(l_in_mapset).pop() * 100.
        u_in_map = MapSet.from_json(u_in_mapset).pop() * 100.

        if make_pdf:
            outfile = f_outdir + '/systematic_effect.pdf'
        else:
            outfile = f_outdir + '/systematic_effect.png'
        title = r'% effect on ' + r'${0}$'.format(l_in_map.tex) + \
                ' event counts for {0} parameter'.format(f.name)
        sub_titles = (r'(-\sigma - {\rm baseline}) \:/\: {\rm baseline}',
                      r'(+\sigma - {\rm baseline}) \:/\: {\rm baseline}')
        make_plot(
            maps=(l_in_map, u_in_map),
            outfile=outfile,
            logv=False,
            center_zero=center_zero,
            vlabel=r'({\rm change} - {\rm baseline}) \:/\: {\rm baseline} (%)',
            title=title,
            sub_titles=sub_titles)
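
The +/-sigma extraction in the non-uniform branch can be demonstrated standalone: scan the prior LLH, locate its maximum, and find on each side the point where the LLH has dropped by SIGMA. A numpy sketch with a unit-width Gaussian prior (all values hypothetical):

import numpy as np

SIGMA = 0.5                         # delta-LLH defining +/-1 sigma for a Gaussian
scan = np.linspace(-3.0, 3.0, 1000)
llh = -0.5 * scan ** 2              # Gaussian prior LLH, centered at 0, width 1

dllh = np.max(llh) - llh            # LLH drop relative to the maximum (>= 0)
max_idx = np.argmax(llh)
l_idx = np.argmin(np.abs(dllh[:max_idx] - SIGMA)) if max_idx > 0 else 0
u_idx = np.argmin(np.abs(dllh[max_idx:] - SIGMA)) + max_idx

print(scan[l_idx], scan[u_idx])     # approximately -1.0 and +1.0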
Example 10
def create_mc_template(toymc_params,
                       config_file=None,
                       seed=None,
                       keep_same_weight=True):
    '''
    Create an MC template from a PISA pipeline.
    '''
    if seed is not None:
        np.random.seed(seed)

    Config = parse_pipeline_config(config_file)

    # Change binning
    Config[('data', 'pi_simple_signal')]['output_specs'] = toymc_params.binning
    Config[(
        'likelihood',
        'pi_generalized_llh_params')]['output_specs'] = toymc_params.binning

    # If keep_same_weight is True, turn off the mean adjust and pseudo weight of pi_generalized_llh
    if keep_same_weight:
        Config[('likelihood',
                'pi_generalized_llh_params')]['with_mean_adjust'] = False
        Config[('likelihood',
                'pi_generalized_llh_params')]['with_pseudo_weight'] = False
    else:
        Config[('likelihood',
                'pi_generalized_llh_params')]['with_mean_adjust'] = True
        Config[('likelihood',
                'pi_generalized_llh_params')]['with_pseudo_weight'] = True

    new_n_events_data = Param(name='n_events_data',
                              value=toymc_params.n_data,
                              prior=None,
                              range=None,
                              is_fixed=True)
    new_sig_frac = Param(name='signal_fraction',
                         value=toymc_params.signal_fraction,
                         prior=None,
                         range=None,
                         is_fixed=True)
    new_stats_factor = Param(name='stats_factor',
                             value=toymc_params.stats_factor,
                             prior=None,
                             range=None,
                             is_fixed=True)

    # These should match the config file values; we override them here in case
    # they need to change later
    new_mu = Param(name='mu',
                   value=toymc_params.mu,
                   prior=None,
                   range=[0, 100],
                   is_fixed=False)
    new_sigma = Param(name='sigma',
                      value=toymc_params.sigma,
                      prior=None,
                      range=None,
                      is_fixed=True)
    Config[('data', 'pi_simple_signal')]['params'].update(p=ParamSet([
        new_n_events_data, new_sig_frac, new_stats_factor, new_mu, new_sigma
    ]))

    MCtemplate = DistributionMaker(Config)

    return MCtemplate
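
As in Example 5, a hedged usage sketch; the extra `keep_same_weight` flag toggles the generalized-LLH mean adjustment and pseudo weight (config path hypothetical):

template = create_mc_template(toymc_params,
                              config_file='settings/pipeline/toy_llh.cfg',  # hypothetical
                              seed=0,
                              keep_same_weight=False)  # keep mean adjust + pseudo weight on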