# NOTE(review): this fragment references `pipeline`, `gen_pipe`, `livetimes`,
# `Param` and `ureg` that are defined outside this view -- presumably the body
# (or prelude) of a livetime-scan routine; confirm against the full file.

# Handles to the pipeline params tweaked per livetime iteration
re_param = pipeline.params['regularisation']
sf_param = pipeline.params['stat_fluctuations']
lt_param = pipeline.params['livetime']
unfold_pipeline_cfg = pipeline.params['unfold_pipeline_cfg']

mean_perpe = []   # one sub-list appended per livetime (presumably per-PE means -- confirm)
mean_perbin = []  # filled elsewhere, outside this view

for idx, lt in enumerate(livetimes):
    print '==========='
    print 'livetime = {0}'.format(lt)
    print '==========='
    mean_perpe.append([])

    # Propagate the current livetime to both the analysis and generator pipelines
    lt_param.value = lt
    pipeline.update_params(lt_param)
    gen_pipe.update_params(lt_param)

    # Point the unfolding-pipeline-config param at the generator pipeline
    # (fixed param: not meant to be fitted)
    u_pipe = Param(name='unfold_pipeline_cfg', value=gen_pipe, is_fixed=True,
                   prior=None, range=None)
    unfold_pipeline_cfg = u_pipe
    pipeline.update_params(unfold_pipeline_cfg)

    # Get nominal
    # (regularisation switched off for the nominal output)
    re_param.value = 0 * ureg.dimensionless
    pipeline.update_params(re_param)
    nom_out = pipeline.get_outputs().pop()
def _calculate_fit_coeffs(data, params, fit_binning, nu_params=None,
                          mu_params=None):
    """Calculate the fit coefficients for each systematic, flavint and bin
    for a polynomial.

    For every systematic listed in the sample config, a template is produced
    for each of the systematic's "runs"; each histogram is divided by the
    nominal run's histogram and a polynomial is fitted, bin by bin, to the
    fractional deviation as a function of the run value's offset from
    nominal.

    Parameters
    ----------
    data : Data
        Not used directly here; kept for interface compatibility with the
        caller.
    params : ParamSet
        Must provide `discr_sys_sample_config`, `poly_degree`,
        `force_through_nominal`, `pipeline_config` and `stop_after_stage`.
    fit_binning : MultiDimBinning
        Binning in which the per-bin fits are performed.
    nu_params : sequence of str or None
        Neutrino systematic param names; the first 3 characters of each are
        stripped before comparing with the sample config. Fitted if not None.
    mu_params : sequence of str or None
        Muon systematic param names (same 3-char-prefix convention). Fitted
        if not None.

    Returns
    -------
    sys_fit_coeffs : OrderedDict
        Maps systematic name to a MapSet (neutrinos) or Map (muons) of fit
        coefficients, with an extra `coeff` dimension appended to
        `fit_binning`.

    Raises
    ------
    AssertionError
        If the config and pipeline systematics lists disagree, if a template
        is not a Data object, or if flavint groups mismatch between runs.
    """
    logging.debug('Calculating fit coefficients')

    config = from_file(params['discr_sys_sample_config'].value)

    degree = int(params['poly_degree'].value)
    force_through_nominal = params['force_through_nominal'].value

    if force_through_nominal:
        def fit_func(vals, *poly_coeffs):
            # Constant term pinned to 1 so the curve passes through nominal
            return np.polynomial.polynomial.polyval(
                vals, [1.] + list(poly_coeffs))
    else:
        def fit_func(vals, *poly_coeffs):
            return np.polynomial.polynomial.polyval(
                vals, list(poly_coeffs))
        # add free param for constant term
        degree += 1

    template_maker = Pipeline(params['pipeline_config'].value)
    dataset_param = template_maker.params['dataset']

    def parse(string):
        # "a, b,c" -> ['a', 'b', 'c']
        return string.replace(' ', '').split(',')

    sys_fit_coeffs = OrderedDict()

    if nu_params is not None:
        sys_list = parse(config.get('neutrinos', 'sys_list'))
        # Strip the 3-char prefix; use a list (not a lazy map) so the set
        # comparison below is safe under both py2 and py3
        nu_params = deepcopy([x[3:] for x in nu_params])

        if set(nu_params) != set(sys_list):
            raise AssertionError(
                'Systematics list listed in the sample config file does '
                'not match the params in the pipeline config file\n {0} '
                '!= {1}'.format(set(nu_params), set(sys_list)))

        for sys in sys_list:
            ev_sys = 'neutrinos|' + sys
            # [1:-1] strips the surrounding brackets of the config value
            runs = parse(config.get(ev_sys, 'runs')[1:-1])
            nominal = config.get(ev_sys, 'nominal')

            mapset_dict = OrderedDict()
            flavint_groups = None
            for run in runs:
                logging.info('Loading run {0} of systematic '
                             '{1}'.format(run, sys))
                dataset_param.value = ev_sys + '|' + run
                template_maker.update_params(dataset_param)
                template = template_maker.get_outputs(
                    idx=int(params['stop_after_stage'].m))
                if not isinstance(template, Data):
                    raise AssertionError(
                        'Template output is not a Data object, instead is '
                        'type {0}'.format(type(template)))
                if flavint_groups is None:
                    flavint_groups = template.flavint_groups
                else:
                    # All runs must be grouped into the same flavints
                    if set(flavint_groups) != set(template.flavint_groups):
                        raise AssertionError(
                            'Mismatch of flavint_groups - ({0}) does not '
                            'match flavint_groups '
                            '({1})'.format(flavint_groups,
                                           template.flavint_groups))

                outputs = []
                for fig in template.keys():
                    outputs.append(template.histogram(
                        kinds=fig, binning=fit_binning,
                        weights_col='pisa_weight', errors=False,
                        name=str(NuFlavIntGroup(fig))))
                mapset_dict[run] = MapSet(outputs, name=run)

            # Fractional deviation of each run w.r.t. the nominal run;
            # bins empty in the nominal map are left at 0
            nom_mapset = mapset_dict[nominal]
            fracdiff_mapset_dict = OrderedDict()
            for run in runs:
                mapset = []
                for flavintg_map in mapset_dict[run]:
                    # TODO(shivesh): error propagation?
                    flavintg = flavintg_map.name
                    mask = ~(nom_mapset[flavintg].hist == 0.)
                    div = np.zeros(flavintg_map.shape)
                    with np.errstate(divide='ignore', invalid='ignore'):
                        div[mask] = \
                            unp.nominal_values(flavintg_map.hist[mask]) /\
                            unp.nominal_values(nom_mapset[flavintg].hist[mask])
                    mapset.append(Map(
                        name=flavintg, binning=flavintg_map.binning,
                        hist=div))
                fracdiff_mapset_dict[run] = MapSet(mapset)

            # Run values expressed as offsets from the nominal run value
            delta_runs = np.array([float(x) for x in runs]) - float(nominal)

            # Extra dimension holding the polynomial coefficients
            coeff_binning = OneDimBinning(
                name='coeff', num_bins=degree, is_lin=True, domain=[-1, 1])
            combined_binning = fit_binning + coeff_binning

            params_mapset = []
            for fig in template.keys():
                # TODO(shivesh): Fix numpy warning on this line
                pvals_hist = np.empty(list(map(int, combined_binning.shape)),
                                      dtype=object)
                for idx in np.ndindex(fit_binning.shape):
                    y_values = []
                    y_sigma = []
                    for run in fracdiff_mapset_dict:
                        y_values.append(unp.nominal_values(
                            fracdiff_mapset_dict[run][fig].hist[idx]))
                        y_sigma.append(unp.std_devs(
                            fracdiff_mapset_dict[run][fig].hist[idx]))

                    # Weighted fit only if any non-zero uncertainty exists
                    if np.any(y_sigma):
                        popt, pcov = curve_fit(
                            fit_func, delta_runs, y_values, sigma=y_sigma,
                            p0=np.ones(degree))
                    else:
                        popt, pcov = curve_fit(
                            fit_func, delta_runs, y_values,
                            p0=np.ones(degree))
                    pvals_hist[idx] = popt
                pvals_hist = np.array(pvals_hist.tolist())
                params_mapset.append(Map(
                    name=fig, binning=combined_binning, hist=pvals_hist))
            params_mapset = MapSet(params_mapset, name=sys)

            if sys in sys_fit_coeffs:
                sys_fit_coeffs[sys] = MapSet(
                    [sys_fit_coeffs[sys], params_mapset])
            else:
                sys_fit_coeffs[sys] = params_mapset

    if mu_params is not None:
        sys_list = parse(config.get('muons', 'sys_list'))
        mu_params = deepcopy([x[3:] for x in mu_params])

        if set(mu_params) != set(sys_list):
            raise AssertionError(
                'Systematics list listed in the sample config file does '
                'not match the params in the pipeline config file\n {0} '
                '!= {1}'.format(set(mu_params), set(sys_list)))

        for sys in sys_list:
            ev_sys = 'muons|' + sys
            runs = parse(config.get(ev_sys, 'runs')[1:-1])
            nominal = config.get(ev_sys, 'nominal')

            map_dict = OrderedDict()
            flavint_groups = None
            for run in runs:
                logging.info('Loading run {0} of systematic '
                             '{1}'.format(run, sys))
                dataset_param.value = ev_sys + '|' + run
                template_maker.update_params(dataset_param)
                template = template_maker.get_outputs(
                    idx=int(params['stop_after_stage'].m))
                if not isinstance(template, Data):
                    raise AssertionError(
                        'Template output is not a Data object, instead is '
                        'type {0}'.format(type(template)))
                if not template.contains_muons:
                    raise AssertionError(
                        'Template output does not contain muons')
                output = template.histogram(
                    kinds='muons', binning=fit_binning,
                    # NOTE: weights cancel in fraction
                    weights_col=None, errors=False, name='muons')
                map_dict[run] = output

            # Fractional deviation w.r.t. the nominal muon map
            nom_map = map_dict[nominal]
            fracdiff_map_dict = OrderedDict()
            for run in runs:
                mask = ~(nom_map.hist == 0.)
                div = np.zeros(nom_map.shape)
                with np.errstate(divide='ignore', invalid='ignore'):
                    div[mask] = \
                        unp.nominal_values(map_dict[run].hist[mask]) /\
                        unp.nominal_values(nom_map.hist[mask])
                fracdiff_map_dict[run] = Map(
                    name='muons', binning=nom_map.binning, hist=div)

            delta_runs = np.array([float(x) for x in runs]) - float(nominal)

            coeff_binning = OneDimBinning(
                name='coeff', num_bins=degree, is_lin=True, domain=[-1, 1])
            combined_binning = fit_binning + coeff_binning

            pvals_hist = np.empty(list(map(int, combined_binning.shape)),
                                  dtype=object)
            for idx in np.ndindex(fit_binning.shape):
                y_values = []
                y_sigma = []
                # BUG FIX: this loop previously read the *neutrino*
                # `fracdiff_mapset_dict[run][fig]` (with a stale `fig` from
                # the neutrino branch), so muon fits used neutrino data --
                # it must read the muon fractional-deviation maps instead
                for run in fracdiff_map_dict:
                    y_values.append(unp.nominal_values(
                        fracdiff_map_dict[run].hist[idx]))
                    y_sigma.append(unp.std_devs(
                        fracdiff_map_dict[run].hist[idx]))

                if np.any(y_sigma):
                    popt, pcov = curve_fit(
                        fit_func, delta_runs, y_values, sigma=y_sigma,
                        p0=np.ones(degree))
                else:
                    popt, pcov = curve_fit(
                        fit_func, delta_runs, y_values,
                        p0=np.ones(degree))
                pvals_hist[idx] = popt
            pvals_hist = np.array(pvals_hist.tolist())
            params_map = Map(
                name='muons', binning=combined_binning, hist=pvals_hist)

            if sys in sys_fit_coeffs:
                sys_fit_coeffs[sys] = MapSet(
                    [sys_fit_coeffs[sys], params_map])
            else:
                sys_fit_coeffs[sys] = params_map

    return sys_fit_coeffs
def __init__(self, pipelines, label=None, set_livetime_from_data=True,
             profile=False):
    """Construct the object from one or more pipelines.

    Parameters
    ----------
    pipelines : Pipeline, str, PISAConfigParser, OrderedDict, or iterable thereof
        Anything that is not already a Pipeline is passed to the Pipeline
        constructor.
    label : str or None
        Free-form label stored on the instance.
    set_livetime_from_data : bool
        If True, search every stage's metadata for a "livetime" entry and
        propagate it to every pipeline that has a `livetime` param.
    profile : bool
        Forwarded to any Pipeline constructed here.

    Raises
    ------
    ValueError
        If stages report conflicting livetimes.
    NameError
        If pipelines carry different detector names.
    """
    self.label = label
    self._source_code_hash = None
    # Only 'livetime' is stored here at present
    self.metadata = OrderedDict()
    self._profile = profile

    self._pipelines = []
    # Wrap a single config/pipeline so it can be iterated uniformly
    if isinstance(pipelines, (str, PISAConfigParser, OrderedDict,
                              Pipeline)):
        pipelines = [pipelines]

    for pipeline in pipelines:
        if not isinstance(pipeline, Pipeline):
            pipeline = Pipeline(pipeline, profile=profile)
        self._pipelines.append(pipeline)

    data_run_livetime = None
    if set_livetime_from_data:
        #
        # Get livetime metadata if defined in any stage in any pipeline
        #
        for pipeline_idx, pipeline in enumerate(self):
            for stage_idx, stage in enumerate(pipeline):
                # Skip stages without a mapping-style metadata holding
                # a "livetime" entry
                if not (hasattr(stage, "metadata")
                        and isinstance(stage.metadata, Mapping)
                        and "livetime" in stage.metadata):
                    continue

                if data_run_livetime is None:
                    data_run_livetime = stage.metadata["livetime"]

                # Every stage that defines a livetime must agree with the
                # first one found
                if stage.metadata["livetime"] != data_run_livetime:
                    raise ValueError(
                        "Pipeline index {}, stage index {} has data"
                        " livetime = {}, in disagreement with"
                        " previously-found livetime = {}".format(
                            pipeline_idx,
                            stage_idx,
                            stage.metadata["livetime"],
                            data_run_livetime,
                        ))

        # Save the last livetime found inside the pipeline's metadata
        # TODO: implement metadata in the pipeline class instead
        self.metadata['livetime'] = data_run_livetime

        #
        # Set param `params.livetime` for any pipelines that have it
        #
        if data_run_livetime is not None:
            # Livetime metadata is a bare number; attach seconds unit
            data_run_livetime *= ureg.sec

            for pipeline_idx, pipeline in enumerate(self):
                if "livetime" not in pipeline.params.names:
                    continue

                pipeline.params.livetime.is_fixed = True

                if pipeline.params.livetime != data_run_livetime:
                    logging.warning(
                        "Pipeline index %d has params.livetime = %s, in"
                        " disagreement with data livetime = %s defined by"
                        " data. The pipeline's livetime param will be"
                        " reset to the latter value and set to be fixed"
                        " (if it is not alredy).",
                        pipeline_idx,
                        pipeline.params.livetime.value,
                        data_run_livetime,
                    )
                    pipeline.params.livetime = data_run_livetime

    #for pipeline in self:
    #    pipeline.select_params(self.param_selections,
    #                           error_on_missing=False)

    # Make sure that all the pipelines have the same detector name (or None)
    self.detector_name = 'no_name'
    for p in self._pipelines:
        name = p.detector_name
        if name != self.detector_name and self.detector_name != 'no_name':
            raise NameError(
                'Different detector names in distribution_maker pipelines')
        self.detector_name = name

    # set parameters with an identical name to the same object
    # otherwise we get inconsistent behaviour when setting repeated params
    # See Issues #566 and #648
    # Also, do this for all selections!
    original_selection = self.param_selections
    all_selections = set()
    for pipeline in self:
        for stage in pipeline.stages:
            all_selections.update(
                stage._param_selector._selector_params.keys())
    for selection in all_selections:
        self.select_params(selection)
        all_params = self.params
        for pipeline in self:
            pipeline.update_params(all_params, existing_must_match=True,
                                   extend=False)
    # Restore whatever selection was active before the sync above
    self.select_params(original_selection)
def main():
    """Fit per-bin polynomial coefficients for each systematic listed in the
    fit-settings file, optionally smooth them, write them to JSON, and
    optionally plot the raw coefficient maps.
    """
    args = parse_args()
    set_verbosity(args.v)
    if args.plot:
        import matplotlib as mpl
        mpl.use('pdf')
        import matplotlib.pyplot as plt
        from pisa.utils.plotter import Plotter

    cfg = from_file(args.fit_settings)
    sys_list = cfg.get('general', 'sys_list').replace(' ', '').split(',')
    stop_idx = cfg.getint('general', 'stop_after_stage')

    for sys in sys_list:
        # Parse info for given systematic
        nominal = cfg.getfloat(sys, 'nominal')
        degree = cfg.getint(sys, 'degree')
        force_through_nominal = cfg.getboolean(sys, 'force_through_nominal')
        # NOTE(review): eval() on config text -- only run with trusted
        # fit-settings files
        runs = eval(cfg.get(sys, 'runs'))
        #print "runs ", runs
        smooth = cfg.get(sys, 'smooth')

        x_values = np.array(sorted(runs))

        # Build fit function
        if force_through_nominal:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, [1.] + list(p))"
        else:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, list(p))"
            # Add free parameter for constant term
            degree += 1
        fit_fun = eval(function)

        # Instantiate template maker
        template_maker = Pipeline(args.template_settings)

        if not args.set_param == '':
            # Apply any "name=value" overrides given on the command line
            for one_set_param in args.set_param:
                p_name, value = one_set_param.split("=")
                #print "p_name,value= ", p_name, " ", value
                value = parse_quantity(value)
                value = value.n * value.units
                param = template_maker.params[p_name]
                #print "old ", p_name, "value = ", param.value
                param.value = value
                #print "new ", p_name, "value = ", param.value
                template_maker.update_params(param)

        inputs = {}
        map_names = None
        # Get sys templates
        for run in runs:
            # Apply the param values configured for this sys:run section
            for key, val in cfg.items('%s:%s' % (sys, run)):
                if key.startswith('param.'):
                    _, pname = key.split('.')
                    param = template_maker.params[pname]
                    try:
                        value = parse_quantity(val)
                        param.value = value.n * value.units
                    except ValueError:
                        # Not a quantity; treat as a plain literal
                        value = parse_string_literal(val)
                        param.value = value
                    param.set_nominal_to_current_value()
                    template_maker.update_params(param)
            # Retreive maps
            template = template_maker.get_outputs(idx=stop_idx)
            if map_names is None:
                map_names = [m.name for m in template]
            inputs[run] = {}
            for m in template:
                inputs[run][m.name] = m.hist

        # Numpy acrobatics:
        arrays = {}
        for name in map_names:
            arrays[name] = []
            for x in x_values:
                # Ratio of each run's histogram to the nominal run's
                arrays[name].append(
                    inputs[x][name] / unp.nominal_values(inputs[nominal][name])
                )
            a = np.array(arrays[name])
            # Move the run axis last: shape becomes (bins..., runs)
            arrays[name] = np.rollaxis(a, 0, len(a.shape))

        # Shift to get deltas
        x_values -= nominal

        # Binning object (assuming they're all the same)
        binning = template.maps[0].binning
        shape = [d.num_bins for d in binning] + [degree]
        shape_small = [d.num_bins for d in binning]

        outputs = {}
        errors = {}
        for name in map_names:
            # Now actualy perform some fits
            outputs[name] = np.ones(shape)
            errors[name] = np.ones(shape)

            for idx in np.ndindex(*shape_small):
                y_values = unp.nominal_values(arrays[name][idx])
                y_sigma = unp.std_devs(arrays[name][idx])
                # Weighted fit only when non-zero uncertainties exist
                if np.any(y_sigma):
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           sigma=y_sigma,
                                           p0=np.ones(degree))
                else:
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           p0=np.ones(degree))
                perr = np.sqrt(np.diag(pcov))
                for k, p in enumerate(popt):
                    outputs[name][idx][k] = p
                    errors[name][idx][k] = perr[k]

                # TODO(philippeller): the below block of code will fail
                # Maybe plot
                #if args.plot:
                #    fig_num = i + nx * j
                #    if fig_num == 0:
                #        fig = plt.figure(num=1, figsize=( 4*nx, 4*ny))
                #    subplot_idx = nx*(ny-1-j)+ i + 1
                #    plt.subplot(ny, nx, subplot_idx)
                #    #plt.snameter(x_values, y_values, color=plt_colors[name])
                #    plt.gca().errorbar(x_values, y_values, yerr=y_sigma,
                #                       fmt='o', color=plt_colors[name],
                #                       ecolor=plt_colors[name],
                #                       mec=plt_colors[name])
                #    # Plot nominal point again in black
                #    plt.snameter([0.0], [1.0], color='k')
                #    f_values = fit_fun(x_values, *popt)
                #    fun_plot, = plt.plot(x_values, f_values,
                #                         color=plt_colors[name])
                #    plt.ylim(np.min(unp.nominal_values(arrays[name]))*0.9,
                #             np.max(unp.nominal_values(arrays[name]))*1.1)
                #    if i > 0:
                #        plt.setp(plt.gca().get_yticklabels(), visible=False)
                #    if j > 0:
                #        plt.setp(plt.gca().get_xticklabels(), visible=False)

        if smooth == 'gauss':
            # Smooth each coefficient plane across all bin dimensions
            for name in map_names:
                for d in range(degree):
                    outputs[name][..., d] = gaussian_filter(
                        outputs[name][..., d], sigma=1)

        if smooth == 'gauss_pid':
            # Smooth within each PID slice separately
            for name in map_names:
                split_idx = binning.names.index('pid')
                tot = len(binning) - 1
                for d in range(degree):
                    for p in range(len(binning['pid'])):
                        outputs[name][..., p, d] = gaussian_filter(
                            np.swapaxes(outputs[name], split_idx, tot)[..., p, d],
                            sigma=1
                        )
                outputs[name] = np.swapaxes(outputs[name], split_idx, tot)

        # Save the raw ones anyway
        outputs['pname'] = sys
        outputs['nominal'] = nominal
        outputs['function'] = function
        outputs['map_names'] = map_names
        outputs['binning_hash'] = binning.hash
        to_file(outputs, '%s/%s_sysfits_%s_%s.json' % (args.out_dir, sys,
                                                       args.tag, smooth))

        if args.plot:
            for d in range(degree):
                maps = []
                for name in map_names:
                    maps.append(Map(name='%s_raw' % name,
                                    hist=outputs[name][..., d],
                                    binning=binning))
                maps = MapSet(maps)
                my_plotter = Plotter(
                    stamp='',
                    outdir=args.out_dir,
                    fmt='pdf',
                    log=False,
                    label=''
                )
                my_plotter.plot_2d_array(
                    maps,
                    fname='%s_%s_%s_%s' % (sys, args.tag, d, smooth),
                )
def main():
    """Scan each free hole_ice/dom_eff parameter to its +/- sigma bounds,
    re-run the CFX pipeline at each bound, and plot the percentage effect
    on the unfolded event counts relative to the nominal (baseline) output.
    """
    global SIGMA
    args = vars(parse_args())
    set_verbosity(args.pop('v'))
    center_zero = args.pop('center_zero')

    make_pdf = False
    if args['pdf']:
        make_pdf = True
        args['pdf'] = False

    outdir = args.pop('outdir')
    # 0o755 (not the py2-only literal 0755): rwxr-xr-x
    fileio.mkdir(outdir, mode=0o755)
    SIGMA *= args.pop('sigma')

    cfx_pipe = Pipeline(args.pop('cfx_pipeline'))

    signal = args.pop('signal').replace(' ', '').split(',')
    output_str = []
    for name in signal:
        if 'muons' in name or 'noise' in name:
            raise AssertionError('Are you trying to unfold muons/noise?')
        elif 'all_nu' in name:
            # 'all_nu' replaces anything gathered so far with every flavint
            output_str = [str(NuFlavIntGroup(f)) for f in ALL_NUFLAVINTS]
        else:
            output_str.append(NuFlavIntGroup(name))
    output_str = [str(f) for f in output_str]
    cfx_pipe._output_names = output_str

    # Turn off stat fluctuations
    stat_param = cfx_pipe.params['stat_fluctuations']
    stat_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(stat_param)

    # Get nominal Map (regularisation off), then restore the param
    re_param = cfx_pipe.params['regularisation']
    re_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(re_param)
    nom_out = cfx_pipe.get_outputs()
    re_param.reset()
    cfx_pipe.update_params(re_param)

    # All params except 'dataset' participate in the scan bookkeeping
    params = ParamSet()
    for param in cfx_pipe.params:
        if param.name != 'dataset':
            params.extend(param)

    free = params.free
    logging.info('Free params = {0}'.format(free))

    contin = True
    for f in free:
        # Only scan hole_ice / dom_eff parameters
        if 'hole_ice' not in f.name and 'dom_eff' not in f.name:
            continue
        # if 'atm_muon_scale' in f.name:
        #     contin = False
        # if contin:
        #     continue

        logging.info('Working on parameter {0}'.format(f.name))
        if f.prior.kind != 'uniform':
            # Use deltaLLH = SIGMA to define +/- sigma for non-uniform
            scan_over = np.linspace(*f.range, num=1000) * f.range[0].u
            llh = f.prior.llh(scan_over)
            # NOTE(review): np.min(-llh) == -np.max(llh), so this computes
            # llh + max(llh) rather than the usual llh - max(llh); behavior
            # preserved here -- confirm the intended delta-LLH definition
            dllh = llh - np.min(-llh)
            mllh_idx = np.argmin(-llh)
            if mllh_idx == 0:
                l_sig_idx = 0
            else:
                l_sig_idx = np.argmin(np.abs(dllh[:mllh_idx] - SIGMA))
            u_sig_idx = np.argmin(np.abs(dllh[mllh_idx:] - SIGMA)) + mllh_idx
            l_sigma = scan_over[l_sig_idx]
            u_sigma = scan_over[u_sig_idx]
        else:
            # Uniform prior: just use the parameter's range limits
            l_sigma = f.range[0]
            u_sigma = f.range[1]

        logging.info('Setting {0} lower sigma bound to '
                     '{1}'.format(f.name, l_sigma))
        f.value = l_sigma
        cfx_pipe.update_params(f)
        l_out = cfx_pipe.get_outputs()

        logging.info('Setting {0} upper sigma bound to '
                     '{1}'.format(f.name, u_sigma))
        f.value = u_sigma
        cfx_pipe.update_params(f)
        u_out = cfx_pipe.get_outputs()

        # Restore the parameter before moving on
        f.reset()
        cfx_pipe.update_params(f)

        f_outdir = outdir + '/' + f.name
        l_outdir = f_outdir + '/' + 'lower'
        u_outdir = f_outdir + '/' + 'upper'
        fileio.mkdir(f_outdir)
        fileio.mkdir(l_outdir)
        fileio.mkdir(u_outdir)

        # compare() writes fractional-difference MapSets to disk
        compare(outdir=l_outdir, ref=MapSet([nom_out]), ref_label='baseline',
                test=MapSet([l_out]), test_label=r'-sigma', **args)
        compare(outdir=u_outdir, ref=MapSet([nom_out]), ref_label='baseline',
                test=MapSet([u_out]), test_label=r'+sigma', **args)

        # Re-load the fractional differences and convert to percent
        l_in_mapset = l_outdir + '/' + 'fract_diff__-sigma___baseline.json.bz2'
        u_in_mapset = u_outdir + '/' + 'fract_diff__+sigma___baseline.json.bz2'
        l_in_map = MapSet.from_json(l_in_mapset).pop() * 100.
        u_in_map = MapSet.from_json(u_in_mapset).pop() * 100.

        if make_pdf:
            outfile = f_outdir + '/systematic_effect.pdf'
        else:
            outfile = f_outdir + '/systematic_effect.png'
        title = r'% effect on ' + r'${0}$'.format(l_in_map.tex) + \
            ' event counts for {0} parameter'.format(f.name)
        sub_titles = (r'(-\sigma - {\rm baseline}) \:/\: {\rm baseline}',
                      r'(+\sigma - {\rm baseline}) \:/\: {\rm baseline}')
        make_plot(
            maps=(l_in_map, u_in_map),
            outfile=outfile,
            logv=False,
            center_zero=center_zero,
            vlabel=r'({\rm change} - {\rm baseline}) \:/\: {\rm baseline} (%)',
            title=title,
            sub_titles=sub_titles
        )