def get_outputs(self, return_sum=False, sum_map_name='total',
                sum_map_tex_name='Total', **kwargs):
    """Run every pipeline and return the outputs, optionally summed.

    Parameters
    ----------
    return_sum : bool
        If False, return a list holding the output of each pipeline, in
        iteration order. If True, all Maps produced by all pipelines are
        added together (see Returns).

    sum_map_name : str
        Value assigned to the `name` attribute of each summed Map.

    sum_map_tex_name : str
        Value assigned to the `tex` attribute of each summed Map.

    **kwargs
        Passed on to each pipeline's `get_outputs` method.

    Returns
    -------
    list
        If `return_sum=False`, or if the first pipeline's output is neither
        a MapSet nor an OrderedDict (in which case nothing is summed).
    MapSet
        If `return_sum=True` and the pipelines return MapSets: a MapSet
        wrapping the single summed Map.
    OrderedDict
        If `return_sum=True` and the pipelines return OrderedDicts: one
        MapSet (wrapping one summed Map) per key of the first pipeline's
        output.

    """
    outputs = [pipeline.get_outputs(**kwargs) for pipeline in self] # pylint: disable=redefined-outer-name

    if not return_sum:
        return outputs

    # The type of the first pipeline's output decides how to combine them
    first = outputs[0]

    if isinstance(first, MapSet):
        # Inner sum collapses a MapSet to a Map, outer sum adds the pipelines
        total = sum([sum(mapset) for mapset in outputs])
        total.name = sum_map_name
        total.tex = sum_map_tex_name
        # Final output must be a MapSet
        return MapSet(total)

    if isinstance(first, OrderedDict):
        summed = OrderedDict()
        for key in first.keys():
            key_total = sum([sum(per_pipeline[key]) for per_pipeline in outputs])
            key_total.name = sum_map_name
            key_total.tex = sum_map_tex_name
            summed[key] = MapSet(key_total)
        return summed

    return outputs
def _compute_outputs(self, inputs=None):
    """Produce one map of random per-bin "fluxes" for each output name.

    The random generator is seeded from a hash of the current parameter
    values, so the same parameter values always give the same maps while a
    change of parameters gives new maps (useful for exercising caching).

    Parameters
    ----------
    inputs : None
        Not used by this method.

    Returns
    -------
    MapSet named 'flux maps' holding one Map per entry in `output_names`

    """
    # Seed derived from the params, reduced to the range accepted by numpy
    seed = hash_obj(self.params.values, hash_to='int') % (2**32 - 1)
    np.random.seed(seed)

    # The user may give `test` in any compatible units; compute in meters
    height = self.params['test'].to('meter').magnitude

    binning = self.output_binning

    # Poisson errors could be enabled on each map (`m.set_poisson_errors()`)
    # to propagate errors through the rest of the pipeline, at some cost in
    # speed; this is left off here.
    flux_maps = [
        Map(name=output_name,
            hist=np.random.random(binning.shape) * height,
            binning=binning)
        for output_name in self.output_names
    ]

    # A MapSet carries what caching needs and gives the user one interface
    # to all of the contained maps
    return MapSet(maps=flux_maps, name='flux maps')
def get_outputs(self, return_sum=False, sum_map_name='total',
                sum_map_tex_name='Total', **kwargs):
    """Compute and return the outputs.

    Parameters
    ----------
    return_sum : bool
        If True, add up all Maps in all MapSets returned by all pipelines.
        The result will be a single Map contained in a MapSet.
        If False, return a list where each element is the full MapSet
        returned by each pipeline in the DistributionMaker.

    sum_map_name : str
        Value assigned to the `name` attribute of the summed Map.

    sum_map_tex_name : str
        Value assigned to the `tex` attribute of the summed Map.

    **kwargs
        Passed on to each pipeline's `get_outputs` method.

    Returns
    -------
    MapSet if `return_sum=True` or list of MapSets if `return_sum=False`

    """
    outputs = [pipeline.get_outputs(**kwargs) for pipeline in self] # pylint: disable=redefined-outer-name

    if return_sum:
        # Collapse each pipeline's MapSet to a single Map first, then add
        # those Maps together. A pairwise reduce that re-sums its own
        # accumulator only works for exactly two pipelines, because from the
        # third element on the accumulator is already a Map, not a MapSet.
        total = sum([sum(mapset) for mapset in outputs])
        total.name = sum_map_name
        total.tex = sum_map_tex_name
        outputs = MapSet(total)

    return outputs
def plot_2d_array(self, map_set, n_rows=None, n_cols=None, fname=None,
                  **kwargs):
    """Plot all maps or transforms in a single plot.

    Three-dimensional maps are split into a sequence of two-dimensional maps
    before plotting; two-dimensional maps are plotted as they are.

    Parameters
    ----------
    map_set : iterable of maps
        Each element must expose `binning`, `name` and (for 3d maps) `split`.

    n_rows, n_cols : None or int
        Grid shape, forwarded to `plot_array`.

    fname : None or str
        Name passed to `self.dump`; defaults to `map_set.name`.

    **kwargs
        `split_axis` (optional) names the dimension along which 3d maps are
        split; if it is absent or None, the dimension with the fewest bins
        is used. All remaining keyword arguments are forwarded to
        `plot_array`.

    Raises
    ------
    Exception
        If a map is neither two- nor three-dimensional.

    """
    if fname is None:
        fname = map_set.name

    new_maps = []
    split_axis = kwargs.pop('split_axis', None)

    for pisa_map in map_set:
        n_dims = len(pisa_map.binning)
        if n_dims == 3:
            if split_axis is None:
                # Find shortest dimension
                num_bins = pisa_map.binning.num_bins
                idx = num_bins.index(min(num_bins))
                split_axis_ = pisa_map.binning.names[idx]
                logging.warning(
                    'Plotter automatically splitting map %s along %s axis',
                    pisa_map.name, split_axis_)
            else:
                # Honor the axis requested by the caller
                split_axis_ = split_axis
            new_maps.extend(pisa_map.split(split_axis_))
        elif n_dims == 2:
            new_maps.append(pisa_map)
        else:
            # Exception type kept generic so that existing handlers behave
            # as before
            raise Exception('Cannot plot %i dimensional map in 2d' % n_dims)

    map_set = MapSet(new_maps)
    self.plot_array(map_set, 'plot_2d_map', n_rows=n_rows, n_cols=n_cols,
                    **kwargs)
    self.dump(fname)
def _compute_outputs(self, inputs=None):
    """Apply scales to the histogram and put it into a PISA MapSet.

    The histogram is `icc_bg_hist` multiplied by the atmospheric muon scale
    and the livetime (in common years). Errors are assigned according to
    `self.error_method`:

      * 'sumw2' : square root of the histogram, times the scale
      * 'sumw2+shape' : sumw2 combined in quadrature with the difference
        to the alternative histogram, times the scale
      * 'shape' : absolute difference to the alternative histogram, times
        the scale
      * 'fixed_shape' : as 'shape' but using the fixed scale
      * 'fixed_sumw2+shape' : as 'sumw2+shape' but using the fixed scale
      * 'fixed_doublesumw2+shape' : as 'fixed_sumw2+shape' but with the
        histogram term doubled
      * anything else : no errors are attached

    The fixed scale is built from the nominal value of `atm_muon_scale`,
    the current livetime and `fixed_scale_factor`.

    Parameters
    ----------
    inputs : None
        Not used by this method.

    Returns
    -------
    MapSet named 'icc' containing a single Map named `output_names[0]`

    """
    scale = self.params.atm_muon_scale.value.m_as('dimensionless')
    fixed_scale = self.params.atm_muon_scale.nominal_value.m_as('dimensionless')
    scale *= self.params.livetime.value.m_as('common_year')
    fixed_scale *= self.params.livetime.value.m_as('common_year')
    fixed_scale *= self.params.fixed_scale_factor.value.m_as('dimensionless')

    method = self.error_method

    # Work out the error histogram first; every method then shares the same
    # Map construction below. Previously the 'shape' method computed its
    # error but never built a Map, so the final return failed.
    if method == 'sumw2':
        error = np.sqrt(self.icc_bg_hist) * scale
    elif method == 'sumw2+shape':
        error = scale * np.sqrt(
            self.icc_bg_hist + (self.icc_bg_hist - self.alt_icc_bg_hist)**2
        )
    elif method == 'shape':
        error = scale * np.abs(self.icc_bg_hist - self.alt_icc_bg_hist)
    elif method == 'fixed_shape':
        error = fixed_scale * np.abs(self.icc_bg_hist - self.alt_icc_bg_hist)
    elif method == 'fixed_sumw2+shape':
        error = fixed_scale * np.sqrt(
            self.icc_bg_hist + (self.icc_bg_hist - self.alt_icc_bg_hist)**2
        )
    elif method == 'fixed_doublesumw2+shape':
        error = fixed_scale * np.sqrt(
            2*self.icc_bg_hist + (self.icc_bg_hist - self.alt_icc_bg_hist)**2
        )
    else:
        error = None

    map_kwargs = dict(
        name=self.output_names[0],
        hist=(self.icc_bg_hist * scale),
        binning=self.output_binning,
    )
    # Only pass `error_hist` when a method asked for errors, so that the
    # error-free case constructs the Map exactly as before
    if error is not None:
        map_kwargs['error_hist'] = error

    maps = [Map(**map_kwargs)]
    return MapSet(maps, name='icc')
def _compute_outputs(self, inputs=None):
    """Load the configured sample and histogram each enabled event kind.

    Parameters
    ----------
    inputs : None
        Not used by this method.

    Returns
    -------
    The loaded events object if `self.output_events` is set, otherwise a
    MapSet (named after the sample) with one Map per neutrino group plus,
    if enabled, one Map each for muons and noise.

    Raises
    ------
    NotImplementedError
        If the `keep_criteria` param is not None.

    """
    logging.debug('Entering sample._compute_outputs')

    self.config = from_file(self.params['data_sample_config'].value)
    name = self.config.get('general', 'name')
    logging.trace('{0} sample sample_hash = '
                  '{1}'.format(name, self.sample_hash))
    self.load_sample_events()

    keep_criteria = self.params['keep_criteria'].value
    if keep_criteria is not None:
        # TODO(shivesh)
        raise NotImplementedError(
            'needs check to make sure this works in a DistributionMaker'
        )
        # Not reached until the TODO above is resolved
        self._data.applyCut(self.params['keep_criteria'].value)
        self._data.update_hash()

    if self.output_events:
        return self._data

    maps = []

    if self.neutrinos:
        nu_data = self._data.transform_groups(self._output_nu_groups)
        for group in nu_data.keys():
            maps.append(nu_data.histogram(
                kinds=group,
                binning=self.output_binning,
                weights_col='pisa_weight',
                errors=True,
                name=str(NuFlavIntGroup(group)),
            ))

    # Muons and noise are histogrammed identically apart from their labels;
    # the attribute of the same name switches each one on
    for kind, tex in (('muons', r'\rm{muons}'), ('noise', r'\rm{noise}')):
        if getattr(self, kind):
            maps.append(self._data.histogram(
                kinds=kind,
                binning=self.output_binning,
                weights_col='pisa_weight',
                errors=True,
                name=kind,
                tex=tex,
            ))

    return MapSet(maps=maps, name=self.config.get('general', 'name'))
def _compute_outputs(self, inputs=None):
    """Reweight the input events and compute histograms for output channels.

    Parameters
    ----------
    inputs : Data
        Events to reweight; `metadata['weight_hash']` and `metadata['name']`
        are read from it.

    Returns
    -------
    The reweighted events object if `self.output_events` is set, otherwise
    a MapSet (named after `metadata['name']`) with one Map per neutrino
    group plus, if enabled, one Map each for muons and noise.

    Raises
    ------
    AssertionError
        If `inputs` is not a Data object.

    """
    logging.debug('Entering fit._compute_outputs')
    if not isinstance(inputs, Data):
        raise AssertionError('inputs is not a Data object, instead is '
                             'type {0}'.format(type(inputs)))

    self.weight_hash = deepcopy(inputs.metadata['weight_hash'])
    logging.trace('{0} fit weight_hash = '
                  '{1}'.format(inputs.metadata['name'], self.weight_hash))
    logging.trace('{0} fit fit_hash = '
                  '{1}'.format(inputs.metadata['name'], self.fit_hash))

    self._data = inputs
    self.reweight()

    if self.output_events:
        return self._data

    outputs = []
    if self.neutrinos:
        trans_nu_data = self._data.transform_groups(
            self._output_nu_groups
        )
        # `keys()` rather than the Python-2-only `iterkeys()`, matching how
        # Data objects are iterated elsewhere in this file
        for fig in trans_nu_data.keys():
            outputs.append(
                trans_nu_data.histogram(
                    kinds=fig,
                    binning=self.output_binning,
                    weights_col='pisa_weight',
                    errors=True,
                    name=str(NuFlavIntGroup(fig)),
                )
            )

    if self.muons:
        outputs.append(
            self._data.histogram(
                kinds='muons',
                binning=self.output_binning,
                weights_col='pisa_weight',
                errors=True,
                name='muons',
                tex=text2tex('muons')
            )
        )

    if self.noise:
        outputs.append(
            self._data.histogram(
                kinds='noise',
                binning=self.output_binning,
                weights_col='pisa_weight',
                errors=True,
                name='noise',
                tex=text2tex('noise')
            )
        )

    return MapSet(maps=outputs, name=self._data.metadata['name'])
def return_mapset(self, **kwargs):
    """Return a MapSet of stored spline maps.

    Parameters
    ----------
    **kwargs
        Passed on to the MapSet constructor.

    Returns
    -------
    MapSet containing every value stored in `self._spline_data`

    Raises
    ------
    ValueError
        If any stored value is not (yet) a Map.

    """
    # `items()`/`values()` work on both Python 2 and 3, unlike `iterkeys()`,
    # and avoid looking every signature up a second time
    for signature, spline_map in self._spline_data.items():
        if not isinstance(spline_map, Map):
            raise ValueError('Error: map {0} has not yet been '
                             'computed'.format(signature))
    maps = list(self._spline_data.values())
    return MapSet(maps=maps, **kwargs)
def main():
    """Load two single-map MapSets from JSON files and plot them together.

    The two file paths are taken from `args.infiles[0]`; the plot is written
    to `<outdir>/<outname>`.
    """
    args = parse_args()
    set_verbosity(args.verbose)

    infile_pair = args.infiles[0]
    assert len(infile_pair) == 2
    assert len(args.subtitles) == 2

    logging.info('Loading MapSet from files {0}'.format(infile_pair))
    o_mapset = MapSet.from_json(infile_pair[0])
    t_mapset = MapSet.from_json(infile_pair[1])
    assert len(o_mapset) == 1 and len(t_mapset) == 1

    # Each MapSet holds exactly one map; take them out in the same order
    map_pair = (o_mapset.pop(), t_mapset.pop())

    make_plot(
        maps=map_pair,
        outfile='/'.join((args.outdir, args.outname)),
        logv=args.logv,
        center_zero=args.center_zero,
        vlabel=args.vlabel,
        title=args.title,
        sub_titles=args.subtitles,
    )
def get_mapset(self, key, error=None):
    """For a given key, get a PISA MapSet.

    Parameters
    ----------
    key : str
        Key handed to each container's `get_map`.

    error : None or str
        Specify a key that errors are read from.

    Returns
    -------
    MapSet named `self.name` with one map per container, in iteration order

    """
    per_container_maps = [
        container.get_map(key, error=error) for container in self
    ]
    return MapSet(name=self.name, maps=per_container_maps)
def serve(config, ref, port=DFLT_PORT):
    """Instantiate PISA objects and run server for processing requests.

    For every request the server receives a set of parameter values, applies
    them to the DistributionMaker, and sends back the log likelihood of the
    summed output map with respect to the reference.

    Parameters
    ----------
    config : str or iterable thereof
        Resource path(s) to pipeline config(s)

    ref : str
        Resource path to reference map

    port : int or str, optional

    """
    # Build the expensive objects once, up front, rather than per request
    dist_maker = DistributionMaker(config)
    ref_mapset = MapSet.from_json(ref)

    # The handler is defined here, as a closure, so that it can use the
    # objects instantiated above
    class MyTCPHandler(SocketServer.BaseRequestHandler):
        """
        The request handler class for our server.

        It is instantiated once per connection to the server, and must
        override the handle() method to implement communication to the
        client.

        See SocketServer.BaseRequestHandler for documentation of args.
        """
        def handle(self):
            try:
                param_values = receive_obj(self.request)
            except ConnectionClosed:
                return
            dist_maker._set_rescaled_free_params(param_values) # pylint: disable=protected-access
            summed_outputs = dist_maker.get_outputs(return_sum=True)
            test_map = summed_outputs[0]
            # binned=False: return sum over llh from all bins (not per-bin
            # llh's)
            llh = test_map.llh(expected_values=ref_mapset, binned=False)
            send_obj(llh, self.request)

    address = (DFLT_HOST, int(port))
    server = SocketServer.TCPServer(address, MyTCPHandler)
    print("llh server started on {}:{}".format(DFLT_HOST, port))
    server.serve_forever()
def plot_array(self, map_set, fun, *args, **kwargs):
    """Wrapper function to execute plotting function `fun` for every map in
    a set, distributed over a grid.

    Parameters
    ----------
    map_set : Map, MapSet or TransformSet
        A single Map is wrapped in a MapSet.

    fun : str
        Name of the method of `self` that is called for each element.

    *args
        Passed on to `fun`.

    **kwargs
        `n_rows` and `n_cols` (optional) set the grid shape. If both are
        missing, the grid is chosen automatically with the largest row count
        not exceeding sqrt(n) that divides the number of plots evenly. If
        only one is given, the other is the smallest count that fits all
        plots. All remaining keyword arguments are passed on to `fun`.

    Raises
    ------
    TypeError
        If `map_set` is not one of the accepted types.
    ValueError
        If the grid has fewer cells than there are plots.

    """
    n_rows = kwargs.pop('n_rows', None)
    n_cols = kwargs.pop('n_cols', None)

    if isinstance(map_set, Map):
        map_set = MapSet([map_set])

    if isinstance(map_set, MapSet):
        n = len(map_set)
    elif isinstance(map_set, TransformSet):
        n = len([x for x in map_set])
    else:
        raise TypeError('Expecting to plot a MapSet or TransformSet but '
                        'got %s' % type(map_set))

    # The grid shape is kept as plain Python ints: `plt.subplot` needs
    # integer row/column counts, which the former numpy-float arithmetic
    # did not provide.
    if n_rows is None and n_cols is None:
        # Never start below one row, so an empty set cannot cause a
        # division by zero
        n_rows = max(int(np.floor(np.sqrt(n))), 1)
        while n % n_rows != 0:
            n_rows -= 1
        n_cols = max(n // n_rows, 1)
    elif n_rows is None:
        n_cols = int(n_cols)
        # Ceiling division: just enough rows to hold every plot
        n_rows = max(-(-n // n_cols), 1)
    elif n_cols is None:
        n_rows = int(n_rows)
        n_cols = max(-(-n // n_rows), 1)
    else:
        n_rows = int(n_rows)
        n_cols = int(n_cols)

    if n > n_cols * n_rows:
        raise ValueError(
            'trying to plot %s subplots on a grid with %s x %s cells'
            % (n, n_cols, n_rows))

    size = (n_cols * self.size[0], n_rows * self.size[1])
    self.init_fig(size)
    plt.tight_layout()

    # Margins of one size unit on each side, as a fraction of the figure
    h_margin = 1. / size[0]
    v_margin = 1. / size[1]
    self.fig.subplots_adjust(hspace=0.2, wspace=0.2, top=1 - v_margin,
                             bottom=v_margin, left=h_margin,
                             right=1 - h_margin)

    for i, element in enumerate(map_set):
        plt.subplot(n_rows, n_cols, i + 1)
        getattr(self, fun)(element, *args, **kwargs)
        self.add_stamp(element.tex)
def apply(self, inputs):
    """Apply each transform to `inputs`; return computed outputs.

    Outputs that share a name are added together, so the result holds at
    most one entry per name, in order of first appearance.

    Parameters
    -----------
    inputs : sequence of objects

    Returns
    -------
    outputs : container with computed outputs (no sideband objects)

    """
    seen_names = []
    combined = []

    for xform in self:
        result = xform.apply(inputs)
        result_name = result.name
        try:
            # `index` raises ValueError for a name not seen before.
            # NOTE(review): a ValueError raised by the addition itself is
            # handled by the same `except` -- confirm this is intended.
            position = seen_names.index(result_name)
            combined[position] = combined[position] + result
            combined[position].name = result_name
        except ValueError:
            combined.append(result)
            seen_names.append(result_name)

    # Automatically attach a sensible hash (this may be overwritten, but
    # the below should be a reasonable hash in most cases); no hash is
    # attached unless both ingredients are available
    hashable = inputs.hash is not None and self.hash is not None
    hash_ = hash_obj((inputs.hash, self.hash)) if hashable else None

    # TODO: what to set for map set's name, tex, etc. ?
    return MapSet(maps=combined, hash=hash_)
def create_pseudo_data(toymc_params, seed=None):
    """Create pseudo data: a gaussian peak on top of a uniform background.

    Parameters
    ----------
    toymc_params
        Object providing `binning`, the peak settings `mu`, `sigma`, `nsig`
        and the background settings `nbackground_low`, `nbackground_high`,
        `nbkg`.

    seed : None or int
        If not None, used to seed numpy's global random generator.

    Returns
    -------
    MapSet holding a single Map named 'total' with the histogrammed counts
    and sqrt(counts) as errors

    """
    if seed is not None:
        np.random.seed(seed)

    binning = toymc_params.binning

    # Signal is drawn first, then background, so that a given seed always
    # yields the same events
    peak_events = np.random.normal(
        loc=toymc_params.mu,
        scale=toymc_params.sigma,
        size=toymc_params.nsig,
    )
    flat_events = np.random.uniform(
        high=toymc_params.nbackground_high,
        low=toymc_params.nbackground_low,
        size=toymc_params.nbkg,
    )

    all_events = np.concatenate([peak_events, flat_events])
    counts, _ = np.histogram(all_events, bins=binning.bin_edges.magnitude)

    # Wrap the counts in a pisa map and attach sqrt(N) errors
    data_map = Map(name='total',
                   binning=MultiDimBinning([binning]),
                   hist=counts)
    data_map.set_errors(error_hist=np.sqrt(counts))

    return MapSet([data_map])
def _compute_nominal_outputs(self):
    """Load the events from file, perform sanity checks and histogram them
    (into the final MapSet).

    The result is stored in `self.template` as a MapSet named 'data' that
    contains one Map named `output_names[0]`. `self.bin_names` and
    `self.bin_edges` are set as a side effect.

    Raises
    ------
    ValueError
        If the `sim_ver` param is not one of '4digit', '5digit' or 'dima'.
    AssertionError
        If the events in the file fail the sanity check on the earlier cut
        levels.

    """
    # get params
    data_file_name = self.params.data_file.value
    sim_version = self.params.sim_ver.value
    bdt_cut = self.params.bdt_cut.value.m_as('dimensionless')

    self.bin_names = self.output_binning.names

    # TODO: convert units using e.g. `comp_units` in stages/reco/hist.py
    self.bin_edges = []
    for name in self.bin_names:
        if 'energy' in name:
            bin_edges = self.output_binning[name].bin_edges.to('GeV').magnitude
        else:
            bin_edges = self.output_binning[name].bin_edges.magnitude
        self.bin_edges.append(bin_edges)

    # the rest of this function is PISA v2 legacy code...
    # right now only use burn sample with sim_version = '4digit'
    if sim_version == "4digit":
        Reco_Neutrino_Name = 'IC86_Dunkman_L6_MultiNest8D_PDG_Neutrino'
        Reco_Track_Name = 'IC86_Dunkman_L6_MultiNest8D_PDG_Track'
    elif sim_version == "5digit" or sim_version=="dima":
        Reco_Neutrino_Name = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_NumuCC'
        Reco_Track_Name = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_Track'
    else:
        raise ValueError(
            'only allow 4digit, 5digit(H2 model for hole ice) or'
            ' dima (dima p1 and p2 for hole ice)!'
        )

    # The context manager guarantees the file is closed even when a key is
    # missing or the sanity check below fails.
    with h5py.File(find_resource(data_file_name), 'r') as data_file:
        L6_result = np.array(data_file['IC86_Dunkman_L6']['result'])
        dLLH = np.array(data_file['IC86_Dunkman_L6']['delta_LLH'])
        reco_energy_all = np.array(data_file[Reco_Neutrino_Name]['energy'])
        reco_coszen_all = np.array(np.cos(
            data_file[Reco_Neutrino_Name]['zenith']
        ))
        # Read but not used further in this function
        reco_trck_len_all = np.array(data_file[Reco_Track_Name]['length'])

        # sanity check
        santa_doms = data_file['IC86_Dunkman_L6_SANTA_DirectDOMs']['value']
        l3 = data_file['IC86_Dunkman_L3']['value']
        l4 = data_file['IC86_Dunkman_L4']['result']
        l5 = data_file['IC86_Dunkman_L5']['bdt_score']
        assert(np.all(santa_doms>=3) and np.all(l3 == 1) and np.all(l5 >= 0.1))

        # l4==1 was not applied when i3 files were written to hdf5 files,
        # so do it here
        dLLH = dLLH[l4==1]
        reco_energy_all = reco_energy_all[l4==1]
        reco_coszen_all = reco_coszen_all[l4==1]
        l5 = l5[l4==1]
        L6_result = L6_result[l4==1]

    # Keep only the events that pass the L6 cut
    dLLH_L6 = dLLH[L6_result==1]
    l5 = l5[L6_result==1]
    reco_energy_L6 = reco_energy_all[L6_result==1]
    reco_coszen_L6 = reco_coszen_all[L6_result==1]

    # Cut: Only keep bdt score >= 0.2 (from MSU latest result, make data/MC
    # agree much better); if use no such further cut, use bdt_cut = 0.1
    logging.info(
        "Cut2, removing events with bdt_score < %s i.e. only keep bdt > %s"
        %(bdt_cut, bdt_cut)
    )
    cut_events = {}
    cut = l5>=bdt_cut
    cut_events['reco_energy'] = reco_energy_L6[cut]
    cut_events['reco_coszen'] = reco_coszen_L6[cut]
    cut_events['pid'] = dLLH_L6[cut]

    # One column per binning dimension, in the order of the output binning
    hist, _ = np.histogramdd(
        sample=np.array(
            [cut_events[bin_name] for bin_name in self.bin_names]
        ).T,
        bins=self.bin_edges
    )

    maps = [Map(name=self.output_names[0], hist=hist,
                binning=self.output_binning)]
    self.template = MapSet(maps, name='data')
def _compute_outputs(self, inputs=None):
    """Unfold the input events, or compute efficiency maps.

    Parameters
    ----------
    inputs : Data
        Events to unfold; `metadata['fit_hash']` and `metadata['name']` are
        read from it.

    Returns
    -------
    MapSet
        If the `return_eff` param is set: one efficiency Map per group, on
        `self.true_binning`. Otherwise a MapSet holding the single unfolded
        Map, obtained from `unfold_real_data` or `unfold_mc` depending on
        the `real_data` param.

    Raises
    ------
    AssertionError
        If `inputs` is not a Data object, or if the combination of params
        is not supported (see the individual messages).

    """
    logging.debug('Entering roounfold._compute_outputs')

    # Check the type before anything reads `inputs.metadata`, so that a
    # wrong input fails with the explicit message below
    if not isinstance(inputs, Data):
        raise AssertionError('inputs is not a Data object, instead is '
                             'type {0}'.format(type(inputs)))

    self.fit_hash = deepcopy(inputs.metadata['fit_hash'])
    logging.trace('{0} roounfold fit_hash = '
                  '{1}'.format(inputs.metadata['name'], self.fit_hash))
    if self.random_state is not None:
        logging.trace('{0} roounfold random_state = '
                      '{1}'.format(inputs.metadata['name'],
                                   hash_obj(self.random_state.get_state())))
    self._data = inputs

    if not self.params['return_eff'].value:
        if len(self.output_names) > 1:
            raise AssertionError(
                'Specified more than one NuFlavIntGroup as '
                'signal, {0}'.format(self.output_names))
        self.output_str = str(self.output_names[0])

    real_data = self.params['real_data'].value
    if real_data:
        logging.debug('Using real data')
        if 'nuall' not in self._data:
            raise AssertionError(
                'When using real data, input Data object must contain '
                'only one element "nuall" containing the data, instead it '
                'contains elements {0}'.format(self._data.keys()))
        if self.disk_cache is None:
            raise AssertionError(
                'No disk_cache specified from which to load - using real '
                'data requires object such as the response object to be '
                'cached to disk.')

    if self.params['optimize_reg'].value and real_data:
        raise AssertionError(
            'Cannot optimize the regularation if using real data.')
    if int(self.params['stat_fluctuations'].m) != 0 and real_data:
        raise AssertionError(
            'Cannot do poisson fluctuations if using real data.')
    if self.params['return_eff'].value and real_data:
        raise AssertionError(
            'Not implemented return of efficiency maps if using real data.'
        )

    if self.params['return_eff'].value:
        fin_data = self._data
        # Load generator level data for signal
        unfold_pipeline_cfg = self.params['unfold_pipeline_cfg'].value
        pipeline_cfg = from_file(unfold_pipeline_cfg)
        template_maker = Pipeline(pipeline_cfg)
        gen_data = template_maker.get_outputs()

        fin_data = fin_data.transform_groups(self.output_names)
        gen_data = gen_data.transform_groups(self.output_names)

        efficiencies = []
        assert set(fin_data.keys()) == set(gen_data.keys())
        for fig in fin_data.keys():
            figd_f = fin_data[fig]
            figd_g = gen_data[fig]
            inv_eff = self._get_inv_eff(figd_f, figd_g, self.true_binning,
                                        fig)

            # Invert only where the inverse efficiency is non-zero; the
            # remaining bins keep an efficiency of zero
            i_mask = ~(inv_eff == 0.)
            eff = unp.uarray(np.zeros(self.true_binning.shape),
                             np.zeros(self.true_binning.shape))
            eff[i_mask] = 1. / inv_eff[i_mask]
            efficiencies.append(
                Map(name=fig, hist=eff, binning=self.true_binning))
        return MapSet(efficiencies)

    # TODO(shivesh): [   TRACE] None of the selections ['iron', 'nh'] found in this pipeline.
    # TODO(shivesh): Fix "smearing_matrix" memory leak
    # TODO(shivesh): Fix unweighted unfolding
    # TODO(shivesh): different algorithms
    # TODO(shivesh): implement handling of 0 division inside Map objects
    if real_data:
        unfold_map = self.unfold_real_data()
    else:
        unfold_map = self.unfold_mc()

    return MapSet([unfold_map])
def main(return_outputs=False):
    """Run a pipeline from the command line; save and plot its outputs.

    The command-line arguments are parsed, the pipeline config is read and
    updated with any `section arg=val` overrides, and a Pipeline is
    instantiated from it. Either the pipeline is run up to (and including)
    `--stop-after-stage`, or a single stage selected by `--only-stage` is
    run on its own (fed with dummy inputs if it declares an input binning).
    If an output directory was given, stage outputs are written to JSON and,
    for each enabled image format, plotted.

    Parameters
    ----------
    return_outputs : bool
        If True, return the pipeline and the outputs.

    Returns
    -------
    (pipeline, outputs) if `return_outputs` is True, otherwise None

    Raises
    ------
    ValueError
        If images are requested without an output directory, or if a config
        override is not of the form "section arg=val".
    NoSectionError
        Re-raised if a config override names a section that does not exist.

    """
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    # Even if user specifies an integer on command line, it comes in as a
    # string. Try to convert to int (e.g. if `'1'` is passed to indicate the
    # second stage), and -- if successful -- use this as `args.only_stage`.
    # Otherwise, the string value passed will be used (e.g. `'osc'` could be
    # passed).
    try:
        only_stage_int = int(args.only_stage)
    except (ValueError, TypeError):
        pass
    else:
        args.only_stage = only_stage_int

    if args.outdir:
        mkdir(args.outdir)
    else:
        if args.pdf or args.png:
            raise ValueError("No --outdir provided, so cannot save images.")

    # Most basic parsing of the pipeline config (parsing only to this level
    # allows for simple strings to be specified as args for updating)
    bcp = PISAConfigParser()
    bcp.read(args.pipeline)

    # Update the config with any args specified on command line
    if args.arg is not None:
        for arg_list in args.arg:
            if len(arg_list) < 2:
                raise ValueError(
                    'Args must be formatted as: "section arg=val". Got "%s"'
                    " instead." % " ".join(arg_list))
            section = arg_list[0]
            # Everything after the section is rejoined and split at the
            # first "=", so the value itself may contain spaces and "="
            remainder = " ".join(arg_list[1:])
            eq_split = remainder.split("=")
            newarg = eq_split[0].strip()
            value = ("=".join(eq_split[1:])).strip()
            logging.debug('Setting config section "%s" arg "%s" = "%s"', section, newarg, value)
            try:
                bcp.set(section, newarg, value)
            except NoSectionError:
                logging.error(
                    'Invalid section "%s" specified. Must be one of %s',
                    section,
                    bcp.sections(),
                )
                raise

    # Instantiate the pipeline
    pipeline = Pipeline(bcp)  # pylint: disable=redefined-outer-name

    if args.select is not None:
        pipeline.select_params(args.select, error_on_missing=True)

    if args.only_stage is None:
        # Run the pipeline from its first stage up to `stop_after_stage`,
        # which may be given as an index or as a stage name
        stop_idx = args.stop_after_stage
        try:
            stop_idx = int(stop_idx)
        except (TypeError, ValueError):
            pass
        if isinstance(stop_idx, str):
            stop_idx = pipeline.index(stop_idx)
        outputs = pipeline.get_outputs(idx=stop_idx)  # pylint: disable=redefined-outer-name
        # The slice end is exclusive, so step one past the last stage run
        if stop_idx is not None:
            stop_idx += 1
        indices = slice(0, stop_idx)
    else:
        # Run just the one selected stage
        assert args.stop_after_stage is None
        idx = pipeline.index(args.only_stage)
        stage = pipeline[idx]
        indices = slice(idx, idx + 1)

        # Create dummy inputs if necessary
        inputs = None
        if hasattr(stage, "input_binning"):
            logging.warning(
                "Stage requires input, so building dummy"
                " inputs of random numbers, with random state set to the input"
                " index according to alphabetical ordering of input names and"
                " filled in alphabetical ordering of dimension names.")
            input_maps = []
            tmp = deepcopy(stage.input_binning)
            alphabetical_binning = tmp.reorder_dimensions(sorted(tmp.names))
            for input_num, input_name in enumerate(sorted(stage.input_names)):
                # Create a new map with all 3's; name according to the input
                hist = np.full(shape=alphabetical_binning.shape, fill_value=3.0)
                input_map = Map(name=input_name, binning=alphabetical_binning, hist=hist)

                # Apply Poisson fluctuations to randomize the values in the map
                input_map.fluctuate(method="poisson", random_state=input_num)

                # Reorder dimensions according to user's original binning spec
                input_map.reorder_dimensions(stage.input_binning)
                input_maps.append(input_map)
            inputs = MapSet(maps=input_maps, name="ones", hash=1)

        outputs = stage.run(inputs=inputs)

    for stage in pipeline[indices]:
        # Nothing can be saved without an output directory
        if not args.outdir:
            break
        stg_svc = stage.stage_name + "__" + stage.service_name
        fbase = os.path.join(args.outdir, stg_svc)
        if args.intermediate or stage == pipeline[indices][-1]:
            stage.outputs.to_json(fbase + "__output.json.bz2")

        # also only plot if args intermediate or last stage
        if args.intermediate or stage == pipeline[indices][-1]:
            formats = OrderedDict(png=args.png, pdf=args.pdf)
            if isinstance(stage.outputs, Data):
                # TODO(shivesh): plots made here will use the most recent
                # "pisa_weight" column and so all stages will have identical plots
                # (one workaround is to turn on "memcache_deepcopy")
                # TODO(shivesh): intermediate stages have no output binning
                if stage.output_binning is None:
                    logging.debug("Skipping plot of intermediate stage %s", stage)
                    continue
                outputs = stage.outputs.histogram_set(
                    binning=stage.output_binning,
                    nu_weights_col="pisa_weight",
                    mu_weights_col="pisa_weight",
                    noise_weights_col="pisa_weight",
                    mapset_name=stg_svc,
                    errors=True,
                )

            # NOTE(review): unless the stage outputs are a Data object, the
            # object plotted below is `outputs` as computed above, not
            # `stage.outputs` -- confirm this is intended when plotting
            # intermediate stages.
            try:
                for fmt, enabled in formats.items():
                    if not enabled:
                        continue
                    my_plotter = Plotter(
                        stamp="Event rate",
                        outdir=args.outdir,
                        fmt=fmt,
                        log=False,
                        annotate=args.annotate,
                    )
                    my_plotter.ratio = True
                    my_plotter.plot_2d_array(outputs, fname=stg_svc + "__output", cmap="RdBu")
            except ValueError as exc:
                # A plotting failure is logged but does not abort the loop
                # over stages
                logging.error(
                    "Failed to save plot to format %s. See exception"
                    " message below",
                    fmt,
                )
                # The string returned by this call is not used
                traceback.format_exc()
                logging.exception(exc)
                logging.warning("I can't go on, I'll go on.")

    if return_outputs:
        return pipeline, outputs
def get_outputs(self, inputs=None):
    """Top-level function for computing outputs. Use this method to get
    outputs if you live outside this stage/service.

    Caching is handled here, so if the output hash returned by
    `_derive_outputs_hash` is in `outputs_cache`, it is simply returned.
    Otherwise, the `_compute_outputs` private method is invoked to do the
    actual work of computing outputs.

    Besides returning the outputs, this method records `inputs` and the
    outputs on the instance (`self.inputs`, `self.outputs`) and sets the
    flags `outputs_loaded_from_cache` and `outputs_computed`.

    Parameters
    ----------
    inputs : None or Mapping
        Any inputs to be transformed, plus any sideband objects that are to
        be passed on (untransformed) to subsequent stages.

    Returns
    -------
    The outputs, either taken from the cache or newly computed. If `inputs`
    contains names that are not in `self.input_names`, a new MapSet is
    returned that holds the outputs plus those extra input objects.

    Raises
    ------
    TypeError
        If `inputs` contains sideband objects but the outputs are not a
        MapSet.

    See also
    --------
    Overloadable methods called directly from this:
        _derive_outputs_hash
        _compute_outputs

    """
    # Reset flags
    self.outputs_loaded_from_cache = None
    self.outputs_computed = False

    # TODO: store nominal outputs to the outputs cache as well, but
    # derive the hash value the same way as it is done for outputs,
    # to avoid needing to apply no systematics to the nominal outputs
    # to get the (identical) outputs?
    # Problem: assumes the nominal transform is the same as the outputs
    # that will result, which *might* not be true (though it seems it will
    # usually be so)

    # Keep inputs for internal use and for inspection later
    self.inputs = inputs

    outputs_hash, transforms_hash, nominal_transforms_hash = (
        self._derive_outputs_hash()
    )

    # Compute nominal outputs; if feature is not used, this doesn't
    # actually do much of anything. To do more than this, override the
    # `_compute_nominal_outputs` method.
    # NOTE(review): the nominal *transforms* hash is passed as the nominal
    # *outputs* hash here -- confirm this is intended.
    self.get_nominal_outputs(nominal_outputs_hash=nominal_transforms_hash)

    logging.trace("outputs_hash: %s" % outputs_hash)

    # The cache is only consulted when it exists, a hash could be derived,
    # that hash is present in the cache, and debug mode is off
    if (
        self.outputs_cache is not None
        and outputs_hash is not None
        and outputs_hash in self.outputs_cache
        and self.debug_mode is None
    ):
        self.outputs_loaded_from_cache = "memory"
        logging.trace("Loading outputs from cache.")
        outputs = self.outputs_cache[outputs_hash]
    else:
        logging.trace("Need to compute outputs...")

        if self.use_transforms:
            self.get_transforms(
                transforms_hash=transforms_hash,
                nominal_transforms_hash=nominal_transforms_hash,
            )

        logging.trace("... now computing outputs.")
        outputs = self._compute_outputs(inputs=self.inputs)
        self.check_outputs(outputs)
        # Only Map/MapSet outputs are rebinned to the output binning
        if isinstance(outputs, (Map, MapSet)):
            outputs = outputs.rebin(self.output_binning)
        outputs.hash = outputs_hash
        self.outputs_computed = True

        # Store output to cache
        if self.outputs_cache is not None and outputs_hash is not None:
            self.outputs_cache[outputs_hash] = outputs

    # Keep outputs for inspection later
    self.outputs = outputs

    # Attach sideband objects (i.e., inputs not specified in
    # `self.input_names`) to the "augmented" output object
    if self.inputs is None:
        names_in_inputs = set()
    else:
        names_in_inputs = set(self.inputs.names)
    unused_input_names = names_in_inputs.difference(self.input_names)

    # No sideband objects: the outputs are returned as they are
    if len(unused_input_names) == 0:
        return outputs

    # TODO: update logic for Data object, generic sideband objects
    # Create a new output container different from `outputs` but copying
    # the contents, for purposes of attaching the sideband objects found.
    if isinstance(outputs, MapSet):
        augmented_outputs = MapSet(outputs)
        for name in unused_input_names:
            augmented_outputs.append(inputs[name])
        return augmented_outputs
    else:
        raise TypeError(
            "Outputs are %s, but must currently be a MapSet in"
            " the case that the input includes sideband"
            " objects." % type(outputs)
        )
def _calculate_fit_coeffs(data, params, fit_binning, nu_params=None,
                          mu_params=None):
    """Calculate the fit coefficients for each systematic, flavint, bin for
    a polynomial.

    For every systematic listed in the sample config, templates are
    produced for each of its runs, divided bin-by-bin by the nominal run's
    template, and a polynomial in (run value - nominal value) is fit to
    those ratios independently in each bin of `fit_binning`.

    Parameters
    ----------
    data
        Not used by this function; retained for interface compatibility.
    params
        Param container; the entries read are 'discr_sys_sample_config',
        'poly_degree', 'force_through_nominal', 'pipeline_config' and
        'stop_after_stage'.
    fit_binning
        Binning in which the templates are histogrammed and the fits done.
    nu_params : None or sequence of strings
        Names of the neutrino systematic params; the first three
        characters of each are dropped before comparison with the sample
        config's list (presumably a 'nu_' prefix -- TODO confirm).
        None skips neutrinos.
    mu_params : None or sequence of strings
        Same as `nu_params`, for muons. None skips muons.

    Returns
    -------
    sys_fit_coeffs : OrderedDict
        Maps systematic name to a MapSet (neutrinos) or Map (muons) whose
        binning is `fit_binning` plus a trailing 'coeff' dimension holding
        the fitted polynomial coefficients.

    Raises
    ------
    AssertionError
        If the systematics lists disagree, a template is not a Data
        object, flavint groups differ between runs, or a muon template
        contains no muons.

    """
    logging.debug('Calculating fit coefficients')

    config = from_file(params['discr_sys_sample_config'].value)

    degree = int(params['poly_degree'].value)
    force_through_nominal = params['force_through_nominal'].value

    if force_through_nominal:
        # Constant term is fixed to 1, so only `degree` coeffs are free
        def fit_func(vals, *poly_coeffs):
            return np.polynomial.polynomial.polyval(
                vals, [1.] + list(poly_coeffs))
    else:
        def fit_func(vals, *poly_coeffs):
            return np.polynomial.polynomial.polyval(
                vals, list(poly_coeffs))
        # add free param for constant term
        degree += 1

    template_maker = Pipeline(params['pipeline_config'].value)
    dataset_param = template_maker.params['dataset']

    # The output binning only depends on `degree`, so build it once
    coeff_binning = OneDimBinning(name='coeff', num_bins=degree,
                                  is_lin=True, domain=[-1, 1])
    combined_binning = fit_binning + coeff_binning

    def parse(string):
        """Split a comma-separated config value, ignoring spaces."""
        return string.replace(' ', '').split(',')

    def check_sys_list(param_names, sys_list):
        """Require the pipeline's params to match the sample config."""
        if set(param_names) != set(sys_list):
            raise AssertionError(
                'Systematics list listed in the sample config file does '
                'not match the params in the pipeline config file\n {0} '
                '!= {1}'.format(set(param_names), set(sys_list)))

    def load_template(ev_sys, run, sys_name):
        """Point the pipeline at one run's dataset and get its Data."""
        logging.info('Loading run {0} of systematic '
                     '{1}'.format(run, sys_name))
        dataset_param.value = ev_sys + '|' + run
        template_maker.update_params(dataset_param)
        template = template_maker.get_outputs(
            idx=int(params['stop_after_stage'].m))
        if not isinstance(template, Data):
            raise AssertionError(
                'Template output is not a Data object, instead is '
                'type {0}'.format(type(template)))
        return template

    def ratio_to_nominal(run_map, nom_map):
        """Bin-wise run/nominal ratio; bins with zero nominal stay 0."""
        # TODO(shivesh): error propagation?
        mask = ~(nom_map.hist == 0.)
        div = np.zeros(nom_map.shape)
        with np.errstate(divide='ignore', invalid='ignore'):
            div[mask] = (unp.nominal_values(run_map.hist[mask])
                         / unp.nominal_values(nom_map.hist[mask]))
        return div

    def fit_bins(hists, delta_runs):
        """Fit `fit_func` vs. `delta_runs` in each bin of `fit_binning`.

        `hists` holds one ratio histogram per run, ordered like
        `delta_runs`.
        """
        # Object dtype lets a whole coefficient vector be assigned per bin
        pvals_hist = np.empty([int(n) for n in combined_binning.shape],
                              dtype=object)
        for idx in np.ndindex(fit_binning.shape):
            y_values = [unp.nominal_values(hist[idx]) for hist in hists]
            y_sigma = [unp.std_devs(hist[idx]) for hist in hists]
            if np.any(y_sigma):
                popt, _ = curve_fit(fit_func, delta_runs, y_values,
                                    sigma=y_sigma, p0=np.ones(degree))
            else:
                popt, _ = curve_fit(fit_func, delta_runs, y_values,
                                    p0=np.ones(degree))
            pvals_hist[idx] = popt
        return np.array(pvals_hist.tolist())

    sys_fit_coeffs = OrderedDict()

    if nu_params is not None:
        sys_list = parse(config.get('neutrinos', 'sys_list'))
        # A real list (not a lazy `map`) so it can be both compared and
        # reported in the error message
        nu_params = [name[3:] for name in nu_params]
        check_sys_list(nu_params, sys_list)

        for sys_name in sys_list:
            ev_sys = 'neutrinos|' + sys_name
            # The slice drops the first and last character of the config
            # value (presumably enclosing brackets -- TODO confirm)
            runs = parse(config.get(ev_sys, 'runs')[1:-1])
            nominal = config.get(ev_sys, 'nominal')

            mapset_dict = OrderedDict()
            flavint_groups = None
            for run in runs:
                template = load_template(ev_sys, run, sys_name)

                if flavint_groups is None:
                    flavint_groups = template.flavint_groups
                elif set(flavint_groups) != set(template.flavint_groups):
                    raise AssertionError(
                        'Mismatch of flavint_groups - ({0}) does not '
                        'match flavint_groups '
                        '({1})'.format(flavint_groups,
                                       template.flavint_groups))

                outputs = [
                    template.histogram(kinds=fig,
                                       binning=fit_binning,
                                       weights_col='pisa_weight',
                                       errors=False,
                                       name=str(NuFlavIntGroup(fig)))
                    for fig in template.keys()
                ]
                mapset_dict[run] = MapSet(outputs, name=run)

            nom_mapset = mapset_dict[nominal]
            fracdiff_mapset_dict = OrderedDict()
            for run in runs:
                fracdiff_mapset_dict[run] = MapSet([
                    Map(name=run_map.name,
                        binning=run_map.binning,
                        hist=ratio_to_nominal(run_map,
                                              nom_mapset[run_map.name]))
                    for run_map in mapset_dict[run]
                ])

            delta_runs = np.array([float(x) for x in runs]) - float(nominal)

            # `template` is the last run's Data; every run was checked to
            # have the same flavint groups
            params_maps = []
            for fig in template.keys():
                hists = [fracdiff_mapset_dict[run][fig].hist
                         for run in runs]
                params_maps.append(
                    Map(name=fig, binning=combined_binning,
                        hist=fit_bins(hists, delta_runs)))
            params_mapset = MapSet(params_maps, name=sys_name)

            if sys_name in sys_fit_coeffs:
                sys_fit_coeffs[sys_name] = MapSet(
                    [sys_fit_coeffs[sys_name], params_mapset])
            else:
                sys_fit_coeffs[sys_name] = params_mapset

    if mu_params is not None:
        sys_list = parse(config.get('muons', 'sys_list'))
        mu_params = [name[3:] for name in mu_params]
        check_sys_list(mu_params, sys_list)

        for sys_name in sys_list:
            ev_sys = 'muons|' + sys_name
            runs = parse(config.get(ev_sys, 'runs')[1:-1])
            nominal = config.get(ev_sys, 'nominal')

            map_dict = OrderedDict()
            for run in runs:
                template = load_template(ev_sys, run, sys_name)
                if not template.contains_muons:
                    raise AssertionError(
                        'Template output does not contain muons')

                map_dict[run] = template.histogram(
                    kinds='muons',
                    binning=fit_binning,
                    # NOTE: weights cancel in fraction
                    weights_col=None,
                    errors=False,
                    name='muons')

            nom_map = map_dict[nominal]
            fracdiff_map_dict = OrderedDict()
            for run in runs:
                fracdiff_map_dict[run] = Map(
                    name='muons', binning=nom_map.binning,
                    hist=ratio_to_nominal(map_dict[run], nom_map))

            delta_runs = np.array([float(x) for x in runs]) - float(nominal)

            # Fit the *muon* ratio maps; the previous code reused the
            # neutrino `fracdiff_mapset_dict[run][fig]` here.
            hists = [fracdiff_map_dict[run].hist for run in runs]
            params_map = Map(name='muons', binning=combined_binning,
                             hist=fit_bins(hists, delta_runs))

            if sys_name in sys_fit_coeffs:
                sys_fit_coeffs[sys_name] = MapSet(
                    [sys_fit_coeffs[sys_name], params_map])
            else:
                sys_fit_coeffs[sys_name] = params_map

    return sys_fit_coeffs
def main():
    """Fit per-bin polynomials to systematic-variation templates.

    For each systematic listed in the fit settings file, templates are
    produced for every configured run, divided bin-by-bin by the nominal
    run's template, and a polynomial in (run value - nominal value) is fit
    in each bin. The coefficients are optionally smoothed, written to a
    JSON file in `args.out_dir` and, if requested, plotted.
    """
    args = parse_args()
    set_verbosity(args.v)

    if args.plot:
        # Backend must be selected before anything imports pyplot
        import matplotlib as mpl
        mpl.use('pdf')
        import matplotlib.pyplot as plt
        from pisa.utils.plotter import Plotter

    cfg = from_file(args.fit_settings)
    sys_list = cfg.get('general', 'sys_list').replace(' ', '').split(',')
    stop_idx = cfg.getint('general', 'stop_after_stage')

    for sys_name in sys_list:
        # Parse info for given systematic
        nominal = cfg.getfloat(sys_name, 'nominal')
        degree = cfg.getint(sys_name, 'degree')
        force_through_nominal = cfg.getboolean(sys_name,
                                               'force_through_nominal')
        # NOTE(review): `eval` executes arbitrary code from the settings
        # file. If 'runs' is always a plain literal, `ast.literal_eval`
        # would be the safe choice -- confirm before switching.
        runs = eval(cfg.get(sys_name, 'runs'))
        smooth = cfg.get(sys_name, 'smooth')

        x_values = np.array(sorted(runs))

        # Build fit function; it is kept as source text because that text
        # is also written to the output file
        if force_through_nominal:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, [1.] + list(p))"
        else:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, list(p))"
            # Add free parameter for constant term
            degree += 1
        fit_fun = eval(function)

        # Instantiate template maker
        template_maker = Pipeline(args.template_settings)

        if args.set_param != '':
            for one_set_param in args.set_param:
                p_name, value = one_set_param.split("=")
                value = parse_quantity(value)
                value = value.n * value.units
                param = template_maker.params[p_name]
                param.value = value
                template_maker.update_params(param)

        inputs = {}
        map_names = None
        # Get sys templates
        for run in runs:
            for key, val in cfg.items('%s:%s'%(sys_name, run)):
                if key.startswith('param.'):
                    _, pname = key.split('.')
                    param = template_maker.params[pname]
                    try:
                        value = parse_quantity(val)
                        param.value = value.n * value.units
                    except ValueError:
                        # Not a quantity: fall back to a string literal
                        value = parse_string_literal(val)
                        param.value = value
                    param.set_nominal_to_current_value()
                    template_maker.update_params(param)
            # Retrieve maps
            template = template_maker.get_outputs(idx=stop_idx)
            if map_names is None:
                map_names = [m.name for m in template]
            inputs[run] = dict((m.name, m.hist) for m in template)

        # Numpy acrobatics: stack the per-run ratios to nominal, then move
        # the run axis to the end so `arrays[name][idx]` is the series of
        # values across runs for one bin
        arrays = {}
        for name in map_names:
            nominal_hist = unp.nominal_values(inputs[nominal][name])
            ratios = np.array([inputs[x][name] / nominal_hist
                               for x in x_values])
            arrays[name] = np.rollaxis(ratios, 0, len(ratios.shape))

        # Shift to get deltas. Deliberately not in-place: `x_values -=
        # nominal` cannot cast the float result back into an integer array
        # when the configured run values are all integers.
        x_values = x_values - nominal

        # Binning object (assuming they're all the same)
        binning = template.maps[0].binning

        shape_small = [d.num_bins for d in binning]
        shape = shape_small + [degree]

        outputs = {}
        errors = {}
        for name in map_names:
            # Now actually perform some fits
            outputs[name] = np.ones(shape)
            errors[name] = np.ones(shape)

            for idx in np.ndindex(*shape_small):
                y_values = unp.nominal_values(arrays[name][idx])
                y_sigma = unp.std_devs(arrays[name][idx])
                if np.any(y_sigma):
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           sigma=y_sigma,
                                           p0=np.ones(degree))
                else:
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           p0=np.ones(degree))
                perr = np.sqrt(np.diag(pcov))
                for k, p in enumerate(popt):
                    outputs[name][idx][k] = p
                    errors[name][idx][k] = perr[k]

                # TODO(philippeller): per-bin plotting of the fits used to
                # live here but was disabled because it would fail

        if smooth == 'gauss':
            for name in map_names:
                for d in range(degree):
                    outputs[name][...,d] = gaussian_filter(
                        outputs[name][...,d], sigma=1)

        if smooth == 'gauss_pid':
            split_idx = binning.names.index('pid')
            tot = len(binning)-1
            for name in map_names:
                for d in range(degree):
                    for p in range(len(binning['pid'])):
                        outputs[name][...,p,d] = gaussian_filter(
                            np.swapaxes(outputs[name], split_idx, tot)[...,p,d],
                            sigma=1
                        )
                # NOTE(review): the nesting level of this statement was
                # reconstructed from unindented text; it is a no-op when
                # 'pid' is the last binning dimension -- confirm placement
                outputs[name] = np.swapaxes(outputs[name], split_idx, tot)

        # Save the raw ones anyway
        outputs['pname'] = sys_name
        outputs['nominal'] = nominal
        outputs['function'] = function
        outputs['map_names'] = map_names
        outputs['binning_hash'] = binning.hash
        to_file(outputs, '%s/%s_sysfits_%s_%s.json'%(args.out_dir, sys_name,
                                                     args.tag, smooth))

        if args.plot:
            # One figure per polynomial coefficient
            for d in range(degree):
                maps = MapSet([
                    Map(name='%s_raw'%name, hist=outputs[name][...,d],
                        binning=binning)
                    for name in map_names
                ])
                my_plotter = Plotter(
                    stamp='',
                    outdir=args.out_dir,
                    fmt='pdf',
                    log=False,
                    label=''
                )
                my_plotter.plot_2d_array(
                    maps,
                    fname='%s_%s_%s_%s'%(sys_name, args.tag, d, smooth),
                )
def unfold_mc(self):
    """Unfold the Monte Carlo sample with RooUnfold's Bayesian method.

    Reads the 'regularisation', 'unfold_bg', 'unfold_eff',
    'unfold_unweighted', 'stat_fluctuations' and 'optimize_reg' params.

    Returns
    -------
    MapSet holding the generator-level histogram when 'regularisation' is
    0; otherwise the unfolded Map.
    """
    logging.debug('Unfolding monte carlo sample')

    params = self.params
    regularisation = int(params['regularisation'].m)
    unfold_bg = params['unfold_bg'].value
    unfold_eff = params['unfold_eff'].value
    unfold_unweighted = params['unfold_unweighted'].value

    # Signal, background and combined (signal+bg) event samples
    signal_data, bg_data, all_data = self.split_data()

    # Generator-level signal events
    gen_data = self.load_gen_data()

    # Zero regularisation means "no unfolding": hand back the truth
    if regularisation == 0:
        logging.info('Returning baseline MapSet')
        truth_hist = roounfold._histogram(
            events=gen_data,
            binning=self.true_binning,
            weights=gen_data['pisa_weight'],
            errors=True,
            name=self.output_str
        )
        return MapSet([truth_hist])

    # Inverse efficiency is only needed when efficiency is corrected for
    # by hand (applied after unfolding, below)
    if not unfold_eff:
        inv_eff = self.get_inv_eff(signal_data, gen_data)

    # Choose the samples that normalise the response, per the settings;
    # the signal sample is the fallback for both
    data = signal_data
    reco_norm_data = all_data if unfold_bg else None
    true_norm_data = gen_data if unfold_eff else None
    if reco_norm_data is None:
        reco_norm_data = signal_data
    if true_norm_data is None:
        true_norm_data = signal_data

    if unfold_unweighted:
        unit_weights = np.ones(reco_norm_data['pisa_weight'].shape)
        for sample in (reco_norm_data, true_norm_data, data):
            sample['pisa_weight'] = unit_weights

    # Response object
    response = self.create_response(reco_norm_data, true_norm_data, data)

    # Pseudodata
    all_hist = self._histogram(
        events=all_data,
        binning=self.reco_binning,
        weights=all_data['pisa_weight'],
        errors=False,
        name='all',
        tex=r'\rm{all}'
    )
    seed = int(params['stat_fluctuations'].m)
    if seed == 0:
        self.seed = None
        self.random_state = None
    else:
        # Only re-seed when there is no state yet or the seed changed
        if self.random_state is None or seed != self.seed:
            self.seed = seed
            self.random_state = get_random_state(seed)
        all_hist = all_hist.fluctuate('poisson', self.random_state)
    all_hist.set_poisson_errors()

    # Background subtraction (skipped if the bg is unfolded as well)
    if unfold_bg:
        reco = all_hist
    else:
        reco = all_hist - self.get_bg_hist(bg_data)
    reco.name = 'reco_signal'
    reco.tex = r'\rm{reco_signal}'

    r_th1d = convert_to_th1d(roounfold._flatten_to_1d(reco), errors=True)

    # Scan for the regularisation value: stop as soon as chi2 gets worse
    # than it was for the previous value
    if params['optimize_reg'].value:
        previous_chisq = None
        for r_idx in range(regularisation):
            unfold = RooUnfoldBayes(response, r_th1d, r_idx + 1)
            unfold.SetVerbose(0)
            this_chisq = unfold.Chi2(self.t_th1d, 1)
            if previous_chisq is not None and this_chisq > previous_chisq:
                regularisation = r_idx
                break
            previous_chisq = this_chisq

    # Unfold
    unfold = RooUnfoldBayes(response, r_th1d, regularisation)
    unfold.SetVerbose(0)

    unfold_map = unflatten_thist(
        in_th1d=unfold.Hreco(1),
        binning=self.true_binning,
        name=self.output_str,
        errors=True
    )

    # Efficiency correction
    if not unfold_eff:
        unfold_map *= inv_eff

    del r_th1d
    del unfold

    unfolded_sum = np.sum(unp.nominal_values(unfold_map.hist))
    logging.info('Unfolded reco sum {0}'.format(unfolded_sum))
    return unfold_map
def _get_comparison_mapset(entity, param_selections, role, arg_name):
    """Turn one `compare` entity specification into a MapSet.

    `entity` may be an instantiated Map, MapSet, Pipeline or
    DistributionMaker, or a sequence of config / JSON file names. `role`
    and `arg_name` are only used to word the error message. Returns the
    tuple (mapset, source description string).
    """
    mapset = None
    source = None

    if isinstance(entity, Map):
        mapset = MapSet(entity)
        source = MAP_SOURCE_STR
    elif isinstance(entity, MapSet):
        mapset = entity
        source = MAPSET_SOURCE_STR
    elif isinstance(entity, Pipeline):
        if param_selections is not None:
            entity.select_params(param_selections)
        mapset = entity.get_outputs()
        source = PIPELINE_SOURCE_STR
    elif isinstance(entity, DistributionMaker):
        if param_selections is not None:
            entity.select_params(param_selections)
        mapset = entity.get_outputs()
        source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        # File name(s): try each interpretation in turn. A failure to
        # instantiate just means "not this kind of file", so it is logged
        # at debug level and the next interpretation is tried.
        if len(entity) == 1:
            try:
                pipeline = Pipeline(config=entity[0])
            except Exception as err:
                logging.debug('Not a pipeline config: %s', err)
            else:
                source = PIPELINE_SOURCE_STR
                if param_selections is not None:
                    pipeline.select_params(param_selections)
                mapset = pipeline.get_outputs()
        else:
            try:
                dmaker = DistributionMaker(pipelines=entity)
            except Exception as err:
                logging.debug('Not distribution maker configs: %s', err)
            else:
                source = DISTRIBUTIONMAKER_SOURCE_STR
                if param_selections is not None:
                    dmaker.select_params(param_selections)
                mapset = dmaker.get_outputs()

        if mapset is None:
            try:
                maps = [Map.from_json(f) for f in entity]
            except Exception as err:
                logging.debug('Not stored Map(s): %s', err)
            else:
                source = MAP_SOURCE_STR
                mapset = MapSet(maps)

        if mapset is None:
            assert param_selections is None
            assert len(entity) == 1, 'Can only handle one MapSet'
            try:
                mapset = MapSet.from_json(entity[0])
            except Exception as err:
                logging.debug('Not a stored MapSet: %s', err)
            else:
                source = MAPSET_SOURCE_STR

    if mapset is None:
        raise ValueError(
            'Could not instantiate the %s Pipeline, DistributionMaker, Map,'
            ' or MapSet from %s value(s) %s' % (role, arg_name, entity))

    return mapset, source


def compare(outdir, ref, ref_label, test, test_label, asymm_max=None,
            asymm_min=None, combine=None, diff_max=None, diff_min=None,
            fract_diff_max=None, fract_diff_min=None, json=False, pdf=False,
            png=False, ref_abs=False, ref_param_selections=None, sum=None,
            test_abs=False, test_param_selections=None):
    """Compare two entities. The result each entity specification is
    formatted into a MapSet and stored to disk, so that e.g. re-running
    a DistributionMaker is unnecessary to reproduce the results.

    Parameters
    ----------
    outdir : string
        Store output plots to this directory

    ref : string or array of strings
        Pipeline settings config file that generates reference output,
        or a stored map or map set. Multiple pipelines, maps, or map sets
        are supported

    ref_abs : bool
        Use the absolute value of the reference plot for comparisons

    ref_label : string
        Label for reference

    ref_param_selections : None or string
        Param selections to apply to ref pipeline config(s). Not
        applicable if ref specifies stored map or map sets

    test : string or array of strings
        Pipeline settings config file that generates test output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    test_abs : bool
        Use the absolute value of the test plot for comparisons

    test_label : string
        Label for test

    test_param_selections : None or string
        Param selections to apply to test pipeline config(s). Not
        applicable if test specifies stored map or map sets

    combine : None or string or array of strings
        Combine by wildcard string, where string globbing (a la command
        line) uses asterisk for any number of wildcard characters. Use
        single quotes such that asterisks do not get expanded by the shell.
        Multiple combine strings supported

    sum : None or int
        Sum over (and hence remove) the specified axis or axes. I.e.,
        project the map onto remaining (unspecified) axis or axes

    json : bool
        Save output maps in compressed json (json.bz2) format

    pdf : bool
        Save plots in PDF format. If neither this nor png is specified,
        no plots are produced

    png : bool
        Save plots in PNG format. If neither this nor pdf is specified,
        no plots are produced

    diff_min : None or float
        Difference plot vmin; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    diff_max : None or float
        Difference plot max; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    fract_diff_min : None or float
        Fractional difference plot vmin; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    fract_diff_max : None or float
        Fractional difference plot max; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    asymm_min : None or float
        Asymmetry plot vmin; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    asymm_max : None or float
        Asymmetry plot max; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    Returns
    -------
    summary_stats : dict
        Dictionary containing a summary for each Map processed

    diff : MapSet
        MapSet of the difference
        - (Test - Ref)

    fract_diff : MapSet
        MapSet of the fractional difference
        - (Test - Ref) / Ref

    asymm : MapSet
        MapSet of the asymmetric fraction difference or pull
        - (Test - Ref) / sqrt(Ref)

    Raises
    ------
    ValueError
        If `ref` or `test` cannot be turned into a MapSet, or if their map
        names differ.

    """
    ref_plot_label = ref_label
    if ref_abs and not ref_label.startswith('abs'):
        ref_plot_label = 'abs(%s)' % ref_plot_label
    test_plot_label = test_label
    if test_abs and not test_label.startswith('abs'):
        test_plot_label = 'abs(%s)' % test_plot_label

    plot_formats = []
    if pdf:
        plot_formats.append('pdf')
    if png:
        plot_formats.append('png')

    # When only one limit of a pair is given, mirror it to get the other;
    # the plotter's own automatic symmetrisation is then switched off.
    diff_symm = True
    if diff_min is not None and diff_max is None:
        diff_max = -diff_min
        diff_symm = False
    if diff_max is not None and diff_min is None:
        diff_min = -diff_max
        diff_symm = False

    fract_diff_symm = True
    if fract_diff_min is not None and fract_diff_max is None:
        fract_diff_max = -fract_diff_min
        fract_diff_symm = False
    if fract_diff_max is not None and fract_diff_min is None:
        fract_diff_min = -fract_diff_max
        fract_diff_symm = False

    asymm_symm = True
    if asymm_max is not None and asymm_min is None:
        asymm_min = -asymm_max
        asymm_symm = False
    if asymm_min is not None and asymm_max is None:
        asymm_max = -asymm_min
        asymm_symm = False

    outdir = os.path.expanduser(os.path.expandvars(outdir))
    mkdir(outdir)

    # Get the reference distribution(s) into the form of a MapSet
    ref, ref_source = _get_comparison_mapset(
        ref, ref_param_selections, role='reference', arg_name='ref')
    logging.info('Reference map(s) derived from a ' + ref_source)

    # Get the test distribution(s) into the form of a MapSet
    test, test_source = _get_comparison_mapset(
        test, test_param_selections, role='test', arg_name='test')
    logging.info('Test map(s) derived from a ' + test_source)

    if combine is not None:
        ref = ref.combine_wildcard(combine)
        test = test.combine_wildcard(combine)
        # Combining may collapse everything into a single Map
        if isinstance(ref, Map):
            ref = MapSet([ref])
        if isinstance(test, Map):
            test = MapSet([test])

    if sum is not None:
        ref = ref.sum(sum)
        test = test.sum(sum)

    # Set the MapSet names according to args passed by user
    ref.name = ref_label
    test.name = test_label

    # Save to disk the maps being plotted (excluding optional absolute value
    # operations)
    if json:
        refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label)
        to_file(ref, refmaps_path)

        testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label)
        to_file(test, testmaps_path)

    if set(test.names) != set(ref.names):
        raise ValueError('Test map names %s do not match ref map names %s.'
                         % (sorted(test.names), sorted(ref.names)))

    # Aliases to save keystrokes
    def masked(x):
        """Nominal values of `x` with NaN/inf entries masked out."""
        return np.ma.masked_invalid(x.nominal_values)

    def zero_to_nan(in_map):
        """Copy of `in_map` with (near-)zero bins set to NaN, so that
        dividing by it yields NaN rather than inf."""
        newmap = deepcopy(in_map)
        mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON)
        newmap.hist[mask] = np.nan
        return newmap

    reordered_test = []
    new_ref = []
    diff_maps = []
    fract_diff_maps = []
    asymm_maps = []
    summary_stats = {}
    for ref_map in ref:
        test_map = test[ref_map.name].reorder_dimensions(ref_map.binning)
        if ref_abs:
            ref_map = abs(ref_map)
        if test_abs:
            test_map = abs(test_map)

        diff_map = test_map - ref_map
        fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map)
        asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5)
        abs_fract_diff_map = np.abs(fract_diff_map)

        new_ref.append(ref_map)
        reordered_test.append(test_map)
        diff_maps.append(diff_map)
        fract_diff_maps.append(fract_diff_map)
        asymm_maps.append(asymm_map)

        min_ref = np.min(masked(ref_map))
        max_ref = np.max(masked(ref_map))

        min_test = np.min(masked(test_map))
        max_test = np.max(masked(test_map))

        total_ref = np.sum(masked(ref_map))
        total_test = np.sum(masked(test_map))

        mean_ref = np.mean(masked(ref_map))
        mean_test = np.mean(masked(test_map))

        max_abs_fract_diff = np.max(masked(abs_fract_diff_map))
        mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map))
        median_abs_fract_diff = np.median(masked(abs_fract_diff_map))

        mean_fract_diff = np.mean(masked(fract_diff_map))
        min_fract_diff = np.min(masked(fract_diff_map))
        max_fract_diff = np.max(masked(fract_diff_map))
        std_fract_diff = np.std(masked(fract_diff_map))

        mean_diff = np.mean(masked(diff_map))
        min_diff = np.min(masked(diff_map))
        max_diff = np.max(masked(diff_map))
        std_diff = np.std(masked(diff_map))

        median_diff = np.nanmedian(masked(diff_map))
        mad_diff = np.nanmedian(masked(np.abs(diff_map)))
        median_fract_diff = np.nanmedian(masked(fract_diff_map))
        mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map)))

        # Asymmetry extrema come from the asymmetry map (these used to be
        # computed from the fractional difference map by mistake)
        min_asymm = np.min(masked(asymm_map))
        max_asymm = np.max(masked(asymm_map))

        total_asymm = np.sqrt(np.sum(masked(asymm_map)**2))

        summary_stats[test_map.name] = OrderedDict([
            ('min_ref', min_ref),
            ('max_ref', max_ref),
            ('total_ref', total_ref),
            ('mean_ref', mean_ref),

            ('min_test', min_test),
            ('max_test', max_test),
            ('total_test', total_test),
            ('mean_test', mean_test),

            ('max_abs_fract_diff', max_abs_fract_diff),
            ('mean_abs_fract_diff', mean_abs_fract_diff),
            ('median_abs_fract_diff', median_abs_fract_diff),

            ('min_fract_diff', min_fract_diff),
            ('max_fract_diff', max_fract_diff),
            ('mean_fract_diff', mean_fract_diff),
            ('std_fract_diff', std_fract_diff),
            ('median_fract_diff', median_fract_diff),
            ('mad_fract_diff', mad_fract_diff),

            ('min_diff', min_diff),
            ('max_diff', max_diff),
            ('mean_diff', mean_diff),
            ('std_diff', std_diff),
            ('median_diff', median_diff),
            ('mad_diff', mad_diff),

            ('min_asymm', min_asymm),
            ('max_asymm', max_asymm),
            ('total_asymm', total_asymm),
        ])

        logging.info('Map %s...', ref_map.name)
        logging.info(' Ref map(s):')
        logging.info(' min :' + ('%.2f' % min_ref).rjust(12))
        logging.info(' max :' + ('%.2f' % max_ref).rjust(12))
        logging.info(' total :' + ('%.2f' % total_ref).rjust(12))
        logging.info(' mean :' + ('%.2f' % mean_ref).rjust(12))
        logging.info(' Test map(s):')
        logging.info(' min :' + ('%.2f' % min_test).rjust(12))
        logging.info(' max :' + ('%.2f' % max_test).rjust(12))
        logging.info(' total :' + ('%.2f' % total_test).rjust(12))
        logging.info(' mean :' + ('%.2f' % mean_test).rjust(12))
        logging.info(' Absolute fract. diff., abs((Test - Ref) / Ref):')
        logging.info(' max : %.4e', max_abs_fract_diff)
        logging.info(' mean : %.4e', mean_abs_fract_diff)
        logging.info(' median: %.4e', median_abs_fract_diff)
        logging.info(' Fractional difference, (Test - Ref) / Ref:')
        logging.info(' min : %.4e', min_fract_diff)
        logging.info(' max : %.4e', max_fract_diff)
        logging.info(' mean : %.4e +/- %.4e',
                     mean_fract_diff, std_fract_diff)
        logging.info(' median: %.4e +/- %.4e',
                     median_fract_diff, mad_fract_diff)
        logging.info(' Difference, Test - Ref:')
        logging.info(' min : %.4e', min_diff)
        logging.info(' max : %.4e', max_diff)
        logging.info(' mean : %.4e +/- %.4e', mean_diff, std_diff)
        logging.info(' median: %.4e +/- %.4e', median_diff, mad_diff)
        logging.info(' Asymmetry, (Test - Ref) / sqrt(Ref)')
        logging.info(' min : %.4e', min_asymm)
        logging.info(' max : %.4e', max_asymm)
        logging.info(' total : %.4e (sum in quadrature)', total_asymm)
        logging.info('')

    ref = MapSet(new_ref)
    test = MapSet(reordered_test)
    diff = MapSet(diff_maps)
    fract_diff = MapSet(fract_diff_maps)
    asymm = MapSet(asymm_maps)

    if json:
        diff.to_json(
            os.path.join(
                outdir,
                'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))
        fract_diff.to_json(
            os.path.join(
                outdir,
                'fract_diff__%s___%s.json.bz2' % (test_plot_label,
                                                  ref_plot_label)))
        asymm.to_json(
            os.path.join(
                outdir,
                'asymm__%s___%s.json.bz2' % (test_plot_label,
                                             ref_plot_label)))
        to_file(
            summary_stats,
            os.path.join(
                outdir,
                'stats__%s__%s.json.bz2' % (test_plot_label,
                                            ref_plot_label)))

    for plot_format in plot_formats:
        # Plot the raw distributions
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
                          log=False, annotate=False,
                          symmetric=False,
                          ratio=False)
        plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label)
        plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label)

        # Plot the difference (test - ref)
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
                          log=False, annotate=False,
                          symmetric=diff_symm,
                          ratio=False)
        plotter.label = '%s - %s' % (test_plot_label, ref_plot_label)
        plotter.plot_2d_array(
            test - ref,
            fname='diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=diff_min, vmax=diff_max
        )

        # Plot the fractional difference (test - ref)/ref
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
                          log=False, annotate=False,
                          symmetric=fract_diff_symm,
                          ratio=True)
        plotter.label = ('(%s-%s)/%s'
                         % (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r) for r in ref]),
            fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=fract_diff_min, vmax=fract_diff_max
        )

        # Plot the asymmetry (test - ref)/sqrt(ref)
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
                          log=False, annotate=False,
                          symmetric=asymm_symm,
                          ratio=True)
        plotter.label = (r'$(%s - %s)/\sqrt{%s}$'
                         % (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]),
            fname='asymm__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=asymm_min, vmax=asymm_max
        )

    return summary_stats, diff, fract_diff, asymm
def _compute_outputs(self, inputs=None):
    """Compute histograms for output channels.

    The input maps are used as the initial state on a (coszen, energy)
    grid that is handed to nuSQuIDS; the state is evolved and then
    evaluated at the bin centers of an oversampled version of the
    binning. Each result is downsampled back to the input binning.

    Parameters
    ----------
    inputs : MapSet
        Must contain maps named 'nue', 'nuebar', 'numu' and 'numubar'.

    Returns
    -------
    MapSet with one Map per name in `self.output_names`.

    Raises
    ------
    AssertionError
        If `inputs` is not a MapSet, or if NaN shows up in the initial
        state or in any computed output.

    """
    logging.debug('Entering nusquids._compute_outputs')
    if not isinstance(inputs, MapSet):
        raise AssertionError('inputs is not a MapSet object, instead '
                             'is type {0}'.format(type(inputs)))
    # TODO(shivesh): oversampling
    # TODO(shivesh): more options
    # TODO(shivesh): static function
    # TODO(shivesh): hashing
    binning = self.input_binning.basename_binning
    binning = binning.reorder_dimensions(('coszen', 'energy'),
                                         use_basenames=True)
    cz_binning = binning['coszen']
    en_binning = binning['energy']

    units = nsq.Const()

    interactions = False
    cz_min = cz_binning.bin_edges.min().m_as('radian')
    cz_max = cz_binning.bin_edges.max().m_as('radian')
    en_min = en_binning.bin_edges.min().m_as('GeV') * units.GeV
    en_max = en_binning.bin_edges.max().m_as('GeV') * units.GeV
    cz_centers = cz_binning.weighted_centers.m_as('radian')
    en_centers = en_binning.weighted_centers.m_as('GeV') * units.GeV
    # Grid nodes: the bin centers, bracketed by the outermost bin edges
    cz_grid = np.array([cz_min] + cz_centers.tolist() + [cz_max])
    en_grid = np.array([en_min] + en_centers.tolist() + [en_max])
    nu_flavours = 3

    nuSQ = nsq.nuSQUIDSAtm(cz_grid, en_grid, nu_flavours,
                           nsq.NeutrinoType.both, interactions)

    nuSQ.Set_EvalThreads(multiprocessing.cpu_count())

    theta12 = self.params['theta12'].value.m_as('radian')
    theta13 = self.params['theta13'].value.m_as('radian')
    theta23 = self.params['theta23'].value.m_as('radian')

    deltam21 = self.params['deltam21'].value.m_as('eV**2')
    # Read the 3-1 splitting from its own param (this used to read
    # 'deltam21' a second time)
    deltam31 = self.params['deltam31'].value.m_as('eV**2')

    # TODO(shivesh): check if deltacp should be in radians
    deltacp = self.params['deltacp'].value.m_as('radian')

    nuSQ.Set_MixingAngle(0, 1, theta12)
    nuSQ.Set_MixingAngle(0, 2, theta13)
    nuSQ.Set_MixingAngle(1, 2, theta23)

    nuSQ.Set_SquareMassDifference(1, deltam21)
    nuSQ.Set_SquareMassDifference(2, deltam31)

    nuSQ.Set_CPPhase(0, 2, deltacp)

    nuSQ.Set_rel_error(1.0e-10)
    nuSQ.Set_abs_error(1.0e-10)

    # Pad the edges of the energy, coszen space to cover the entire grid
    # range (one extra node on each side of each dimension)
    cz_shape = cz_binning.shape[0] + 2
    en_shape = en_binning.shape[0] + 2
    shape = (cz_shape, en_shape) + (2, 3)
    initial_state = np.full(shape, np.nan)

    def pad_inputs(x):
        """Nominal values of a Map's hist, edge-padded by one bin."""
        return np.pad(unp.nominal_values(x.hist), 1, 'edge')

    # Third index is selecting nu(0), nubar(1)
    # Fourth index is selecting flavour nue(0), numu(1), nutau(2)
    initial_state[:, :, 0, 0] = pad_inputs(inputs['nue'])
    initial_state[:, :, 1, 0] = pad_inputs(inputs['nuebar'])
    initial_state[:, :, 0, 1] = pad_inputs(inputs['numu'])
    initial_state[:, :, 1, 1] = pad_inputs(inputs['numubar'])
    # No nutau / nutaubar in the initial state
    initial_state[:, :, :, 2] = 0.

    if np.any(np.isnan(initial_state)):
        raise AssertionError('nan entries in initial_state: '
                             '{0}'.format(initial_state))
    nuSQ.Set_initial_state(initial_state, nsq.Basis.flavor)

    # TODO(shivesh): use verbosity level to set this
    nuSQ.Set_ProgressBar(True)
    nuSQ.EvolveState()

    oversample = self.params['oversample'].value.m
    os_binning = binning.oversample(oversample)
    os_cz_binning = os_binning['coszen']
    os_en_binning = os_binning['energy']
    os_cz_centers = os_cz_binning.weighted_centers.m_as('radians')
    os_en_centers = os_en_binning.weighted_centers.m_as('GeV')

    # NaN-filled so that any bin left unevaluated is caught below
    fs = {}
    for nu in self.output_names:
        fs[nu] = np.full(os_binning.shape, np.nan)

    # Output name -> (flavour index, nu(0)/nubar(1) index) for EvalFlavor
    eval_args = (
        ('nue', 0, 0), ('nuebar', 0, 1),
        ('numu', 1, 0), ('numubar', 1, 1),
        ('nutau', 2, 0), ('nutaubar', 2, 1),
    )
    for icz, cz_bin in enumerate(os_cz_centers):
        for ie, en_bin in enumerate(os_en_centers):
            en_bin_u = en_bin * units.GeV
            for name, flavour, nu_type in eval_args:
                fs[name][icz][ie] = nuSQ.EvalFlavor(
                    flavour, cz_bin, en_bin_u, nu_type)

    out_binning = self.input_binning.reorder_dimensions(
        ('coszen', 'energy'), use_basenames=True)
    os_out_binning = out_binning.oversample(oversample)

    outputs = []
    for key in fs:
        if np.any(np.isnan(fs[key])):
            raise AssertionError(
                'Invalid value computed for {0} oscillated output: '
                '\n{1}'.format(key, fs[key]))
        out_map = Map(name=key, binning=os_out_binning, hist=fs[key])
        # Downsample, then divide by the oversampling factor
        out_map = out_map.downsample(oversample) / float(oversample)
        out_map = out_map.reorder_dimensions(self.input_binning)
        outputs.append(out_map)

    return MapSet(outputs)
def histogram_set(self, binning, nu_weights_col, mu_weights_col,
                  noise_weights_col, mapset_name, errors=False):
    """Histogram every kind of event held by this object into one MapSet.

    Calls `self.histogram` once per neutrino key of this object (if
    `self.contains_neutrinos`), once for muons (if `self.contains_muons`)
    and once for noise (if `self.contains_noise`), and collects the
    results.

    Parameters
    ----------
    binning : OneDimBinning, MultiDimBinning
        The definition of the binning for the histograms.

    nu_weights_col : None or string
        The column by which to weight the neutrino histograms.
        Specify None for unweighted histograms.

    mu_weights_col : None or string
        The column by which to weight the muon histograms.
        Specify None for unweighted histograms.

    noise_weights_col : None or string
        The column by which to weight the noise histograms.
        Specify None for unweighted histograms.

    mapset_name : string
        The name by which the resulting MapSet will be identified.

    errors : boolean
        A flag for whether to calculate errors on the histograms or not.
        This defaults to False.

    Returns
    -------
    MapSet : A MapSet containing all of the Maps for everything in this
        object.

    Raises
    ------
    TypeError
        If `binning` is neither a MultiDimBinning nor a OneDimBinning, if
        any of the weights columns is neither None nor a string, or if
        `errors` is not a bool.

    """
    if not isinstance(binning, (MultiDimBinning, OneDimBinning)):
        raise TypeError('binning should be either MultiDimBinning or '
                        'OneDimBinning object. Got %s.' % type(binning))

    # All three weights columns obey the same contract: None or a string.
    weights_cols = (
        ('nu_weights_col', nu_weights_col),
        ('mu_weights_col', mu_weights_col),
        ('noise_weights_col', noise_weights_col),
    )
    for arg_name, col in weights_cols:
        if col is not None and not isinstance(col, basestring):
            raise TypeError('%s should be a string. Got %s'
                            % (arg_name, type(col)))

    if not isinstance(errors, bool):
        raise TypeError('flag for whether to calculate errors or not '
                        'should be a boolean. Got %s.' % type(errors))

    outputs = []
    if self.contains_neutrinos:
        for fig in self.iterkeys():
            outputs.append(
                self.histogram(
                    kinds=fig,
                    binning=binning,
                    weights_col=nu_weights_col,
                    errors=errors,
                    name=str(NuFlavIntGroup(fig)),
                )
            )
    if self.contains_muons:
        outputs.append(
            self.histogram(
                kinds='muons',
                binning=binning,
                weights_col=mu_weights_col,
                errors=errors,
                name='muons',
                tex=r'\rm{muons}',
            )
        )
    if self.contains_noise:
        outputs.append(
            self.histogram(
                kinds='noise',
                binning=binning,
                # Noise must be weighted by its own column; the muon column
                # was previously (incorrectly) used here.
                weights_col=noise_weights_col,
                errors=errors,
                name='noise',
                tex=r'\rm{noise}',
            )
        )

    return MapSet(maps=outputs, name=mapset_name)
def test_kde_histogramdd():
    """Exercise `kde_histogramdd` on toy data and plot it against a raw
    histogram.

    Random (coszen, energy, pid) samples are histogrammed both with
    `np.histogramdd` and with `kde_histogramdd`; the two results, their
    ratio and their difference are plotted into a fresh temporary
    directory. On success the directory is left in place for manual
    inspection; on any failure it is removed and the error re-raised.
    """
    from argparse import ArgumentParser
    from shutil import rmtree
    from tempfile import mkdtemp
    from pisa import ureg
    from pisa.core.map import Map, MapSet
    from pisa.utils.log import logging, set_verbosity
    from pisa.utils.plotter import Plotter

    arg_parser = ArgumentParser()
    arg_parser.add_argument(
        "-v", action="count", default=None, help="set verbosity level"
    )
    cli_args = arg_parser.parse_args()
    set_verbosity(cli_args.v)

    temp_dir = mkdtemp()
    try:
        plotter_kwargs = dict(
            stamp="",
            outdir=temp_dir,
            fmt="pdf",
            log=False,
            annotate=False,
            symmetric=False,
            ratio=True,
        )
        my_plotter = Plotter(**plotter_kwargs)

        coszen_dim = OneDimBinning(
            name="coszen", num_bins=20, is_lin=True, domain=[-1, 1],
            tex=r"\cos(\theta)",
        )
        energy_dim = OneDimBinning(
            name="energy", num_bins=10, is_log=True,
            domain=[1, 80] * ureg.GeV, tex=r"E",
        )
        pid_dim = OneDimBinning(
            name="pid", num_bins=2, bin_edges=[0, 1, 2], tex=r"pid"
        )
        binning = coszen_dim * energy_dim * pid_dim

        # Toy data; coszen samples outside [-1, 1] are discarded before the
        # other two columns are drawn, so all columns have equal length.
        num_samples = 100000
        cz = np.random.normal(1, 1.2, num_samples)
        in_range = (cz >= -1) & (cz <= 1)
        cz = cz[in_range]
        num_kept = len(cz)
        e = np.random.normal(30, 20, num_kept)
        pid = np.random.uniform(0, 2, num_kept)
        data = np.array([cz, e, pid]).T

        # Plain numpy histogram, used as the validation reference
        bins = []
        for dim in binning:
            bins.append(unp.nominal_values(dim.bin_edges))
        raw_hist, _ = np.histogramdd(data, bins=bins)

        # KDE'd histogram
        hist = kde_histogramdd(
            data,
            binning,
            bw_method="silverman",
            coszen_name="coszen",
            oversample=10,
            use_cuda=True,
            stack_pid=True,
        )

        # Wrap both in Maps, derive ratio and difference, then plot
        kde_map = Map(name="KDE", hist=hist, binning=binning)
        raw_map = Map(name="raw", hist=raw_hist, binning=binning)
        with np.errstate(divide="ignore", invalid="ignore"):
            ratio_map = raw_map / kde_map
        ratio_map.name = "hist/KDE"
        ratio_map.tex = ratio_map.name
        diff_map = kde_map - raw_map
        diff_map.name = "KDE - hist"
        diff_map.tex = diff_map.name

        ms = MapSet([kde_map, raw_map, ratio_map, diff_map])
        my_plotter.plot_2d_array(ms, fname="test_kde", cmap="summer")
    except:
        # Bare `except` is deliberate: whatever went wrong (including an
        # interrupt), remove the temp dir and re-raise unchanged.
        rmtree(temp_dir)
        raise
    else:
        logging.warning("Inspect and manually clean up output(s) saved to %s"
                        % temp_dir)
prop=dict(size=12)) plt.setp(legend.get_title(), fontsize=18) at = AnchoredText(r'$%s$' % map.tex, prop=dict(size=20), frameon=True, loc=2) at.patch.set_boxstyle("round,pad=0.,rounding_size=0.5") ax.add_artist(at) fig.savefig(outfile, bbox_inches='tight', dpi=150) if __name__ == "__main__": args = parse_args() set_verbosity(args.verbose) logging.info('Loading Map from file {0}'.format(args.infile)) input_MapSet = MapSet.from_json(args.infile) if len(input_MapSet) > 1: input_Map = input_MapSet[args.name] else: input_Map = input_MapSet.pop() fileio.mkdir(args.outdir, mode=0755) outfile = args.outdir + '/' + args.outname logging.info('outfile {0}'.format(outfile)) plot_CFX_one(map=input_Map, outfile=outfile, logy=args.logy, ylim=args.ylim, ylabel=args.ylabel)
def norm_sys_distributions(input_data):
    """Normalises systematics mapsets to the nominal mapset, performing
    error propagation.

    Parameters
    ----------
    input_data : dict
        The data container returned by `make_discrete_sys_distributions`.
        Note that this is modified by this function to add the normalised
        distributions: each entry of `input_data["datasets"]` gains a
        "norm_mapset" key holding a MapSet.

    Raises
    ------
    ValueError
        If the number of datasets flagged "nominal" is not exactly one.

    Notes
    -----
    Nothing is returned; `input_data` is modified in-place

    """
    #
    # Get the input mapsets
    #

    nominal_mapset = [
        dataset["mapset"]
        for dataset in input_data["datasets"]
        if dataset["nominal"]
    ]
    if len(nominal_mapset) != 1:
        raise ValueError("need 1 but got {} nominal mapsets".format(
            len(nominal_mapset)))
    nominal_mapset = nominal_mapset[0]

    # Accumulate normalised maps in a plain list per dataset; the lists are
    # converted to MapSets at the end.
    for dataset_dict in input_data["datasets"]:
        dataset_dict["norm_mapset"] = []

    #
    # loop over types of event
    #

    for map_name in nominal_mapset.names:
        logging.info('Normalizing "%s" maps.', map_name)
        nominal_map = nominal_mapset[map_name]
        nominal_map_nominal_vals = nominal_map.nominal_values

        # Note that all
        #   `sys_mapset[map_name].nominal_values`
        # and
        #   `nominal_map.nominal_values`
        # are finite (neither infinite nor NaN), so the only issue for
        # dividing the former by the latter is if there are "empty bins"
        # (zeros) in `nominal_map.nominal_values`
        finite_mask = nominal_map_nominal_vals != 0

        #
        # loop over datasets
        #

        for dataset_dict in input_data["datasets"]:
            #
            # Normalise maps
            #

            sys_mapset_map = dataset_dict["mapset"][map_name]

            # TODO: think about the best way to perform unc. propagation

            # Create a new array with uncertainties the same shape as map;
            # values are irrelevant as all will be overwritten
            norm_sys_hist = copy.deepcopy(sys_mapset_map.hist)

            # Note that we divide by nominal_vals to avoid double counting
            # the uncertainty on the nominal template when applying the
            # hyperplane fits
            norm_sys_hist[finite_mask] = (
                sys_mapset_map.hist[finite_mask]
                / nominal_map_nominal_vals[finite_mask]
            )
            # `np.nan` rather than the `np.NaN` alias, which was removed in
            # NumPy 2.0
            norm_sys_hist[~finite_mask] = ufloat(np.nan, np.nan)

            # TODO Check for bins that are empty in the nominal hist but not
            # in at least one of the sys sets; currently we do not support
            # this...

            norm_sys_map = Map(
                name=sys_mapset_map.name,
                binning=sys_mapset_map.binning,
                hist=norm_sys_hist,
            )

            # TODO Save the map
            dataset_dict["norm_mapset"].append(norm_sys_map)

    # Re-format
    for dataset_dict in input_data["datasets"]:
        dataset_dict["norm_mapset"] = MapSet(
            maps=dataset_dict["norm_mapset"],
            name=dataset_dict["mapset"].name,
        )
def _compute_outputs(self, inputs=None):
    """Compute histograms for output channels.

    For every coszen bin centre of `self.output_binning`, the MCEq run is
    pointed at the corresponding zenith angle, solved, and the 'total_nue',
    'total_antinue', 'total_numu' and 'total_antinumu' solutions are
    collected. The collected values are then passed through
    `self.bivariate_spline` / `self.bivariate_evaluate` to obtain values at
    the energy bin centres of the output binning.

    Parameters
    ----------
    inputs : None
        Not used by this method.

    Returns
    -------
    MapSet
        One Map per entry produced by `self.bivariate_evaluate`, binned in
        `self.output_binning`.

    Raises
    ------
    ValueError
        If the 'primary_model' parameter does not split into exactly two
        comma-separated fields.

    """
    logging.debug('Entering mceq._compute_outputs')

    primary_model = split(self.params['primary_model'].value, ',')
    if len(primary_model) != 2:
        raise ValueError('primary_model is not of length 2, instead is of '
                         'length {0}'.format(len(primary_model)))
    # NOTE(review): `eval` executes whatever expression the 'primary_model'
    # parameter contains; if that value can come from an untrusted source,
    # replace this with an explicit attribute lookup on `pm`.
    primary_model[0] = eval('pm.' + primary_model[0])

    density_model = (
        self.params['density_model'].value,
        (self.params['location'].value, self.params['season'].value),
    )

    mceq_run = MCEqRun(
        interaction_model=str(self.params['interaction_model'].value),
        primary_model=primary_model,
        theta_deg=0.0,
        density_model=density_model,
        **mceq_config.mceq_config_without(['density_model'])
    )

    # Power of energy to scale the flux (the results will be returned as
    # E**mag * flux)
    mag = 0

    # Obtain energy grid (fixed) of the solution for the x-axis of the plots
    e_grid = mceq_run.e_grid

    # Dictionary for results
    flux = OrderedDict()
    for nu in self.output_names:
        flux[nu] = []

    binning = self.output_binning
    cz_binning = binning.dims[binning.index('coszen', use_basenames=True)]
    en_binning = binning.dims[binning.index('energy', use_basenames=True)]
    cz_centers = cz_binning.weighted_centers.m
    angles = (np.arccos(cz_centers) * ureg.radian).m_as('degrees')

    for theta in angles:
        mceq_run.set_theta_deg(theta)
        mceq_run.solve()

        flux['nue'].append(mceq_run.get_solution('total_nue', mag))
        flux['nuebar'].append(mceq_run.get_solution('total_antinue', mag))
        flux['numu'].append(mceq_run.get_solution('total_numu', mag))
        flux['numubar'].append(
            mceq_run.get_solution('total_antinumu', mag)
        )

    # Iterate the dict directly (works on Python 2 and 3, unlike
    # `iterkeys`); only values of existing keys are replaced, so mutating
    # during iteration is safe.
    for nu in flux:
        flux[nu] = np.array(flux[nu])

    smoothing = self.params['smoothing'].value.m
    en_centers = en_binning.weighted_centers.m_as('GeV')
    spline_flux = self.bivariate_spline(
        flux, cz_centers, e_grid, smooth=smoothing
    )
    ev_flux = self.bivariate_evaluate(spline_flux, cz_centers, en_centers)

    # Parse the unit string once rather than once per flavour
    flux_units = ureg('cm**-2 s**-1 sr**-1 GeV**-1')
    for nu in ev_flux:
        ev_flux[nu] = ev_flux[nu] * flux_units

    mapset = [
        Map(name=nu, hist=ev_flux[nu], binning=binning) for nu in ev_flux
    ]

    return MapSet(mapset)
def test_BinnedTensorTransform():
    """Unit tests for BinnedTensorTransform (and TransformSet).

    Builds three transforms on a common energy x coszen binning, checks
    scalar addition on the transform array, checks that each transform and
    a TransformSet containing all of them survive a JSON round trip, and
    checks that the TransformSet hash can be set and that the set can be
    applied to a MapSet of inputs.
    """
    binning = MultiDimBinning([
        {'name': 'energy', 'is_log': True, 'domain': (1, 80) * ureg.GeV,
         'num_bins': 10},
        {'name': 'coszen', 'is_lin': True, 'domain': (-1, 0),
         'num_bins': 5},
    ])

    def make_random_map(map_name):
        """Map with uniform-random bin contents and Poisson errors."""
        random_map = Map(name=map_name, binning=binning,
                         hist=np.random.random(binning.shape))
        random_map.set_poisson_errors()
        return random_map

    def constant_array(value):
        """Fresh array of the binning's shape filled with `value`."""
        return value * np.ones(binning.shape)

    nue_map = make_random_map('nue')
    numu_map = make_random_map('numu')
    inputs = MapSet(name='inputs', maps=[nue_map, numu_map])

    common_binning = dict(input_binning=binning, output_binning=binning)
    xform0 = BinnedTensorTransform(
        input_names='nue',
        output_name='nue',
        xform_array=constant_array(2),
        **common_binning
    )
    xform1 = BinnedTensorTransform(
        input_names=['numu'],
        output_name='numu',
        xform_array=constant_array(3),
        **common_binning
    )
    stacked_array = np.stack([constant_array(2), constant_array(3)], axis=0)
    xform2 = BinnedTensorTransform(
        input_names=['nue', 'numu'],
        output_name='nue_numu',
        xform_array=stacked_array,
        **common_binning
    )

    assert np.all((xform2 + 2).xform_array - xform2.xform_array == 2)

    # JSON round trip of each individual transform
    testdir = tempfile.mkdtemp()
    try:
        for idx, original in enumerate((xform0, xform1, xform2)):
            json_path = os.path.join(testdir, str(idx) + '.json')
            original.to_json(json_path)
            restored = BinnedTensorTransform.from_json(json_path)
            assert restored == original, \
                't=\n%s\nt_=\n%s' % (original, restored)
    finally:
        shutil.rmtree(testdir, ignore_errors=True)

    logging.info('<< PASS : test_BinnedTensorTransform >>')

    xforms = TransformSet(
        name='scaling',
        transforms=[xform0, xform1, xform2],
        hash=9,
    )

    assert xforms.hash == 9
    xforms.hash = -20
    assert xforms.hash == -20

    _ = xforms.apply(inputs)

    # TODO: get this working above, then test here!
    #xforms2 = xforms * 2

    # JSON round trip of the whole TransformSet (single file, index 0)
    testdir = tempfile.mkdtemp()
    try:
        json_path = os.path.join(testdir, str(0) + '.json')
        xforms.to_json(json_path)
        restored_set = TransformSet.from_json(json_path)
        assert restored_set == xforms, \
            't=\n%s\nt_=\n%s' % (xforms.transforms, restored_set.transforms)
    finally:
        shutil.rmtree(testdir, ignore_errors=True)

    logging.info('<< PASS : test_TransformSet >>')
def main():
    """Plot the fractional effect of selected systematics on the pipeline
    output.

    The pipeline named by the 'cfx_pipeline' argument is run once with the
    'stat_fluctuations' and 'regularisation' params set to 0 to obtain the
    baseline output ('regularisation' is reset afterwards). Then, for each
    free parameter whose name contains 'hole_ice' or 'dom_eff', the
    pipeline is run at a lower and an upper value of that parameter, each
    result is compared to the baseline via `compare`, and the resulting
    fractional differences (times 100) are plotted via `make_plot`.

    Outputs are written beneath the directory given by the 'outdir'
    argument, in one sub-directory per parameter. The module-level `SIGMA`
    is scaled in-place by the 'sigma' argument.

    Raises
    ------
    AssertionError
        If any requested signal name contains 'muons' or 'noise'.

    """
    global SIGMA

    args = vars(parse_args())
    set_verbosity(args.pop('v'))
    center_zero = args.pop('center_zero')

    # Remaining `args` are forwarded to `compare`; PDF output is switched
    # off there and only honoured for the final summary plot.
    make_pdf = False
    if args['pdf']:
        make_pdf = True
        args['pdf'] = False

    outdir = args.pop('outdir')
    # `0o755` is valid on Python 2.6+ and 3; the old `0755` spelling is a
    # SyntaxError on Python 3.
    fileio.mkdir(outdir, mode=0o755)
    SIGMA *= args.pop('sigma')

    cfx_pipe = Pipeline(args.pop('cfx_pipeline'))

    signal = args.pop('signal').replace(' ', '').split(',')
    output_str = []
    for name in signal:
        if 'muons' in name or 'noise' in name:
            raise AssertionError('Are you trying to unfold muons/noise?')
        elif 'all_nu' in name:
            output_str = [str(NuFlavIntGroup(f)) for f in ALL_NUFLAVINTS]
        else:
            output_str.append(NuFlavIntGroup(name))
    output_str = [str(f) for f in output_str]
    cfx_pipe._output_names = output_str

    # Turn off stat fluctuations
    stat_param = cfx_pipe.params['stat_fluctuations']
    stat_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(stat_param)

    # Get nominal Map
    re_param = cfx_pipe.params['regularisation']
    re_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(re_param)
    nom_out = cfx_pipe.get_outputs()

    re_param.reset()
    cfx_pipe.update_params(re_param)

    params = ParamSet()
    for param in cfx_pipe.params:
        if param.name != 'dataset':
            params.extend(param)

    free = params.free
    logging.info('Free params = {0}'.format(free))
    for f in free:
        # Only these two families of systematics are scanned
        if 'hole_ice' not in f.name and 'dom_eff' not in f.name:
            continue
        logging.info('Working on parameter {0}'.format(f.name))

        if f.prior.kind != 'uniform':
            # Use deltaLLH = SIGMA to define +/- sigma for non-uniform
            scan_over = np.linspace(*f.range, num=1000) * f.range[0].u
            llh = f.prior.llh(scan_over)
            # NOTE(review): this is `llh - min(-llh)`, not
            # `-llh - min(-llh)`; whether it yields the intended delta-LLH
            # depends on the sign convention of SIGMA, which is defined
            # outside this function -- confirm.
            dllh = llh - np.min(-llh)

            mllh_idx = np.argmin(-llh)
            if mllh_idx == 0:
                l_sig_idx = 0
            else:
                l_sig_idx = np.argmin(np.abs(dllh[:mllh_idx] - SIGMA))
            u_sig_idx = (
                np.argmin(np.abs(dllh[mllh_idx:] - SIGMA)) + mllh_idx
            )

            l_sigma = scan_over[l_sig_idx]
            u_sigma = scan_over[u_sig_idx]
        else:
            l_sigma = f.range[0]
            u_sigma = f.range[1]

        logging.info('Setting {0} lower sigma bound to '
                     '{1}'.format(f.name, l_sigma))
        f.value = l_sigma
        cfx_pipe.update_params(f)
        l_out = cfx_pipe.get_outputs()

        logging.info('Setting {0} upper sigma bound to '
                     '{1}'.format(f.name, u_sigma))
        f.value = u_sigma
        cfx_pipe.update_params(f)
        u_out = cfx_pipe.get_outputs()

        # Restore the parameter before moving on to the next one
        f.reset()
        cfx_pipe.update_params(f)

        f_outdir = outdir + '/' + f.name
        l_outdir = f_outdir + '/' + 'lower'
        u_outdir = f_outdir + '/' + 'upper'
        fileio.mkdir(f_outdir)
        fileio.mkdir(l_outdir)
        fileio.mkdir(u_outdir)

        compare(
            outdir=l_outdir,
            ref=MapSet([nom_out]),
            ref_label='baseline',
            test=MapSet([l_out]),
            test_label=r'-sigma',
            **args
        )
        compare(
            outdir=u_outdir,
            ref=MapSet([nom_out]),
            ref_label='baseline',
            test=MapSet([u_out]),
            test_label=r'+sigma',
            **args
        )

        # Read back the fractional-difference files from the output
        # directories passed to `compare` above; convert to percent
        l_in_mapset = (
            l_outdir + '/' + 'fract_diff__-sigma___baseline.json.bz2'
        )
        u_in_mapset = (
            u_outdir + '/' + 'fract_diff__+sigma___baseline.json.bz2'
        )
        l_in_map = MapSet.from_json(l_in_mapset).pop() * 100.
        u_in_map = MapSet.from_json(u_in_mapset).pop() * 100.

        if make_pdf:
            outfile = f_outdir + '/systematic_effect.pdf'
        else:
            outfile = f_outdir + '/systematic_effect.png'
        title = r'% effect on ' + r'${0}$'.format(l_in_map.tex) + \
            ' event counts for {0} parameter'.format(f.name)
        sub_titles = (
            r'(-\sigma - {\rm baseline}) \:/\: {\rm baseline}',
            r'(+\sigma - {\rm baseline}) \:/\: {\rm baseline}',
        )
        make_plot(
            maps=(l_in_map, u_in_map),
            outfile=outfile,
            logv=False,
            center_zero=center_zero,
            vlabel=r'({\rm change} - {\rm baseline}) \:/\: {\rm baseline} (%)',
            title=title,
            sub_titles=sub_titles,
        )