def assemble_interpolated_fits(fit_directory, output_file): """After all of the fits on the cluster are done, assemble the results to one JSON. The JSON produced by this function is what `load_interpolated_hypersurfaces` expects. """ assert os.path.isdir(fit_directory), "fit directory does not exist" metadata = from_json(os.path.join(fit_directory, "metadata.json")) combined_data = collections.OrderedDict() combined_data["interpolation_param_spec"] = metadata["interpolation_param_spec"] hs_fits = [] grid_shape = tuple(metadata["grid_shape"]) for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)): gridpoint_json = os.path.join(fit_directory, f"gridpoint_{job_idx:06d}.json.bz2") logging.info(f"Reading {gridpoint_json}") gridpoint_data = from_json(gridpoint_json) assert job_idx == gridpoint_data["job_idx"] assert np.all(grid_idx == gridpoint_data["grid_idx"]) # TODO: Offer to run incomplete fits locally assert gridpoint_data["fit_successful"], f"job no. {job_idx} not finished" hs_fits.append(collections.OrderedDict( param_values=gridpoint_data["param_values"], hs_fit=gridpoint_data["hs_fit"] )) combined_data["hs_fits"] = hs_fits to_json(combined_data, output_file)
def assemble_interpolated_fits(fit_directory, output_file, drop_fit_maps=False): """After all of the fits on the cluster are done, assemble the results to one JSON. The JSON produced by this function is what `load_interpolated_hypersurfaces` expects. """ assert os.path.isdir(fit_directory), "fit directory does not exist" metadata = from_json(os.path.join(fit_directory, "metadata.json")) combined_data = collections.OrderedDict() combined_data["interpolation_param_spec"] = metadata[ "interpolation_param_spec"] # Loop over grid points hs_fits = [] grid_shape = tuple(metadata["grid_shape"]) for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)): # Load grid point data gridpoint_json = os.path.join(fit_directory, f"gridpoint_{job_idx:06d}.json.bz2") logging.info(f"Reading {gridpoint_json}") gridpoint_data = from_json(gridpoint_json) # Check the loaded data assert job_idx == gridpoint_data["job_idx"] assert np.all(grid_idx == gridpoint_data["grid_idx"]) # TODO: Offer to run incomplete fits locally assert gridpoint_data[ "fit_successful"], f"job no. {job_idx} not finished" # Drop fit maps if requested (can significantly reduce file size) if drop_fit_maps: for key, hs_state in gridpoint_data["hs_fit"].items(): hs_state["fit_maps_raw"] = None hs_state["fit_maps_norm"] = None # Add grid point data to output file hs_fits.append( collections.OrderedDict( param_values=gridpoint_data["param_values"], hs_fit=gridpoint_data["hs_fit"])) # Write the output file combined_data["hs_fits"] = hs_fits to_file(combined_data, output_file)
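# Hedged usage sketch (not part of the function above): once every cluster job has
# written its gridpoint_*.json.bz2 result, collect everything into the single file
# that load_interpolated_hypersurfaces expects. The paths and output name below are
# hypothetical examples.
assemble_interpolated_fits(
    fit_directory="/path/to/fit_directory",   # directory holding metadata.json and gridpoint files
    output_file="interpolated_hs.json.bz2",   # combined output consumed downstream
    drop_fit_maps=True,                       # drop stored fit maps to shrink the file
)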
def __init__(self,ebins,czbins,settings_file=None): ''' settings - expects the dictionary from a .json file with entries of 'a_eff_files', & 'a_eff_coszen_dep' ''' self.ebins = ebins self.czbins = czbins ##Load the settings from the file settings = from_json(find_resource(settings_file)) ## Load the info from .dat files into a dict... ## Parametric approach treats all NC events the same aeff2d_nc = self.get_aeff_flavor('NC',settings) aeff2d_nc_bar = self.get_aeff_flavor('NC_bar',settings) self.aeff_dict = {} logging.info("Creating effective area parametric dict...") for flavor in ['nue','nue_bar','numu','numu_bar','nutau','nutau_bar']: flavor_dict = {} logging.debug("Working on %s effective areas"%flavor) aeff2d = self.get_aeff_flavor(flavor,settings) flavor_dict['cc'] = aeff2d flavor_dict['nc'] = aeff2d_nc_bar if 'bar' in flavor else aeff2d_nc self.aeff_dict[flavor] = flavor_dict return
def from_json(cls, resource): """Instantiate a new TransformSet object from a JSON file. Parameters ---------- resource : str A PISA resource specification (see pisa.utils.resources) See Also -------- to_json pisa.utils.jsons.to_json """ state = jsons.from_json(resource) transforms = [] for module, classname, transform_state in state['transforms']: clsmembers = inspect.getmembers(sys.modules[__name__], inspect.isclass) # First try to get a class within this module/namespace classes = [c[1] for c in clsmembers if c[0] == classname] if len(classes) > 0: class_ = classes[0] # Otherwise try to import the module recorded in the JSON file else: module = importlib.import_module(module) # and then get the class class_ = getattr(module, classname) transforms.append(class_(**transform_state)) state['transforms'] = transforms # State is a dict, so instantiate with double-asterisk syntax return cls(**state)
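# Hedged usage sketch: restore a TransformSet that was previously serialized with
# to_json(). The resource path is hypothetical and is resolved through
# pisa.utils.resources, as described in the docstring above.
xform_set = TransformSet.from_json("transforms/example_transforms.json")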
def get_pid_kernels(self, pid_kernelfile=None, **kwargs):
    logging.info('Opening file: %s' % (pid_kernelfile))
    try:
        self.pid_kernels = from_json(find_resource(pid_kernelfile))
    except IOError as e:
        logging.error("Unable to open kernel file %s" % pid_kernelfile)
        logging.error(e)
        sys.exit(1)
def _get_reco_kernels(self, kernelfile=None, **kwargs): for reco_scale in ['e_reco_scale', 'cz_reco_scale']: if reco_scale in kwargs: if not kwargs[reco_scale]==1: raise ValueError('%s = %.2f not valid for RecoServiceKernelFile!' %(reco_scale, kwargs[reco_scale])) if not kernelfile in [self.kernelfile, None]: logging.info('Reconstruction from non-default kernel file %s!'%kernelfile) return from_json(find_resource(kernelfile)) if not hasattr(self, 'kernels'): logging.info('Using file %s for default reconstruction'%(kernelfile)) self.kernels = from_json(find_resource(kernelfile)) return self.kernels
def get_incomplete_job_idx(fit_directory):
    """Get job indices of fits that are not flagged as successful."""
    assert os.path.isdir(fit_directory), "fit directory does not exist"
    metadata = from_json(os.path.join(fit_directory, "metadata.json"))
    grid_shape = tuple(metadata["grid_shape"])
    failed_idx = []
    for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)):
        try:
            gridpoint_json = os.path.join(fit_directory,
                                          f"gridpoint_{job_idx:06d}.json.bz2")
            logging.info(f"Reading {gridpoint_json}")
            gridpoint_data = from_json(gridpoint_json)
        except Exception:
            # Stop scanning at the first grid point whose result file cannot be read
            break
        if not gridpoint_data["fit_successful"]:
            failed_idx.append(job_idx)
    return failed_idx
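# Hedged usage sketch: combine get_incomplete_job_idx with run_interpolated_fit
# (defined elsewhere in this module) to re-run unfinished grid points locally.
# The directory path is a hypothetical example.
fit_dir = "/path/to/fit_directory"
for failed_job_idx in get_incomplete_job_idx(fit_dir):
    run_interpolated_fit(fit_dir, failed_job_idx, skip_successful=True)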
def from_file(fname, fmt=None, **kwargs): """Dispatch correct file reader based on `fmt` (if specified) or guess based on file name's extension. Parameters ---------- fname : string File path / name from which to load data. fmt : None or string If string, for interpretation of the file according to this format. If None, file format is deduced by an extension found in `fname`. **kwargs All other arguments are passed to the function dispatched to read the file. Returns ------- Object instantiated from the file (string, dictionary, ...). Each format is interpreted differently. Raises ------ ValueError If extension is not recognized """ if fmt is None: rootname, ext = os.path.splitext(fname) ext = ext.replace('.', '').lower() else: rootname = fname ext = fmt.lower() if ext in ZIP_EXTS or ext in XOR_EXTS: rootname, inner_ext = os.path.splitext(rootname) inner_ext = inner_ext.replace('.', '').lower() ext = inner_ext fname = resources.find_resource(fname) if ext in jsons.JSON_EXTS: return jsons.from_json(fname, **kwargs) if ext in hdf.HDF5_EXTS: return hdf.from_hdf(fname, **kwargs) if ext in PKL_EXTS: return from_pickle(fname, **kwargs) if ext in CFG_EXTS: return from_cfg(fname, **kwargs) if ext in TXT_EXTS: return from_txt(fname, **kwargs) errmsg = 'File "%s": unrecognized extension "%s"' % (fname, ext) log.logging.error(errmsg) raise ValueError(errmsg)
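# Hedged usage sketch: the dispatcher above selects the reader from the file
# extension, so the same call covers JSON, HDF5, pickle, cfg, and txt resources.
# File names are hypothetical examples.
cfg = from_file("settings/pipeline/example.cfg")
events = from_file("events/example_events.hdf5")
notes = from_file("notes.dat", fmt="txt")  # force the format explicitly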
def get_pid_kernels(self, pid_paramfile=None, PID_offset=0., PID_scale=1., **kwargs):
    # Load parametrization file
    logging.info('Opening PID parametrization file %s' % pid_paramfile)
    try:
        param_str = from_json(find_resource(pid_paramfile))
    except IOError as e:
        logging.error("Unable to open PID parametrization file %s" % pid_paramfile)
        logging.error(e)
        sys.exit(1)
def setup_function(self): scale_file = find_resource(self.scale_file) logging.info("Loading scaling factors from : %s", scale_file) scaling_dict = from_json(scale_file) scale_binning = MultiDimBinning( **scaling_dict[self.variable]["binning"]) scale_factors = np.array(scaling_dict[self.variable]["scales"], dtype=FTYPE) logging.info(f"Binning for ad-hoc systematic: \n {str(scale_binning)}") logging.info( f"scaling factors of ad-hoc systematic:\n {str(scale_factors)}") self.data.representation = scale_binning for container in self.data: container["adhoc_scale_factors"] = scale_factors
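# Hedged sketch of the scale-file layout that setup_function above expects: one
# entry per variable, holding a MultiDimBinning state plus one scale factor per
# bin. The variable name, binning state, and numbers below are illustrative only.
example_scaling_dict = {
    "reco_energy": {                # hypothetical variable name (self.variable)
        "binning": {},              # state unpacked via MultiDimBinning(**state)
        "scales": [1.0, 1.1, 0.9],  # illustrative per-bin scale factors
    }
}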
def from_file(fname, fmt=None):
    """Dispatch correct file reader based on fmt (if specified) or guess
    based on file name's extension"""
    if fmt is None:
        base, ext = os.path.splitext(fname)
        ext = ext.replace('.', '').lower()
    else:
        ext = fmt.lower()

    if ext in JSON_EXTS:
        return jsons.from_json(fname)
    elif ext in HDF5_EXTS:
        return hdf.from_hdf(fname)
    elif ext in PKL_EXTS:
        return cPickle.load(open(fname, 'rb'))
    else:
        errmsg = 'Unrecognized file type/extension: ' + ext
        logging.error(errmsg)
        raise TypeError(errmsg)
def from_json(cls, resource): """Instantiate a new Map object from a JSON file. The format of the JSON is generated by the `Map.to_json` method, which converts a Map object to basic types and then numpy arrays are converted in a call to `pisa.utils.jsons.to_json`. Parameters ---------- resource : str A PISA resource specification (see pisa.utils.resources) See Also -------- to_json pisa.utils.jsons.to_json """ state = jsons.from_json(resource) # State is a dict for Map, so instantiate with double-asterisk syntax return cls(**state)
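# Hedged usage sketch: round-trip a Map through JSON using the from_json/to_json
# pair referenced in the docstring above. The resource paths are hypothetical.
m = Map.from_json("maps/example_map.json")
m.to_json("maps/example_map_copy.json")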
def __init__(self, detector, geom, proc_ver, pid_spec_ver=1, pid_specs=None):
    geom = str(geom)
    proc_ver = str(proc_ver)
    pid_spec_ver = str(pid_spec_ver)

    if pid_specs is None:
        pid_specs = 'pid/pid_specifications.json'
    if isinstance(pid_specs, str):
        pid_specs = from_json(resources.find_resource(pid_specs))
    elif isinstance(pid_specs, collections.Mapping):
        pass
    else:
        raise TypeError('Unhandled `pid_specs` type: "%s"' % type(pid_specs))

    self.detector = detector
    self.proc_ver = proc_ver
    self.pid_spec_ver = str(pid_spec_ver)

    d = pid_specs
    all_k = []
    for wanted_key in [detector, geom, proc_ver, pid_spec_ver]:
        wanted_key = wanted_key.replace("'", "").lower()
        for orig_dict_key, subdict in d.items():
            dict_key = orig_dict_key.replace("'", "").lower()
            if dict_key == wanted_key:
                d = subdict
                all_k.append(orig_dict_key)
    if len(all_k) != 4:
        raise ValueError('Could not find %s'
                         % str([detector, geom, proc_ver, pid_spec_ver]))
    self.pid_spec = pid_specs[all_k[0]][all_k[1]][all_k[2]][all_k[3]]

    # Enforce rules on PID spec:
    self.validatePIDSpec(self.pid_spec)
def __init__(self, ebins, czbins, reco_param_file=None, **kwargs): """ Parameters needed to instantiate a reconstruction service with parametrizations: * ebins: Energy bin edges * czbins: cos(zenith) bin edges * reco_param_file: JSON containing the parametrizations """ # Load parametrization logging.info('Opening reconstruction parametrization file %s' % reco_param_file) # Needed for self.read_param_string() self.ebins = ebins self.czbins = czbins # Get parametrization param_str = from_json(find_resource(reco_param_file)) self.parametrization = self.read_param_string(param_str) # No **kwargs, so stored kernels will always have reco scales 1.0 RecoServiceBase.__init__(self, ebins, czbins, **kwargs)
def __init__(self, ebins, czbins, aeff_egy_par, aeff_coszen_par, **params): ''' Parameters: * aeff_egy_par - effective area vs. Energy 1D parameterizations for each flavor, in a text file (.dat) * aeff_coszen_par - json file containing 1D coszen parameterization for each flavor ''' logging.info('Initializing AeffServicePar...') self.ebins = ebins self.czbins = czbins ## Load the info from .dat files into a dict... ## Parametric approach treats all NC events the same aeff_coszen_par_str = from_json(find_resource(aeff_coszen_par)) aeff2d_nc = self.get_aeff_flavor('NC', aeff_egy_par, aeff_coszen_par_str) aeff2d_nc_bar = self.get_aeff_flavor('NC_bar', aeff_egy_par, aeff_coszen_par_str) self.aeff_dict = {} logging.info("Creating effective area parametric dict...") for flavor in [ 'nue', 'nue_bar', 'numu', 'numu_bar', 'nutau', 'nutau_bar' ]: flavor_dict = {} logging.debug("Working on %s effective areas" % flavor) aeff2d = self.get_aeff_flavor(flavor, aeff_egy_par, aeff_coszen_par_str) flavor_dict['cc'] = aeff2d flavor_dict['nc'] = aeff2d_nc_bar if 'bar' in flavor else aeff2d_nc self.aeff_dict[flavor] = flavor_dict return
help="Save all stages.") parser.add_argument('-o', '--outfile', dest='outfile', metavar='FILE', type=str, action='store', default="template.json", help='file to store the output') args = parser.parse_args() set_verbosity(args.verbose) with Timer() as t: #Load all the settings model_settings = from_json(args.template_settings) #Select a hierarchy logging.info('Selected %s hierarchy' % ('normal' if args.normal else 'inverted')) params = select_hierarchy(model_settings['params'], normal_hierarchy=args.normal) #Intialize template maker template_maker = TemplateMaker(get_values(params), **model_settings['binning']) tprofile.info(" ==> elapsed time to initialize templates: %s sec" % t.secs) #Now get the actual template with Timer(verbose=False) as t:
def __init__(self, detector, proc_ver, data_proc_params=None):
    super().__init__()
    if data_proc_params is None:
        data_proc_params = 'events/data_proc_params.json'
    if isinstance(data_proc_params, str):
        ps = jsons.from_json(resources.find_resource(data_proc_params))
    elif isinstance(data_proc_params, dict):
        ps = data_proc_params
    else:
        raise TypeError('Unhandled data_proc_params type passed in arg: %s'
                        % type(data_proc_params))
    self.detector = detector
    self.proc_ver = str(proc_ver)
    self.det_key = [k for k in ps.keys()
                    if k.lower() == self.detector.lower()][0]
    for key in ps[self.det_key].keys():
        lk = key.lower()
        lpv = self.proc_ver.lower()
        if lk == lpv or ('v'+lk == lpv) or (lk == 'v'+lpv):
            self.procver_key = key
        # This works for PINGU
        elif ('msu_'+lk == lpv) or (lk == 'msu_'+lpv):
            self.procver_key = key
        elif ('nbi_'+lk == lpv) or (lk == 'nbi_'+lpv):
            self.procver_key = key
        # Generalising for DeepCore and different selections
    ps = ps[self.det_key][self.procver_key]
    self.update(ps)

    self.trans_nu_code = False
    if 'nu_code_to_pdg_map' in self:
        self.trans_nu_code = True
        try:
            self.nu_code_to_pdg_map = {
                int(code): pdg
                for code, pdg in self['nu_code_to_pdg_map'].items()
            }
        except:
            self.nu_code_to_pdg_map = self['nu_code_to_pdg_map']

    # NOTE: the keys are strings so the particular string formatting is
    # important for indexing into the dict!

    # Add generic cuts
    self['cuts'].update({
        # Cut for particles only (no anti-particles)
        str(NuFlav(12).bar_code).lower():
            {'fields': ['nu_code'], 'pass_if': 'nu_code > 0'},
        # Cut for anti-particles only (no particles)
        str(NuFlav(-12).bar_code).lower():
            {'fields': ['nu_code'], 'pass_if': 'nu_code < 0'},
        # Cut for charged-current interactions only
        str(IntType('cc')).lower():
            {'fields': ['interaction_type'], 'pass_if': 'interaction_type == 1'},
        # Cut for neutral-current interactions only
        str(IntType('nc')).lower():
            {'fields': ['interaction_type'], 'pass_if': 'interaction_type == 2'},
        # True-upgoing cut using the zenith field
        'true_upgoing_zen':
            {'fields': ['true_zenith'], 'pass_if': 'true_zenith > pi/2'},
        # True-upgoing cut using the cosine-zenith field
        'true_upgoing_coszen':
            {'fields': ['true_coszen'], 'pass_if': 'true_coszen < 0'},
    })

    # Enforce rules on cuts:
    self.validate_cut_spec(self['cuts'])
assert len(llhfiles) <= len(logfiles), "Data and log directories don't match?" # Output to save to hdf5 file: output_data = {"minimizer_settings": {}, "template_settings": {}, "true_NMH": {}, "true_IMH": {}} logging.warn("Processing {0:d} files".format(len(llhfiles))) mod = len(llhfiles) // 20 start = time.time() for i, filename in enumerate(llhfiles): if (mod > 0) and (i % mod == 0): logging.info(" >> {0:d} files done...".format(i)) try: data = from_json(filename) except Exception as inst: # print(inst) print("Skipping file: ", filename) continue if not output_data["minimizer_settings"]: output_data["minimizer_settings"] = data["minimizer_settings"] if not output_data["template_settings"]: output_data["template_settings"] = data["template_settings"] for key in ["true_NMH", "true_IMH"]: appendTrials(output_data[key], data[key]) if args.log_dir is not None:
three_chi2s_theta23 = {} three_chi2s_theta23['data_NMH'] = {} three_chi2s_theta23['data_IMH'] = {} ten_significances = {} ten_significances['data_NMH'] = [] ten_significances['data_IMH'] = [] ten_chi2s_theta23 = {} ten_chi2s_theta23['data_NMH'] = {} ten_chi2s_theta23['data_IMH'] = {} for infile in sorted(os.listdir(three_true_h_fid_dir)): if os.path.isfile(three_true_h_fid_dir+infile): indict = from_json(three_true_h_fid_dir+infile) theta23_nh = indict['template_settings']['params']['theta23_nh']['value'] theta23_ih = indict['template_settings']['params']['theta23_ih']['value'] assert(theta23_nh == theta23_ih) theta23vals.append(theta23_nh) sin2theta23vals.append(math.pow(math.sin(theta23_nh),2)) three_chi2s_theta23['data_NMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []} three_chi2s_theta23['data_IMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []} ten_chi2s_theta23['data_NMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []} ten_chi2s_theta23['data_IMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []} theta23vals = sorted(theta23vals) sin2theta23vals = sorted(sin2theta23vals) for theta23 in theta23vals:
'--outfile', type=str, default='llh_data.json', metavar='JSONFILE', help="Output filename.") parser.add_argument('-v', '--verbose', action='count', default=None, help='set verbosity level') args = parser.parse_args() set_verbosity(args.verbose) #Read in the settings template_settings = from_json(args.template_settings) minimizer_settings = from_json(args.minimizer_settings) pseudo_data_settings = from_json( args.pseudo_data_settings ) if args.pseudo_data_settings is not None else template_settings #Workaround for old scipy versions import scipy if scipy.__version__ < '0.12.0': logging.warn('Detected scipy version %s < 0.12.0' % scipy.__version__) if 'maxiter' in minimizer_settings: logging.warn('Optimizer settings for \"maxiter\" will be ignored') minimizer_settings.pop('maxiter') # make sure that both pseudo data and template are using the same # channel. Raise Exception and quit otherwise
action='store_false', help="select the inverted hierarchy") parser.add_argument('-v', '--verbose', action='count', default=None, help='set verbosity level.') parser.add_argument('-s', '--save_all', action='store_true', default=False, help="Save all stages.") parser.add_argument('-o', '--outfile', dest='outfile', metavar='FILE', type=str, action='store',default="template.json", help='file to store the output') args = parser.parse_args() set_verbosity(args.verbose) with Timer() as t: #Load all the settings model_settings = from_json(args.template_settings) #Select a hierarchy logging.info('Selected %s hierarchy'% ('normal' if args.normal else 'inverted')) params = select_hierarchy(model_settings['params'], normal_hierarchy=args.normal) #Intialize template maker template_maker = TemplateMaker(get_values(params), **model_settings['binning']) profile.info(" ==> elapsed time to initialize templates: %s sec"%t.secs) #Now get the actual template with Timer(verbose=False) as t: template_maps = template_maker.get_template(get_values(params),
first_chi2s_livetime = {} first_chi2s_livetime["data_NMH"] = {} first_chi2s_livetime["data_IMH"] = {} second_significances = {} second_significances["data_NMH"] = [] second_significances["data_IMH"] = [] second_chi2s_livetime = {} second_chi2s_livetime["data_NMH"] = {} second_chi2s_livetime["data_IMH"] = {} for infile in sorted(os.listdir(first_true_h_fid_dir)): if os.path.isfile(first_true_h_fid_dir + infile): indict = from_json(first_true_h_fid_dir + infile) livetime = indict["template_settings"]["params"]["livetime"]["value"] livetimevals.append(livetime) first_chi2s_livetime["data_NMH"][livetime] = {"true_h_fiducial": [], "false_h_best": []} first_chi2s_livetime["data_IMH"][livetime] = {"true_h_fiducial": [], "false_h_best": []} second_chi2s_livetime["data_NMH"][livetime] = {"true_h_fiducial": [], "false_h_best": []} second_chi2s_livetime["data_IMH"][livetime] = {"true_h_fiducial": [], "false_h_best": []} livetimevals = sorted(livetimevals) for livetime in livetimevals: # Get chisquare values for first octant true_h_fiducial distributions for trueinfile in sorted(os.listdir(first_true_h_fid_dir)): if os.path.isfile(first_true_h_fid_dir + trueinfile): indict = from_json(first_true_h_fid_dir + trueinfile)
def trace(self, message, *args, **kws): self.log(logging.TRACE, message, *args, **kws) logging.Logger.trace = trace logging.RootLogger.trace = trace logging.trace = logging.root.trace # Don't move these up, as "trace" might be used in them from pisa.utils.jsons import from_json from pisa.resources.resources import find_resource # Get the logging configuration # Will search in local dir, $PISA and finally package resources logconfig = from_json(find_resource("logging.json")) # Setup the logging system with this config logging.config.dictConfig(logconfig) # Make the loggers public # In case they haven't been defined, this will just inherit from the root logger physics = logging.getLogger("physics") profile = logging.getLogger("profile") def set_verbosity(verbosity): """Overwrite the verbosity level for the root logger Verbosity should be an integer with the levels just below. """ # Ignore if no verbosity is given
def load_interpolated_hypersurfaces(input_file): ''' Load a set of interpolated hypersurfaces from a file. Analogously to "load_hypersurfaces", this function returns a collection with a HypersurfaceInterpolator object for each Map. Parameters ---------- input_file : str A JSON input file as produced by fit_hypersurfaces if interpolation params were given. It has the form:: { interpolation_param_spec = { 'param1': {"values": [val1_1, val1_2, ...], "scales_log": True/False} 'param2': {"values": [val2_1, val2_2, ...], "scales_log": True/False} ... 'paramN': {"values": [valN_1, valN_2, ...], "scales_log": True/False} }, 'hs_fits': [ <list of dicts where keys are map names such as 'nue_cc' and values are hypersurface states> ] } Returns ------- collections.OrderedDict dictionary with a :obj:`HypersurfaceInterpolator` for each map ''' assert isinstance(input_file, str) if input_file.endswith("json") or input_file.endswith("json.bz2"): logging.info(f"Loading interpolated hypersurfaces from file: {input_file}") input_data = from_json(input_file) assert set(['interpolation_param_spec', 'hs_fits']).issubset( set(input_data.keys())), 'missing keys' map_names = None # input_data['hs_fits'] is a list of dicts, each dict contains "param_values" # and "hs_fit" logging.info("Reading file complete, generating hypersurfaces...") for hs_fit_dict in input_data['hs_fits']: # this is still not the actual Hypersurface, but a dict with the (linked) # maps and the HS fit for the map... hs_state_maps = hs_fit_dict["hs_fit"] if map_names is None: map_names = list(hs_state_maps.keys()) else: assert set(map_names) == set(hs_state_maps.keys()), "inconsistent maps" # When data is recovered from JSON, the object states are not automatically # converted to the corresponding objects, so we need to do it manually here. for map_name in map_names: hs_state_maps[map_name] = Hypersurface.from_state(hs_state_maps[map_name]) logging.info(f"Read hypersurface maps: {map_names}") # Now we have a list of dicts where the map names are on the lower level. # We need to convert this into a dict of HypersurfaceInterpolator objects. output = collections.OrderedDict() for m in map_names: hs_fits = [{"param_values": fd["param_values"], "hs_fit": fd['hs_fit'][m]} for fd in input_data['hs_fits']] output[m] = HypersurfaceInterpolator(input_data['interpolation_param_spec'], hs_fits) else: raise Exception("unknown file format") return output
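# Hedged usage sketch: load the assembled file produced by assemble_interpolated_fits
# and inspect which maps received an interpolator. The file name is a hypothetical
# example.
interpolators = load_interpolated_hypersurfaces("interpolated_hs.json.bz2")
for map_name, interpolator in interpolators.items():
    logging.info("Loaded interpolator for map '%s': %s",
                 map_name, type(interpolator).__name__)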
def run_interpolated_fit(fit_directory, job_idx, skip_successful=False): """Run the hypersurface fit for a grid point. If `skip_successful` is true, do not run if the `fit_successful` flag is already True. """ assert os.path.isdir(fit_directory), "fit directory does not exist" gridpoint_json = os.path.join(fit_directory, f"gridpoint_{job_idx:06d}.json.bz2") gridpoint_data = from_json(gridpoint_json) if skip_successful and gridpoint_data["fit_successful"]: logging.info(f"Fit at job index {job_idx} already successful, skipping...") return metadata = from_json(os.path.join(fit_directory, "metadata.json")) interpolation_param_spec = metadata["interpolation_param_spec"] # this is a pipeline configuration in the form of an OrderedDict nominal_dataset = metadata["nominal_dataset"] # Why can we still not load PISA objects from JSON that are inside a dict?! Grrr... nominal_dataset["pipeline_cfg"] = pipeline_cfg_from_states( nominal_dataset["pipeline_cfg"] ) # this is a list of pipeline configurations sys_datasets = metadata["sys_datasets"] for sys_dataset in sys_datasets: sys_dataset["pipeline_cfg"] = pipeline_cfg_from_states( sys_dataset["pipeline_cfg"] ) # this is a dict of param_name : value pairs param_values = gridpoint_data["param_values"] # we do a redundant check to make sure the parameter values at this grid point are # correct interpolation_param_names = metadata["interpolation_param_names"] grid_shape = tuple(metadata["grid_shape"]) # the grid point index of this job grid_idx = list(np.ndindex(grid_shape))[job_idx] for i, n in enumerate(interpolation_param_names): ms = "Inconsistent parameter values at grid point!" assert interpolation_param_spec[n]["values"][grid_idx[i]] == param_values[n], ms # now we need to adjust the values of the parameter in all pipelines for this point logging.info(f"updating pipelines with parameter values: {param_values}") for dataset in [nominal_dataset] + sys_datasets: for stage_cfg in dataset["pipeline_cfg"].values(): if "params" not in stage_cfg.keys(): continue for param in interpolation_param_names: if param in stage_cfg["params"].names: stage_cfg["params"][param].value = param_values[param] # these are the parameters of the hypersurface, NOT the ones we interpolate them # over! hypersurface_params = [] for param_state in metadata["hypersurface_params"]: hypersurface_params.append(HypersurfaceParam.from_state(param_state)) # We create Pipeline objects, get their outputs and then forget about the Pipeline # object on purpose! The memory requirement to hold all systematic sets at the same # time is just too large, especially on the cluster. The way we do it below we # only need enough memory for one dataset at a time. 
nominal_dataset["mapset"] = Pipeline(nominal_dataset["pipeline_cfg"]).get_outputs() for sys_dataset in sys_datasets: sys_dataset["mapset"] = Pipeline(sys_dataset["pipeline_cfg"]).get_outputs() # Merge maps according to the combine regex, if one was provided combine_regex = metadata["combine_regex"] if combine_regex is not None: nominal_dataset["mapset"] = nominal_dataset["mapset"].combine_re(combine_regex) for sys_dataset in sys_datasets: sys_dataset["mapset"] = sys_dataset["mapset"].combine_re(combine_regex) hypersurface_fit_kw = metadata["hypersurface_fit_kw"] hypersurfaces = collections.OrderedDict() log = metadata["log"] # flag determining whether hs fit is run in log-space or not for map_name in nominal_dataset["mapset"].names: nominal_map = nominal_dataset["mapset"][map_name] nominal_param_values = nominal_dataset["sys_params"] sys_maps = [sys_dataset["mapset"][map_name] for sys_dataset in sys_datasets] sys_param_values = [sys_dataset["sys_params"] for sys_dataset in sys_datasets] hypersurface = Hypersurface( # Yes, this MUST be a deepcopy! Otherwise weird memory overwrites happen # and all the numbers get jumbled across the hypersurfaces of different maps params=copy.deepcopy(hypersurface_params), initial_intercept=0. if log else 1., # Initial value for intercept log=log ) hypersurface.fit( nominal_map=nominal_map, nominal_param_values=nominal_param_values, sys_maps=sys_maps, sys_param_values=sys_param_values, norm=True, # Is the space or loading time really a problem? # keep_maps=False, # it would take a lot more space otherwise **hypersurface_fit_kw ) logging.debug("\nFitted hypersurface report:\n%s" % hypersurface) hypersurfaces[map_name] = hypersurface gridpoint_data["hs_fit"] = hypersurfaces gridpoint_data["fit_successful"] = True to_json(gridpoint_data, gridpoint_json)
help='''Settings related to the optimizer used in the LLR analysis.''') parser.add_argument('-n','--ntrials',type=int, default = 1, help="Number of trials to run") parser.add_argument('-s','--save-steps',action='store_true',default=False, dest='save_steps', help="Save all steps the optimizer takes.") parser.add_argument('-o','--outfile',type=str,default='llh_data.json',metavar='JSONFILE', help="Output filename.") parser.add_argument('-v', '--verbose', action='count', default=None, help='set verbosity level') args = parser.parse_args() set_verbosity(args.verbose) #Read in the settings template_settings = from_json(args.template_settings) minimizer_settings = from_json(args.minimizer_settings) #Workaround for old scipy versions import scipy if scipy.__version__ < '0.12.0': logging.warn('Detected scipy version %s < 0.12.0'%scipy.__version__) if 'maxiter' in minimizer_settings: logging.warn('Optimizer settings for \"maxiter\" will be ignored') minimizer_settings.pop('maxiter') #Get the parameters params = template_settings['params'] #store results from all the trials trials = []
help="Plot the fits of DOM efficiency and hole ice for each bin.") parser.add_argument('--detector',type=str,default='', help="Name of detector to put in plot titles") parser.add_argument('--selection',type=str,default='', help="Name of selection to put in plot titles") parser.add_argument('-o','--outdir',type=str, metavar='DIR', required = True, help='''Output directory''') args = parser.parse_args() #Read in the settings detector = args.detector selection = args.selection outdir = args.outdir utils.mkdir(outdir) utils.mkdir(outdir+'/plots/') template_settings = from_json(args.template_settings) czbin_edges = template_settings['binning']['czbins'] ebin_edges = template_settings['binning']['ebins'] channel = template_settings['params']['channel']['value'] x_steps = 0.0001 if args.sim == '4digit': MC_name = '1XXX' elif args.sim == '5digit': MC_name = '1XXXX' elif args.sim == 'dima': MC_name = 'Dima' else: MC_name = 'Other' params = get_values(select_hierarchy(template_settings['params'],normal_hierarchy=True))
def fromFile(cls, filename): """ Load a Fisher matrix from a json file """ return cls(**from_json(filename))
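# Hedged usage sketch, assuming the enclosing class is named FisherMatrix: fromFile
# simply unpacks the saved JSON state into the constructor, so reading a previously
# saved matrix is one call. The file name is a hypothetical example.
fisher = FisherMatrix.fromFile("fisher_data_NMH.json")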
chi2s_theta23['data_IMH'] = {} chi2s_livetime = {} chi2s_livetime['data_NMH'] = {} chi2s_livetime['data_IMH'] = {} chi2s = {} chi2s['data_NMH'] = {} chi2s['data_NMH']['true_h_fiducial'] = [] chi2s['data_NMH']['false_h_best'] = [] chi2s['data_IMH'] = {} chi2s['data_IMH']['true_h_fiducial'] = [] chi2s['data_IMH']['false_h_best'] = [] for infile in sorted(os.listdir(true_h_fid_dir)): if os.path.isfile(true_h_fid_dir+infile): indict = from_json(true_h_fid_dir+infile) if theta23analysis == True: theta23_nh = indict['template_settings']['params']['theta23_nh']['value'] theta23_ih = indict['template_settings']['params']['theta23_ih']['value'] assert(theta23_nh == theta23_ih) theta23vals.append(theta23_nh) sin2theta23vals.append(math.pow(math.sin(theta23_nh),2)) chi2s_theta23['data_NMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []} chi2s_theta23['data_IMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []} if livetimeanalysis == True: livetime = indict['template_settings']['params']['livetime']['value'] livetimevals.append(livetime) chi2s_livetime['data_NMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []} chi2s_livetime['data_IMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []}
'--outfile', type=str, default='alt_hypo_study.json', metavar='JSONFILE', help="Output filename.") parser.add_argument('-v', '--verbose', action='count', default=None, help='set verbosity level') args = parser.parse_args() set_verbosity(args.verbose) #Read in the settings template_settings = from_json(args.template_settings) minimizer_settings = from_json(args.minimizer_settings) grid_settings = from_json(args.grid_settings) if args.gpu_id is not None: template_settings['params']['gpu_id'] = {} template_settings['params']['gpu_id']['value'] = args.gpu_id template_settings['params']['gpu_id']['fixed'] = True with Timer() as t: template_maker = TemplateMaker(get_values(template_settings['params']), **template_settings['binning']) profile.info("==> elapsed time to initialize templates: %s sec" % t.secs) #Get the parameters params = template_settings['params']
import numpy as np import matplotlib.pyplot as plt plt.rcParams['text.usetex'] = True import scipy.interpolate from pisa.utils.jsons import from_json indict = from_json('/Users/steven/IceCube/PISA/pisa/pisa/resources/aeff/relative-crosssections/genie-weigthed-crosssections.json') axislabels = {} axislabels['nue'] = r'$\nu_e$ CC' axislabels['nue_bar'] = r'$\bar{\nu}_e$ CC' axislabels['numu'] = r'$\nu_{\mu}$ CC' axislabels['numu_bar'] = r'$\bar{\nu}_{\mu}$ CC' axislabels['nutau'] = r'$\nu_{\tau}$ CC' axislabels['nutau_bar'] = r'$\bar{\nu}_{\tau}$ CC' axislabels['nuall'] = r'$\nu$ NC' axislabels['nuallbar'] = r'$\bar{\nu}$ NC' GENIElabels = {} GENIElabels['MaRES'] = r'$M_A^{RES}$' GENIElabels['MaCCQE'] = r'$M_A^{CCQE}$' GENIElabels['AhtBY'] = r'$A_{HT}$' GENIElabels['BhtBY'] = r'$B_{BT}$' GENIElabels['CV1uBY'] = r'$C_{\nu1u}$' GENIElabels['CV2uBY'] = r'$C_{\nu2u}$' GENIEcolours = {} GENIEcolours['MaRES'] = 'b' GENIEcolours['MaCCQE'] = 'r'
default=False, dest='save_templates', help="Do not save the templates for the different test points.") parser.add_argument('-o','--outdir',type=str,default=os.getcwd(),metavar='DIR', help="Output directory") parser.add_argument('-v', '--verbose', action='count', default=None, help='set verbosity level') args = parser.parse_args() # Set verbosity level set_verbosity(args.verbose) # Read the template settings template_settings = from_json(args.template_settings) # This file only contains the number of test points for each parameter (and perhaps eventually a non-linearity criterion) grid_settings = from_json(args.grid_settings) # Get the Fisher matrices for the desired hierarchy and fiducial settings fisher_matrices = get_fisher_matrices(template_settings,grid_settings,args.inverted_truth,args.normal_truth, args.dump_all_stages,args.save_templates,args.outdir) # Fisher matrices are saved in any case for data_tag in fisher_matrices: fisher_basename = 'fisher_data_%s'%data_tag for chan in fisher_matrices[data_tag]: if chan == 'comb': outfile = os.path.join(args.outdir,fisher_basename+'.json')
parser.add_argument('-IH', '--IH_osc_dir', type=str, required=True,
                    help="Directory containing oscillation probabilities for "
                         "different oversampling values for inverted ordering.")
parser.add_argument('-r', '--reference', type=str,
                    help="Reference value for oversampling (i.e. highest value used)")
args = parser.parse_args()

NH_path = args.NH_osc_dir
IH_path = args.IH_osc_dir
reference = args.reference

NH_vals = {}
IH_vals = {}
os_vals = []

for f in sorted(os.listdir(NH_path)):
    if os.path.isfile(NH_path+f):
        osc_file = from_json(NH_path+f)
        for os_key in osc_file.keys():
            NH_vals[os_key] = osc_file[os_key]
            if int(os_key) not in os_vals:
                os_vals.append(int(os_key))

for f in sorted(os.listdir(IH_path)):
    if os.path.isfile(IH_path+f):
        osc_file = from_json(IH_path+f)
        for os_key in osc_file.keys():
            IH_vals[os_key] = osc_file[os_key]

os_vals = sorted(os_vals)

times = []
oversamples = []
titles["numubar"] = r"$\bar{\nu}_{\mu}$" titles["nue_cc"] = r"$\nu_e$ CC" titles["numu_cc"] = r"$\nu_{\mu}$ CC" titles["nutau_cc"] = r"$\nu_{\tau}$ CC" titles["nuall_nc"] = r"$\nu$ NC" titles["nuebar_cc"] = r"$\bar{\nu}_e$ CC" titles["numubar_cc"] = r"$\bar{\nu}_{\mu}$ CC" titles["nutaubar_cc"] = r"$\bar{\nu}_{\tau}$ CC" titles["nuallbar_nc"] = r"$\bar{\nu}$ NC" titles["trck"] = r"Track-Like" titles["cscd"] = r"Cascade-Like" try: cake_array1 = from_json(args.cake_file1)["maps"] except: cake_array1 = from_json(args.cake_file1) try: cake_array2 = from_json(args.cake_file2)["maps"] except: cake_array2 = from_json(args.cake_file2) for cake_dict1, cake_dict2 in zip(cake_array1, cake_array2): assert cake_dict1["name"] == cake_dict2["name"] cake_map1 = {} cake_map1["map"] = cake_dict1["hist"].T
dest="outfile", metavar="FILE", type=str, action="store", default="event_rate.json", help="""file to store the output""", ) parser.add_argument("-v", "--verbose", action="count", default=None, help="""set verbosity level""") args = parser.parse_args() # Set verbosity level set_verbosity(args.verbose) # Check binning ebins, czbins = check_binning(args.osc_flux_maps) logging.info("Defining aeff_service...") if args.mc_mode: logging.info(" Using effective area from EVENT DATA...") aeff_service = AeffServiceMC(ebins, czbins, aeff_weight_file=args.weighted_aeff_file) else: logging.info(" Using effective area from PARAMETRIZATION...") aeff_settings = from_json(find_resource(args.settings_file)) aeff_service = AeffServicePar(ebins, czbins, **aeff_settings) event_rate_maps = get_event_rates(args.osc_flux_maps, aeff_service, args.livetime, args.aeff_scale) logging.info("Saving output to: %s" % args.outfile) to_json(event_rate_maps, args.outfile)
from pisa.utils.jsons import from_json, to_json from copy import deepcopy as copy parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument( 'papa_file', type=str, metavar='JSON', help='papa settings file, containing the resolution parameterizations.') parser.add_argument('outfile', type=str, metavar='JSON', help='output file to store resolutions in PISA-format.') args = parser.parse_args() papa_settings = from_json(args.papa_file) parameterizations = papa_settings['fiducial']['reco_parametrization']['value'] pisa_flavs = ['nue', 'numu', 'nutau'] mID = ['', '_bar'] intType = ['cc', 'nc'] recoType = ['coszen', 'energy'] papa_NC = parameterizations['NC'] egy_res = papa_NC['e'] papa_NC.pop('e') papa_NC['energy'] = egy_res pisa_reco_settings = {} for flav in pisa_flavs: papa_res = parameterizations[flav]
#Only show errors while parsing set_verbosity(0) parser = ArgumentParser(description='Takes a reco event rate file ' 'as input and produces a set of reconstructed templates ' 'of tracks and cascades.', formatter_class=RawTextHelpFormatter) parser.add_argument('reco_event_maps',metavar='RECOEVENTS',type=from_json, help='''JSON reco event rate file with following parameters: {"nue_cc": {'czbins':[...], 'ebins':[...], 'map':[...]}, "numu_cc": {...}, "nutau_cc": {...}, "nuall_nc": {...} }''') parser.add_argument('--settings',metavar='SETTINGS',type=from_json, default=from_json(find_resource('pid/V15_pid.json')), help='''json file containing parameterizations of the particle ID for each event type.''') parser.add_argument('-o', '--outfile', dest='outfile', metavar='FILE', type=str, action='store',default="pid.json", help='''file to store the output''') parser.add_argument('-v', '--verbose', action='count', default=0, help='''set verbosity level''') args = parser.parse_args() #Set verbosity level set_verbosity(args.verbose) #Check binning ebins, czbins = check_binning(args.reco_event_maps)
type=str, default=os.getcwd(), metavar='DIR', help="Output directory") parser.add_argument('-v', '--verbose', action='count', default=None, help='set verbosity level') args = parser.parse_args() # Set verbosity level set_verbosity(args.verbose) # Read the template settings template_settings = from_json(args.template_settings) # This file only contains the number of test points for each parameter (and # perhaps eventually a non-linearity criterion) grid_settings = from_json(args.grid_settings) # Get the Fisher matrices for the desired hierarchy and fiducial settings fisher_matrices = get_fisher_matrices(template_settings=template_settings, grid_settings=grid_settings, IMH=args.inverted_truth, NMH=args.normal_truth, dump_all_stages=args.dump_all_stages, save_templates=args.save_templates, outdir=args.outdir) # Fisher matrices are saved in any case
logging.warn("processing " + str(len(args.infiles)) + " files...") logging.warn("Saving to file: %s" % args.outfile) mod_num = len(args.infiles) / 20 start_time = datetime.now() minimizer_settings = {} template_settings = {} pseudo_data_settings = {} trials = {} for i, filename in enumerate(args.infiles): if mod_num > 0: if i % mod_num == 0: print " >> %d files done..." % i try: data = from_json(filename) except: print "Skipping file: ", filename continue if not minimizer_settings: minimizer_settings = data['minimizer_settings'] if not template_settings: template_settings = data['template_settings'] if not pseudo_data_settings: try: pseudo_data_settings = data['pseudo_data_settings'] except: pass
titles['nue_bar'] = r'$\bar{\nu}_e$' titles['nuebar'] = r'$\bar{\nu}_e$' titles['numu'] = r'$\nu_{\mu}$' titles['numu_bar'] = r'$\bar{\nu}_{\mu}$' titles['numubar'] = r'$\bar{\nu}_{\mu}$' titles['nue_cc'] = r'$\nu_e$ CC' titles['numu_cc'] = r'$\nu_{\mu}$ CC' titles['nutau_cc'] = r'$\nu_{\tau}$ CC' titles['nuall_nc'] = r'$\nu$ NC' titles['trck'] = r'Track-Like' titles['cscd'] = r'Cascade-Like' try: cake_array = from_json(args.cake_file)['maps'] except: cake_array = from_json(args.cake_file) pisa_dict = from_json(args.pisa_file) for cake_dict in cake_array: if cake_dict['name'] == 'numubar': pisa_map = pisa_dict['numu_bar'] elif cake_dict['name'] == 'nuebar': pisa_map = pisa_dict['nue_bar'] else: pisa_map = pisa_dict[cake_dict['name']] cake_map = {} cake_map['map'] = cake_dict['hist'].T
free_chi2s_livetime = {} free_chi2s_livetime['data_NMH'] = {} free_chi2s_livetime['data_IMH'] = {} prior_significances = {} prior_significances['data_NMH'] = [] prior_significances['data_IMH'] = [] prior_chi2s_livetime = {} prior_chi2s_livetime['data_NMH'] = {} prior_chi2s_livetime['data_IMH'] = {} for infile in sorted(os.listdir(free_true_h_fid_dir)): if os.path.isfile(free_true_h_fid_dir+infile): indict = from_json(free_true_h_fid_dir+infile) livetime = indict['template_settings']['params']['livetime']['value'] livetimevals.append(livetime) free_chi2s_livetime['data_NMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []} free_chi2s_livetime['data_IMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []} prior_chi2s_livetime['data_NMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []} prior_chi2s_livetime['data_IMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []} livetimevals = sorted(livetimevals) for livetime in livetimevals: # Get chisquare values for free octant true_h_fiducial distributions for trueinfile in sorted(os.listdir(free_true_h_fid_dir)): if os.path.isfile(free_true_h_fid_dir+trueinfile): indict = from_json(free_true_h_fid_dir+trueinfile)
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from pisa.utils.jsons import from_json, to_json import numpy as np parser = ArgumentParser(description='''Determines the false_h_best fiducial distribution, under the Gaussian assumption.''', formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument('-p','--pisa_file', type=str, required=True, help="File containing PISA V2 event by event data") args = parser.parse_args() pisa_dict = from_json(args.pisa_file)['pid']['reduced'] to_json(pisa_dict,'better.json')
def plot_clsim_table_summary( summaries, formats=None, outdir=None, no_legend=False ): """Plot the table summary produced by `summarize_clsim_table`. Plots are made of marginalized 1D distributions, where mean, median, and/or max are used to marginalize out the remaining dimensions (where those are present in the summaries).. Parameters ---------- summaries : string, summary, or iterable thereof If string(s) are provided, each is glob-expanded. See :method:`glob.glob` for valid syntax. formats : None, string, or iterable of strings in {'pdf', 'png'} If no formats are provided, the plot will not be saved. outdir : None or string If `formats` is specified and `outdir` is None, the plots are saved to the present working directory. no_legend : bool, optional Do not display legend on plots (default is to display a legend) Returns ------- all_figs : list of three :class:`matplotlib.figure.Figure` all_axes : list of three lists of :class:`matplotlib.axes.Axes` summaries : list of :class:`collections.OrderedDict` List of all summaries loaded """ orig_summaries = deepcopy(summaries) if isinstance(summaries, (basestring, Mapping)): summaries = [summaries] tmp_summaries = [] for summary in summaries: if isinstance(summary, Mapping): tmp_summaries.append(summary) elif isinstance(summary, basestring): tmp_summaries.extend(glob(expand(summary))) summaries = tmp_summaries for summary_n, summary in enumerate(summaries): if isinstance(summary, basestring): summary = from_json(summary) summaries[summary_n] = summary if formats is None: formats = [] elif isinstance(formats, basestring): formats = [formats] if outdir is not None: outdir = expand(outdir) mkdir(outdir) n_summaries = len(summaries) if n_summaries == 0: raise ValueError( 'No summaries found based on argument `summaries`={}' .format(orig_summaries) ) for n, fmt in enumerate(formats): fmt = fmt.strip().lower() assert fmt in ('pdf', 'png'), fmt formats[n] = fmt all_items = OrderedDict() for summary in summaries: for key, value in summary.items(): if key == 'dimensions': continue if not all_items.has_key(key): all_items[key] = [] all_items[key].append(value) same_items = OrderedDict() different_items = OrderedDict() for key, values in all_items.items(): all_same = True ref_value = values[0] for value in values[1:]: if np.any(value != ref_value): all_same = False if all_same: same_items[key] = values[0] else: different_items[key] = values if n_summaries > 1: if same_items: print('Same for all:\n{}'.format(same_items.keys())) if different_items: print('Different for some or all:\n{}' .format(different_items.keys())) same_label = formatter(same_items) summary_has_detail = False if set(['string', 'depth_idx', 'seed']).issubset(all_items.keys()): summary_has_detail = True strings = sorted(set(all_items['string'])) depths = sorted(set(all_items['depth_idx'])) seeds = sorted(set(all_items['seed'])) plot_kinds = ('mean', 'median', 'max') plot_kinds_with_data = set() dim_names = summaries[0]['dimensions'].keys() n_dims = len(dim_names) fig_x = 10 # inches fig_header_y = 0.35 # inches fig_one_axis_y = 5 # inches fig_all_axes_y = n_dims * fig_one_axis_y fig_y = fig_header_y + fig_all_axes_y # inches all_figs = [] all_axes = [] for plot_kind in plot_kinds: fig, f_axes = plt.subplots( nrows=n_dims, ncols=1, squeeze=False, figsize=(fig_x, fig_y) ) all_figs.append(fig) f_axes = list(f_axes.flat) for ax in f_axes: ax.set_prop_cycle('color', COLOR_CYCLE_ORTHOG) all_axes.append(f_axes) n_lines = 0 xlims = [[np.inf, -np.inf]] * n_dims summaries_order = [] if 
summary_has_detail: for string, depth_idx, seed in product(strings, depths, seeds): for summary_n, summary in enumerate(summaries): if (summary['string'] != string or summary['depth_idx'] != depth_idx or summary['seed'] != seed): continue summaries_order.append((summary_n, summary)) else: for summary_n, summary in enumerate(summaries): summaries_order.append((summary_n, summary)) labels_assigned = set() for summary_n, summary in summaries_order: different_label = formatter({k: v[summary_n] for k, v in different_items.items()}) if different_label: label = different_label if label in labels_assigned: label = None else: labels_assigned.add(label) else: label = None for dim_num, dim_name in enumerate(dim_names): dim_info = summary['dimensions'][dim_name] dim_axes = [f_axes[dim_num] for f_axes in all_axes] bin_edges = summary[dim_name + '_bin_edges'] if dim_name == 'deltaphidir': bin_edges /= np.pi xlims[dim_num] = [ min(xlims[dim_num][0], np.min(bin_edges)), max(xlims[dim_num][1], np.max(bin_edges)) ] for ax, plot_kind in zip(dim_axes, plot_kinds): if plot_kind not in dim_info: continue plot_kinds_with_data.add(plot_kind) vals = dim_info[plot_kind] ax.step(bin_edges, [vals[0]] + list(vals), linewidth=1, clip_on=True, label=label) n_lines += 1 dim_labels = dict( r=r'$r$', costheta=r'$\cos\theta$', t=r'$t$', costhetadir=r'$\cos\theta_{\rm dir}$', deltaphidir=r'$\Delta\phi_{\rm dir}$' ) units = dict(r='m', t='ns', deltaphidir=r'rad/$\pi$') logx_dims = [] logy_dims = ['r', 'time', 'deltaphidir'] flabel = '' same_flabel = formatter(same_items, fname=True) different_flabel = formatter(different_items, key_only=True, fname=True) if same_flabel: flabel += '__same__' + same_flabel if different_flabel: flabel += '__differ__' + different_flabel for kind_idx, (plot_kind, fig) in enumerate(zip(plot_kinds, all_figs)): if plot_kind not in plot_kinds_with_data: continue for dim_num, (dim_name, ax) in enumerate(zip(dim_names, all_axes[kind_idx])): #if dim_num == 0 and different_items: if different_items and not no_legend: ax.legend(loc='best', frameon=False, prop=dict(size=7, family='monospace')) ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) ax.yaxis.set_ticks_position('none') ax.xaxis.set_ticks_position('none') ax.xaxis.tick_bottom() ax.yaxis.tick_left() ax.set_xlim(xlims[dim_num]) xlabel = dim_labels[dim_name] if dim_name in units: xlabel += ' ({})'.format(units[dim_name]) ax.set_xlabel(xlabel) if dim_name in logx_dims: ax.set_xscale('log') if dim_name in logy_dims: ax.set_yscale('log') fig.tight_layout(rect=(0, 0, 1, fig_all_axes_y/fig_y)) suptitle = ( 'Marginalized distributions (taking {} over all other axes)' .format(plot_kind) ) if same_label: suptitle += '\n' + same_label fig.suptitle(suptitle, y=(fig_all_axes_y + fig_header_y*0.8) / fig_y, fontsize=9) for fmt in formats: outfpath = ('clsim_table_summaries{}__{}.{}' .format(flabel, plot_kind, fmt)) if outdir: outfpath = join(outdir, outfpath) fig.savefig(outfpath, dpi=300) print('Saved image to "{}"'.format(outfpath)) return all_figs, all_axes, summaries
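# Hedged usage sketch: glob-expand a set of summary JSON files and save the
# marginalized 1D distribution plots as PNGs. Paths are hypothetical examples.
figs, axes, summaries = plot_clsim_table_summary(
    summaries="table_summaries/*.json",
    formats="png",
    outdir="plots",
)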
def run_interpolated_fit(fit_directory, job_idx, skip_successful=False): """Run the hypersurface fit for a grid point. If `skip_successful` is true, do not run if the `fit_successful` flag is already True. """ #TODO a lot of this is copied from fit_hypersurfaces in hypersurface.py, would be safer to make more OAOO #TODO Copy the param value storage stuff from fit_hypersurfaces across in the meantime assert os.path.isdir(fit_directory), "fit directory does not exist" gridpoint_json = os.path.join(fit_directory, f"gridpoint_{job_idx:06d}.json.bz2") gridpoint_data = from_json(gridpoint_json) if skip_successful and gridpoint_data["fit_successful"]: logging.info( f"Fit at job index {job_idx} already successful, skipping...") return metadata = from_json(os.path.join(fit_directory, "metadata.json")) interpolation_param_spec = metadata["interpolation_param_spec"] # this is a pipeline configuration in the form of an OrderedDict nominal_dataset = metadata["nominal_dataset"] # Why can we still not load PISA objects from JSON that are inside a dict?! Grrr... nominal_dataset["pipeline_cfg"] = pipeline_cfg_from_states( nominal_dataset["pipeline_cfg"]) # this is a list of pipeline configurations sys_datasets = metadata["sys_datasets"] for sys_dataset in sys_datasets: sys_dataset["pipeline_cfg"] = pipeline_cfg_from_states( sys_dataset["pipeline_cfg"]) # this is a dict of param_name : value pairs param_values = gridpoint_data["param_values"] # we do a redundant check to make sure the parameter values at this grid point are # correct interpolation_param_names = metadata["interpolation_param_names"] grid_shape = tuple(metadata["grid_shape"]) # the grid point index of this job grid_idx = list(np.ndindex(grid_shape))[job_idx] for i, n in enumerate(interpolation_param_names): ms = "Inconsistent parameter values at grid point!" assert interpolation_param_spec[n]["values"][ grid_idx[i]] == param_values[n], ms # now we need to adjust the values of the parameter in all pipelines for this point logging.info(f"updating pipelines with parameter values: {param_values}") for dataset in [nominal_dataset] + sys_datasets: for stage_cfg in dataset["pipeline_cfg"].values(): if "params" not in stage_cfg.keys(): continue for param in interpolation_param_names: if param in stage_cfg["params"].names: stage_cfg["params"][param].value = param_values[param] # these are the parameters of the hypersurface, NOT the ones we interpolate them # over! hypersurface_params = [] for param_state in metadata["hypersurface_params"]: hypersurface_params.append(HypersurfaceParam.from_state(param_state)) def find_hist_stage(pipeline): """Locate the index of the hist stage in a pipeline.""" hist_idx_found = False for i, s in enumerate(pipeline.stages): if s.__class__.__name__ == "hist": hist_idx = i hist_idx_found = True break if not hist_idx_found: raise RuntimeError( "Could not find histogram stage in pipeline, aborting.") return hist_idx # We create Pipeline objects, get their outputs and then forget about the Pipeline # object on purpose! The memory requirement to hold all systematic sets at the same # time is just too large, especially on the cluster. The way we do it below we # only need enough memory for one dataset at a time. 
for dataset in [nominal_dataset] + sys_datasets: pipeline = Pipeline(dataset["pipeline_cfg"]) dataset["mapset"] = pipeline.get_outputs() # get the un-weighted event counts as well so that we can exclude bins # with too little statistics # First, find out which stage is the hist stage hist_idx = find_hist_stage(pipeline) pipeline.stages[hist_idx].unweighted = True dataset["mapset_unweighted"] = pipeline.get_outputs() del pipeline # Merge maps according to the combine regex, if one was provided combine_regex = metadata["combine_regex"] if combine_regex is not None: for dataset in [nominal_dataset] + sys_datasets: dataset["mapset"] = dataset["mapset"].combine_re(combine_regex) dataset["mapset_unweighted"] = dataset[ "mapset_unweighted"].combine_re(combine_regex) minimum_mc = metadata["minimum_mc"] # Remove bins (i.e. set their count to zero) that have too few MC events for dataset in sys_datasets + [nominal_dataset]: for map_name in dataset["mapset"].names: insuff_mc = dataset["mapset_unweighted"][ map_name].nominal_values < minimum_mc # Setting the hist to zero sets both nominal value and std_dev to zero dataset["mapset"][map_name].hist[insuff_mc] = 0. hypersurface_fit_kw = metadata["hypersurface_fit_kw"] hypersurfaces = collections.OrderedDict() log = metadata[ "log"] # flag determining whether hs fit is run in log-space or not for map_name in nominal_dataset["mapset"].names: nominal_map = nominal_dataset["mapset"][map_name] nominal_param_values = nominal_dataset["sys_params"] sys_maps = [ sys_dataset["mapset"][map_name] for sys_dataset in sys_datasets ] sys_param_values = [ sys_dataset["sys_params"] for sys_dataset in sys_datasets ] hypersurface = Hypersurface( # Yes, this MUST be a deepcopy! Otherwise weird memory overwrites happen # and all the numbers get jumbled across the hypersurfaces of different maps params=copy.deepcopy(hypersurface_params), initial_intercept=0. if log else 1., # Initial value for intercept log=log) hypersurface.fit( nominal_map=nominal_map, nominal_param_values=nominal_param_values, sys_maps=sys_maps, sys_param_values=sys_param_values, norm=True, # Is the space or loading time really a problem? # keep_maps=False, # it would take a lot more space otherwise **hypersurface_fit_kw) logging.debug("\nFitted hypersurface report:\n%s" % hypersurface) hypersurfaces[map_name] = hypersurface gridpoint_data["hs_fit"] = hypersurfaces gridpoint_data["fit_successful"] = True to_json(gridpoint_data, gridpoint_json)
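# Two small standalone illustrations of the bookkeeping used in
# run_interpolated_fit above (shapes, thresholds and values here are
# hypothetical, not taken from any metadata file).
import numpy as np

# (1) job_idx <-> grid_idx correspondence: np.ndindex enumerates the grid in
#     C (row-major) order, so the flat job index agrees with np.unravel_index.
grid_shape = (2, 3)
for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)):
    assert grid_idx == np.unravel_index(job_idx, grid_shape)

# (2) minimum-MC cut, with plain arrays standing in for the PISA map objects:
#     bins whose unweighted event count falls below the threshold are zeroed
#     in the weighted histogram so they cannot pull the hypersurface fit.
minimum_mc = 10
unweighted_counts = np.array([[120.,  3.], [45.,  0.]])
weighted_hist = np.array([[1.7, 0.04], [0.62, 0.0]])
insuff_mc = unweighted_counts < minimum_mc
weighted_hist[insuff_mc] = 0.
print(weighted_hist)  # bins with too little MC are now zero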
# author: Timothy C. Arlen # [email protected] # from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from pisa.utils.jsons import from_json,to_json from copy import deepcopy as copy parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument('papa_file',type=str,metavar='JSON', help='papa settings file, containing the resolution parameterizations.') parser.add_argument('outfile',type=str,metavar='JSON', help='output file to store resolutions in PISA-format.') args = parser.parse_args() papa_settings = from_json(args.papa_file) parameterizations = papa_settings['fiducial']['reco_parametrization']['value'] pisa_flavs = ['nue','numu','nutau'] mID = ['','_bar'] intType = ['cc','nc'] recoType = ['coszen','energy'] papa_NC = parameterizations['NC'] egy_res = papa_NC['e'] papa_NC.pop('e') papa_NC['energy'] = egy_res pisa_reco_settings = {} for flav in pisa_flavs: papa_res = parameterizations[flav]
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from pisa.utils.jsons import from_json, to_json import numpy as np parser = ArgumentParser(description='''Determines the false_h_best fiducial distribution, under the Gaussian assumption.''', formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument('-c','--cake_file', type=str, required=True, help="File containing CAKE (PISA V3) data") args = parser.parse_args() cake_array = from_json(args.cake_file)['maps'] output_array = [] total_trck_dict = {} total_cscd_dict = {} for cake_dict in cake_array: if 'trck' in cake_dict['name']: if len(total_trck_dict.keys()) == 0: total_trck_dict = cake_dict total_trck_dict['name'] = 'trck' else: total_trck_dict['hist'] += cake_dict['hist'] elif 'cscd' in cake_dict['name']: if len(total_cscd_dict.keys()) == 0: total_cscd_dict = cake_dict total_cscd_dict['name'] = 'cscd' else:
parser.add_argument('-v', '--verbose', action='count', default=None, help='''set verbosity level''') args = parser.parse_args() #Set verbosity level set_verbosity(args.verbose) #Check binning ebins, czbins = check_binning(args.osc_flux_maps) logging.info("Defining aeff_service...") if args.mc_mode: logging.info(" Using effective area from EVENT DATA...") aeff_service = AeffServiceMC(ebins, czbins, aeff_weight_file=args.weighted_aeff_file) else: logging.info(" Using effective area from PARAMETRIZATION...") aeff_settings = from_json(find_resource(args.settings_file)) aeff_service = AeffServicePar(ebins, czbins, **aeff_settings) event_rate_maps = get_event_rates(args.osc_flux_maps, aeff_service, args.livetime, args.aeff_scale) logging.info("Saving output to: %s" % args.outfile) to_json(event_rate_maps, args.outfile)
parser.add_argument('-pd','--pseudo_data_settings',type=str,
                    metavar='JSONFILE',default=None,
                    help='''Settings for pseudo data templates, if desired to be different from template_settings.''')
parser.add_argument('-s','--save-steps',action='store_true',default=False,
                    dest='save_steps',
                    help="Save all steps the optimizer takes.")
parser.add_argument('-o','--outfile',type=str,default='llh_data.json',metavar='JSONFILE',
                    help="Output filename.")
parser.add_argument('-v', '--verbose', action='count', default=None,
                    help='set verbosity level')
args = parser.parse_args()

set_verbosity(args.verbose)

# Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings = from_json(args.minimizer_settings)
pseudo_data_settings = from_json(args.pseudo_data_settings) if args.pseudo_data_settings is not None else template_settings

# Parse the metric to be used
metric_name = 'chisquare' if args.use_chisquare else 'llh'

# Workaround for old scipy versions: compare versions numerically rather than
# as plain strings, which would order them lexicographically
import scipy
from distutils.version import LooseVersion
if LooseVersion(scipy.__version__) < LooseVersion('0.12.0'):
    logging.warn('Detected scipy version %s < 0.12.0'%scipy.__version__)
    if 'maxiter' in minimizer_settings:
        logging.warn('Optimizer settings for "maxiter" will be ignored')
        minimizer_settings.pop('maxiter')
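# Aside on the version check above (standalone illustration, not part of the
# script): plain string comparison orders versions lexicographically and can
# mis-rank releases, which is why a version-aware comparison is preferable.
from distutils.version import LooseVersion

print('0.9.0' < '0.12.0')                              # False -- wrong ordering
print(LooseVersion('0.9.0') < LooseVersion('0.12.0'))  # True  -- correct ordering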
def summarize_clsim_table(table_fpath, table=None, save_summary=True, outdir=None): """ Parameters ---------- table_fpath : string Path to table (or just the table's filename if `outdir` is specified) table : mapping, optional If the table has already been loaded, it can be passed here to avoid re-loading the table. save_summary : bool Whether to save the table summary to disk. outdir : string, optional If `save_summary` is True, write the summary to this directory. If `outdir` is not specified and `save_summary` is True, the summary will be written to the same directory that contains `table_fpath`. Returns ------- table See `load_clsim_table` for details of the data structure summary : OrderedDict """ t_start = time() if save_summary: from pisa.utils.jsons import from_json, to_json table_fpath = expand(table_fpath) srcdir, clsim_fname = dirname(table_fpath), basename(table_fpath) invalid_fname = False try: fname_info = interpret_clsim_table_fname(clsim_fname) except ValueError: invalid_fname = True fname_info = {} if outdir is None: outdir = srcdir outdir = expand(outdir) mkdir(outdir) if invalid_fname: metapath = None else: metaname = (CLSIM_TABLE_METANAME_PROTO[-1].format( hash_val=fname_info['hash_val'])) metapath = join(outdir, metaname) if metapath and isfile(metapath): meta = from_json(metapath) else: meta = dict() if table is None: table = load_clsim_table(table_fpath) summary = OrderedDict() for key in table.keys(): if key == 'table': continue summary[key] = table[key] if fname_info: for key in ('hash_val', 'string', 'depth_idx', 'seed'): summary[key] = fname_info[key] # TODO: Add hole ice info when added to tray_kw_to_hash if meta: summary['n_events'] = meta['tray_kw_to_hash']['NEvents'] summary['ice_model'] = meta['tray_kw_to_hash']['IceModel'] summary['tilt'] = not meta['tray_kw_to_hash']['DisableTilt'] for key, val in meta.items(): if key.endswith('_binning_kw'): summary[key] = val elif 'fname_version' in fname_info and fname_info['fname_version'] == 1: summary['n_events'] = fname_info['n_events'] summary['ice_model'] = 'spice_mie' summary['tilt'] = False summary['r_binning_kw'] = dict(min=0.0, max=400.0, n_bins=200, power=2) summary['costheta_binning_kw'] = dict(min=-1, max=1, n_bins=40) summary['t_binning_kw'] = dict(min=0.0, max=3000.0, n_bins=300) summary['costhetadir_binning_kw'] = dict(min=-1, max=1, n_bins=20) summary['deltaphidir_binning_kw'] = dict(min=0.0, max=np.pi, n_bins=20) # Save marginal distributions and info to file norm = ( 1 / table['n_photons'] / (SPEED_OF_LIGHT_M_PER_NS / table['phase_refractive_index'] * np.mean(np.diff(table['t_bin_edges']))) #* table['angular_acceptance_fract'] * (len(table['costheta_bin_edges']) - 1)) summary['norm'] = norm dim_names = ('r', 'costheta', 't', 'costhetadir', 'deltaphidir') n_dims = len(table['table_shape']) assert n_dims == len(dim_names) # Apply norm to underflow and overflow so magnitudes can be compared # relative to plotted marginal distributions for flow, idx in product(('underflow', 'overflow'), iter(range(n_dims))): summary[flow][idx] = summary[flow][idx] * norm wstderr('Finding marginal distributions...\n') wstderr(' masking off zeros in table...') t0 = time() nonzero_table = np.ma.masked_equal(table['table'], 0) wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3))) t0_marg = time() summary['dimensions'] = OrderedDict() for keep_axis, ax_name in zip(tuple(range(n_dims)), dim_names): remove_axes = list(range(n_dims)) remove_axes.pop(keep_axis) remove_axes = tuple(remove_axes) axis = OrderedDict() 
wstderr(' mean across non-{} axes...'.format(ax_name)) t0 = time() axis['mean'] = norm * np.asarray( np.mean(table['table'], axis=remove_axes)) wstderr(' ({} s)\n'.format(np.round(time() - t0, 3))) wstderr(' median across non-{} axes...'.format(ax_name)) t0 = time() axis['median'] = norm * np.asarray( np.ma.median(nonzero_table, axis=remove_axes)) wstderr(' ({} s)\n'.format(np.round(time() - t0, 3))) wstderr(' max across non-{} axes...'.format(ax_name)) t0 = time() axis['max'] = norm * np.asarray( np.max(table['table'], axis=remove_axes)) wstderr(' ({} s)\n'.format(np.round(time() - t0, 3))) summary['dimensions'][ax_name] = axis wstderr(' Total time to find marginal distributions: {} s\n'.format( np.round(time() - t0_marg, 3))) if save_summary: ext = None base_fname = clsim_fname while ext not in ('', '.fits'): base_fname, ext = splitext(base_fname) ext = ext.lower() outfpath = join(outdir, base_fname + '_summary.json.bz2') to_json(summary, outfpath) print('saved summary to "{}"'.format(outfpath)) wstderr('Time to summarize table: {} s\n'.format( np.round(time() - t_start, 3))) return table, summary
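# Standalone illustration of the marginalisation pattern above, with a small
# random array standing in for the CLSIM table (shapes and values are
# hypothetical): each kept axis is reduced over all remaining axes with a
# mean and a max, plus a median that ignores empty (zero) bins.
import numpy as np

rng = np.random.RandomState(0)
table = rng.poisson(0.5, size=(5, 4, 3)).astype(float)
nonzero_table = np.ma.masked_equal(table, 0)

n_dims = table.ndim
for keep_axis in range(n_dims):
    remove_axes = tuple(a for a in range(n_dims) if a != keep_axis)
    marg_mean = table.mean(axis=remove_axes)
    marg_max = table.max(axis=remove_axes)
    # flatten the removed axes so np.ma.median only ever sees a single axis
    moved = np.moveaxis(nonzero_table, keep_axis, 0)
    marg_median = np.asarray(np.ma.median(moved.reshape(moved.shape[0], -1), axis=1))
    print(keep_axis, marg_mean.shape, marg_max.shape, marg_median.shape)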
                    help="settings file to use for making templates.")
hselect = parser.add_mutually_exclusive_group(required=False)
hselect.add_argument('--normal', dest='normal', default=True,
                     action='store_true', help="select the normal hierarchy")
hselect.add_argument('--inverted', dest='normal', default = False,
                     action='store_false', help="select the inverted hierarchy")
parser.add_argument('-v','--verbose',action='count',default=None,
                    help='set verbosity level.')
args = parser.parse_args()
set_verbosity(args.verbose)

profile.info("start initializing")

#Load all the settings
model_settings = from_json(args.settings)

#Select a hierarchy
logging.info('Selected %s hierarchy'%
             ('normal' if args.normal else 'inverted'))
params = select_hierarchy(model_settings['params'],normal_hierarchy=args.normal)

#Initialize template maker
template_maker = TemplateMaker(get_values(params),**model_settings['binning'])
profile.info("stop initializing")

#Now get the actual template
profile.info("start template calculation")
template_maker.get_template(get_values(params))
profile.info("stop template calculation")
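# Standalone sketch of the mutually exclusive flag pair used above: both
# options write to the same 'normal' destination, so exactly one boolean
# survives parsing (only the flag names from the script are assumed here).
from argparse import ArgumentParser

p = ArgumentParser()
group = p.add_mutually_exclusive_group(required=False)
group.add_argument('--normal', dest='normal', default=True,
                   action='store_true', help="select the normal hierarchy")
group.add_argument('--inverted', dest='normal', action='store_false',
                   help="select the inverted hierarchy")

print(p.parse_args([]).normal)              # True (default)
print(p.parse_args(['--inverted']).normal)  # False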