def __init__(
    self,
    ebins,
    czbins,
    detector_depth=None,
    earth_model=None,
    prop_height=None,
    osc_precision=5e-4,
    **kwargs
):
    """
    Parameters needed to instantiate a NucraftOscillationService:
    * ebins: Energy bin edges
    * czbins: cos(zenith) bin edges
    * earth_model: Earth density model used for matter oscillations.
    * detector_depth: Detector depth in km.
    * prop_height: Height in the atmosphere to begin in km. Pass
          'sample' to sample from a parametrization to the atmospheric
          interaction model presented in "Path length distributions of
          atmospheric neutrinos", Gaisser and Stanev, PhysRevD.57.1977.
          Any other value (including the default, None) selects
          height mode 0.
    * osc_precision: Numerical precision for oscillation probabilities
    """
    OscillationServiceBase.__init__(self, ebins, czbins)
    logging.info("Initializing %s..." % self.__class__.__name__)

    # The constructor arguments used to be dumped with a bare `print`;
    # route them through the logger instead so they obey the log level.
    logging.debug("Constructor parameters: %s", get_params())
    report_params(get_params(), ["km", "", "", "km"])

    self.prop_height = prop_height  # km above spherical Earth surface

    # height_mode = 0 ensures that the interaction takes place at the
    # chosen height; the sampling mode draws the altitude from a
    # distribution.
    # NOTE(review): the original note called the sampling mode "1", but the
    # value actually set here is 3 -- confirm against the NuCraft API.
    self.height_mode = 3 if self.prop_height == "sample" else 0
    logging.debug("NuCraft height mode: %d" % self.height_mode)

    self.detector_depth = detector_depth  # km below spherical Earth surface
    self.num_prec = osc_precision
    self.get_earth_model(earth_model)
def build_fisher_matrix(gradient_maps, fiducial_map, template_settings):
    """
    Build one Fisher matrix per channel from the gradient maps.

    * gradient_maps: {parameter: {channel: {'map': array}}} holding the
        partial derivative of the bin counts w.r.t. each parameter
    * fiducial_map: {channel: {'map': array}} with the best-fit bin counts
    * template_settings: {parameter: {'value': ..., 'prior': ...}}

    Returns a dict {channel: FisherMatrix}. An empty `gradient_maps`
    yields an empty dict.
    """
    # Fix the ordering of parameters once; a real list is needed because
    # the first entry is indexed below.
    params = list(gradient_maps.keys())
    if not params:
        return {}

    fisher = {}
    for chan in gradient_maps[params[0]]:
        # Find non-empty bins in the flattened fiducial map
        fiducial_counts = fiducial_map[chan]['map'].flatten()
        nonempty = np.nonzero(fiducial_counts)
        logging.info("Using %u non-empty bins of %u"
                     % (len(nonempty[0]), len(fiducial_counts)))

        # Gradients for the non-empty bins, shape (n_params, n_bins)
        gradients = np.array([gradient_maps[par][chan]['map'].flatten()[nonempty]
                              for par in params])

        # Error estimate from the best-fit bin count for the non-empty bins
        sigmas = np.sqrt(fiducial_counts[nonempty])

        # Sum over bins of outer(gradient, gradient) / sigma**2, written as
        # a single matrix product instead of a per-bin Python loop (equal
        # up to floating-point summation order).
        fmatrix = np.dot(gradients / sigmas**2, gradients.T)

        # And construct the fisher matrix object
        fisher[chan] = FisherMatrix(
            matrix=fmatrix,
            parameters=params,  # order is important here!
            best_fits=[template_settings[par]['value'] for par in params],
            priors=[template_settings[par]['prior'] for par in params],
        )

    # Return all fisher matrices
    return fisher
def __init__(self, flux_file=None, smooth=0.05, **params):
    """
    Load an atmospheric flux table and store one spline per primary.

    * flux_file: resource name of the flux table (read via open_resource
        and np.loadtxt)
    * smooth: smoothing factor handed to bisplrep as `s`

    The splines are stored in self.spline_dict, keyed by primary name.
    """
    logging.info("Loading atmospheric flux table %s" %flux_file)

    # Load the data table; one row per column of the file after transposing
    raw_columns = np.loadtxt(open_resource(flux_file)).T

    # Columns in Honda files are in the same order: energy, then primaries
    column_names = ['energy']+primaries
    flux_dict = dict(zip(column_names, raw_columns))

    for key in list(flux_dict):
        # There are 20 lines per zenith range
        stacked = np.array(np.split(flux_dict[key], 20))
        # Everything except the energy column is stored transposed
        flux_dict[key] = stacked if key == 'energy' else stacked.T

    # Set the zenith and energy range
    flux_dict['energy'] = flux_dict['energy'][0]
    flux_dict['coszen'] = np.linspace(0.95, -0.95, 20)

    # Now get a spline representation of the flux table, done in log of
    # energy and log of flux (more stable).
    logging.debug('Make spline representation of flux')
    logE, C = np.meshgrid(np.log10(flux_dict['energy']), flux_dict['coszen'])

    self.spline_dict = {}
    for nutype in primaries:
        log_flux = np.log10(flux_dict[nutype]).T
        self.spline_dict[nutype] = bisplrep(logE, C, log_flux, s=smooth)
def get_osc_probLT_dict(self,deltam21=None,deltam31=None,theta12=None,
                        theta13=None,theta23=None,deltacp=None,**kwargs):
    """
    Returns an oscillation probability map dictionary calculated
    at the values of the input parameters:
      deltam21,deltam31,theta12,theta13,theta23,deltacp
    for flavor_from to flavor_to, with the binning of ebins,czbins.
    The dictionary is formatted as:
      'nue_maps': {'nue':map,'numu':map,'nutau':map},
      'numu_maps': {...}
      'nue_bar_maps': {...}
      'numu_bar_maps': {...}

    Currently only the sign of deltam31 is used: it selects one of two
    pre-computed HDF5 files below self.datadir; the other parameters are
    accepted but ignored.
    """

    ########################################################################
    ### TRUE ALGORITHM WHEN WE DECIDE ON HOW TO HANDLE OSC PROB DATA     ###
    # step 1: identify where the data is located: on disk or on server?    #
    # step 2: downsample these maps if not already done, for ebins, czbins #
    # step 3: do interpolation in oscillation parameters to arrive at the  #
    #         maps for (deltam21,deltam31,theta12,theta13,theta23,deltacp) #
    # return dictionary of smoothed, interpolated map.                     #
    ########################################################################

    if deltam31 > 0.0:
        basename = 'oscProbLT_dm31_0.246_th23_38.645.hdf5'
    else:
        basename = 'oscProbLT_dm31_-0.238_th23_38.645.hdf5'
    # Always join directory and file name as separate components; the
    # second branch used to concatenate them, which dropped the path
    # separator whenever datadir had no trailing slash.
    filename = os.path.join(self.datadir, basename)

    logging.info("Loading file: %s"%filename)
    osc_probLT_dict = get_osc_probLT_dict_hdf5(filename)

    return osc_probLT_dict
def check_kernels(self, kernels):
    """
    Verify that the reco kernels match this service's binning and shape.

    Raises ValueError if either kernel axis differs from the service's
    own binning and IndexError if any kernel does not have shape
    (n_e, n_cz, n_e, n_cz). Returns True when everything matches.
    """
    # Axes first: both kernel axes must equal the service's own binning
    logging.debug('Checking binning of reconstruction kernels')
    axis_pairs = ((kernels['ebins'], self.ebins),
                  (kernels['czbins'], self.czbins))
    for kernel_axis, own_axis in axis_pairs:
        if utils.is_equal_binning(kernel_axis, own_axis):
            continue
        raise ValueError("Binning of reconstruction kernel doesn't "
                         "match the event maps!")

    # Then the 4D shape of every flavour/interaction kernel
    logging.debug('Checking shape of reconstruction kernels')
    n_ebins = len(self.ebins)-1
    n_czbins = len(self.czbins)-1
    shape = (n_ebins, n_czbins, n_ebins, n_czbins)

    for flavour in kernels:
        # The two binning entries are not kernels
        if flavour in ['ebins', 'czbins']:
            continue
        for interaction in kernels[flavour]:
            found = np.shape(kernels[flavour][interaction])
            if found != shape:
                raise IndexError(
                    'Reconstruction kernel for %s/%s has wrong shape: '
                    '%s, %s' %(flavour, interaction, str(shape), str(found))
                )

    logging.info('Reconstruction kernels are sane')
    return True
def __init__(self,ebins,czbins,particle_ID=None,**kwargs):
    """
    Build track/cascade PID maps from parameterized functions.

    * ebins: energy bin edges
    * czbins: coszen bin edges
    * particle_ID: mapping signature -> {'trck': expr, 'cscd': expr},
        where each expr is a string that evaluates to a callable taking
        the energy bin centers

    The maps are stored in self.pid_maps[signature]['trck' | 'cscd'].
    Raises ValueError if any evaluated PID value is negative.
    """
    logging.info('Initializing PIDServicePar...')

    # Evaluate the functions at the bin centers
    ecen = get_bin_centers(ebins)
    czcen = get_bin_centers(czbins)

    self.pid_maps = {}
    for signature in particle_ID.keys():
        spec = particle_ID[signature]

        # Generate the functions.
        # NOTE(review): eval() executes arbitrary code from the settings;
        # only safe as long as particle_ID comes from a trusted source.
        to_trck_func = eval(spec['trck'])
        to_cscd_func = eval(spec['cscd'])

        # Make maps from the functions evaluated at the bin centers; the
        # second meshgrid output repeats the energy values along coszen.
        to_trck_map = np.meshgrid(czcen, to_trck_func(ecen))[1]
        to_cscd_map = np.meshgrid(czcen, to_cscd_func(ecen))[1]

        labelled_maps = (('Track', to_trck_map), ('Cascade', to_cscd_map))
        for label, pidmap in labelled_maps:
            if (pidmap < 0).any():
                raise ValueError('%s PID probabilites can not be negative!'
                                 ' Investigate parameterization'%label)

        self.pid_maps[signature] = {'trck': to_trck_map,
                                    'cscd': to_cscd_map}
def get_hierarchy_gradients(data_tag, fiducial_maps, fiducial_params,
                            grid_settings, store_dir):
    """
    Obtain the gradient w.r.t. the 'hierarchy' parameter by linearly
    mixing the NMH and IMH fiducial maps at each step value h
    (NMH * h + IMH * (1 - h)) and differentiating the result.

    The mixed maps are written to <store_dir>/hierarchy_<data_tag>.json
    unless store_dir is the system temp directory.
    """
    logging.info("Working on parameter hierarchy.")

    steps = get_steps('hierarchy', grid_settings, fiducial_params)

    hmap = dict((step, {'trck': {}, 'cscd': {}}) for step in steps)
    for h in steps:
        for channel in ['trck', 'cscd']:
            normal = fiducial_maps['NMH'][channel]
            inverted = fiducial_maps['IMH'][channel]
            mixed = hmap[h][channel]

            # Superpose bin counts
            mixed['map'] = normal['map']*h + inverted['map']*(1.-h)
            # Obtain binning from one of the maps, since identical by
            # construction (cf. FisherAnalysis)
            mixed['ebins'] = normal['ebins']
            mixed['czbins'] = normal['czbins']

    # TODO: give hmap the same structure as pmaps?
    # Get_derivative_map works even if 'params' and 'ebins','czbins' not
    # in 'data'

    # Store the maps used to calculate partial derivatives
    if store_dir != tempfile.gettempdir():
        logging.info("Writing maps for parameter 'hierarchy' to %s"%store_dir)

    to_json(hmap,os.path.join(store_dir,"hierarchy_"+data_tag+".json"))

    return get_derivative_map(hmap, fiducial_params['hierarchy'],degree=2)
def __init__(self,ebins,czbins,aeff_egy_par,aeff_coszen_par,**params):
    '''
    Build the parametric effective-area dictionary.

    Parameters:
    * aeff_egy_par - effective area vs. Energy 1D parameterizations for
        each flavor, in a text file (.dat)
    * aeff_coszen_par - 1D coszen parameterization for each flavor as a
        json_string code.

    Fills self.aeff_dict[flavor] = {'cc': ..., 'nc': ...} for the six
    (anti)neutrino flavors.
    '''
    logging.info('Initializing AeffServicePar...')

    self.ebins = ebins
    self.czbins = czbins

    # Parametric approach treats all NC events the same: one map for
    # neutrinos and one for anti-neutrinos, shared by all flavors.
    nc_nu = self.get_aeff_flavor('NC',aeff_egy_par,aeff_coszen_par)
    nc_nubar = self.get_aeff_flavor('NC_bar',aeff_egy_par,aeff_coszen_par)

    self.aeff_dict = {}
    logging.info("Creating effective area parametric dict...")
    all_flavors = ('nue','nue_bar','numu','numu_bar','nutau','nutau_bar')
    for flavor in all_flavors:
        logging.debug("Working on %s effective areas"%flavor)
        cc_map = self.get_aeff_flavor(flavor,aeff_egy_par,aeff_coszen_par)
        if 'bar' in flavor:
            nc_map = nc_nubar
        else:
            nc_map = nc_nu
        self.aeff_dict[flavor] = {'cc': cc_map, 'nc': nc_map}
def sampleHypercube(n_dim, n_samp, rand_set_id=0, crit='m', iterations=5,
                    rdata_dir='~/cowen/data/random'):
    """Return Latin-hypercube samples, loading them from disk if cached.

    The cache file name is derived from all sampling parameters via
    samplesFilename and looked up below `rdata_dir` (with `~` and
    environment variables expanded). If the file is missing, new samples
    are generated with pyDOE.lhs and written to that file.

    Requires pyDOE to generate new samples.
    """
    cache_name = samplesFilename(n_dim=n_dim,
                                 n_samp=n_samp,
                                 rand_set_id=rand_set_id,
                                 crit=crit,
                                 iterations=iterations)
    rdata_dir = os.path.expandvars(os.path.expanduser(rdata_dir))
    cache_path = os.path.join(rdata_dir, cache_name)

    # Cached result available: nothing to generate
    if os.path.exists(cache_path):
        return fileio.from_file(cache_path)

    logging.info('File not found. Generating new set of samples & saving'
                 ' result to "%s"', cache_path)

    # Imported lazily so pyDOE is only needed when samples are generated
    import pyDOE
    mkdir(rdata_dir)

    # Set a deterministic random state based upon the critical hypercube
    # sampling parameters specified
    n_bad_seeds(n_dim, n_samp, rand_set_id)
    samps = pyDOE.lhs(n=n_dim, samples=n_samp, criterion=crit,
                      iterations=iterations)
    fileio.to_file(samps, cache_path)
    return samps
def oversample_binning(coarse_bins, factor):
    """
    Oversample bin edges (coarse_bins) by the given factor.

    Every coarse bin is split into `factor` fine bins, so the result has
    factor*(len(coarse_bins)-1)+1 edges and spans the same range as the
    input. Linear and logarithmic binnings are oversampled globally; any
    other binning is oversampled bin by bin with linear spacing.
    """
    if is_linear(coarse_bins):
        logging.info('Oversampling linear output binning by factor %i.'
                     %factor)
        fine_bins = np.linspace(coarse_bins[0], coarse_bins[-1],
                                factor*(len(coarse_bins)-1)+1)
    elif is_logarithmic(coarse_bins):
        logging.info('Oversampling logarithmic output binning by factor %i.'
                     %factor)
        fine_bins = np.logspace(np.log10(coarse_bins[0]),
                                np.log10(coarse_bins[-1]),
                                factor*(len(coarse_bins)-1)+1)
    else:
        logging.warning('Irregular binning detected! Evenly oversampling '
                        'by factor %i'%factor)
        # Each segment holds the lower edge of a coarse bin plus the
        # interior fine edges (endpoint excluded, so that shared edges are
        # not duplicated).
        segments = [np.linspace(lower, upper, factor, endpoint=False)
                    for lower, upper in zip(coarse_bins[:-1], coarse_bins[1:])]
        # The upper edge of the last bin is not part of any segment and has
        # to be added explicitly; without it the fine binning would stop
        # short of the coarse range.
        segments.append(np.asarray(coarse_bins[-1:], dtype=float))
        fine_bins = np.concatenate(segments)

    return fine_bins
def __init__(self, ebins, czbins, detector_depth=None, earth_model=None,
             prop_height=None, oversample_e=None, oversample_cz=None,
             **kwargs):
    """
    Set up binning, oversampled binning and the oscillation kernel.

    Parameters:
    * ebins: Energy bin edges
    * czbins: cos(zenith) bin edges
    * earth_model: Earth density model used for matter oscillations.
    * detector_depth: Detector depth in km.
    * prop_height: Height in the atmosphere to begin in km.
    * oversample_e, oversample_cz: oversampling factors passed to
        oversample_binning for the energy / coszen axis

    Raises IndexError if either bin-edge array is not one-dimensional.
    """
    logging.info('Instantiating %s'%self.__class__.__name__)

    self.ebins = np.array(ebins)
    self.czbins = np.array(czbins)

    # Both axes have to be flat arrays of edges
    for ax in (self.ebins, self.czbins):
        if np.ndim(ax) != 1:
            raise IndexError('Axes must be 1d! '+str(np.shape(ax)))

    report_params(get_params(),['km','','','','km'])

    self.earth_model = find_resource(earth_model)
    self.FTYPE = np.float64

    # Fine binning and the matching bin centers, per axis
    self.ebins_fine = oversample_binning(self.ebins, oversample_e)
    self.czbins_fine = oversample_binning(self.czbins, oversample_cz)
    self.ecen_fine = get_bin_centers(self.ebins_fine)
    self.czcen_fine = get_bin_centers(self.czbins_fine)

    self.initialize_kernel()
def get_gradients(data_tag, param, template_maker, fiducial_params,
                  grid_settings, store_dir):
    """
    Compute the gradient map for one parameter.

    A template is generated for every step value of `param` (all other
    parameters stay at their fiducial settings) and the resulting maps
    are differentiated with get_derivative_map. The templates are written
    to <store_dir>/<param>_<data_tag>.json unless store_dir is the system
    temp directory.
    """
    logging.info("Working on parameter %s."%param)

    steps = get_steps(param, grid_settings, fiducial_params)

    # One template per value of the parameter in question
    pmaps = {}
    for param_value in steps:
        with Timer() as t:
            # Copy of the fiducial settings with only this parameter's
            # 'value' replaced by the current step
            shifted_entry = dict(fiducial_params[param], value=param_value)
            shifted_params = dict(fiducial_params, **{param: shifted_entry})
            maps = template_maker.get_template(get_values(shifted_params))
        tprofile.info("==> elapsed time for template: %s sec"%t.secs)

        pmaps[param_value] = maps

    # Store the maps used to calculate partial derivatives
    if store_dir != tempfile.gettempdir():
        logging.info("Writing maps for parameter %s to %s"%(param,store_dir))

    to_json(pmaps, os.path.join(store_dir,param+"_"+data_tag+".json"))

    return get_derivative_map(pmaps,fiducial_params[param],degree=2)
def mkdir(d, mode=0o750):
    """Create directory `d` (and any missing parents) if it does not exist.

    `~` and environment variables in `d` are expanded first. A path that
    already exists is silently accepted; any other OSError is re-raised.

    * d: directory path to create
    * mode: permission bits passed to os.makedirs (default 0o750)
    """
    import errno

    try:
        os.makedirs(os.path.expandvars(os.path.expanduser(d)), mode=mode)
    except OSError as err:
        # "Already exists" is fine; everything else is a real error and is
        # re-raised with its original traceback.
        if err.errno != errno.EEXIST:
            raise
    else:
        logging.info('Created directory: ' + d + '\n')
def kernel_from_simfile(self, simfile=None, **kwargs):
    """Open the HDF5 event file `simfile` (resolved via find_resource).

    If the file cannot be opened, the error is logged and the process
    exits with status 1.
    """
    logging.info("Opening file: %s" % (simfile))
    try:
        # NOTE(review): the handle is not used or closed in the part of the
        # function visible here -- confirm against the rest of the method.
        fh = h5py.File(find_resource(simfile), "r")
    except IOError as e:
        logging.error("Unable to open event data file %s" % simfile)
        logging.error(e)
        sys.exit(1)
def __init__(self,ebins,czbins,aeff_weight_file=None,**kwargs):
    """Store the binning and open the effective-area weight file.

    * ebins: energy bin edges
    * czbins: coszen bin edges
    * aeff_weight_file: resource name of the HDF5 file, resolved via
        find_resource

    If the file cannot be opened, the error is logged and the process
    exits with status 1.
    """
    self.ebins = ebins
    self.czbins = czbins
    logging.info('Initializing AeffServiceMC...')

    logging.info('Opening file: %s'%(aeff_weight_file))
    try:
        # NOTE(review): the handle is not used or closed in the part of the
        # constructor visible here -- confirm against the rest of the method.
        fh = h5py.File(find_resource(aeff_weight_file),'r')
    except IOError as e:
        logging.error("Unable to open aeff_weight_file %s"%aeff_weight_file)
        logging.error(e)
        sys.exit(1)
def get_llh_hypothesis(
        data_tag, asimov_data, ntrials, template_maker, template_params,
        minimizer_settings, save_steps, check_octant):
    """
    Runs the llh fitter ntrials number of times, pulling pseudo data sets
    from asimov_data.

    Parameters:
    * data_tag - hierarchy type running for assumed true.
    * asimov_data - asimov (unfluctuated) data from which to generate
        poisson fluctuated pseudo data
    * ntrials - number of trials to run for each hierarchy hypothesis
    * template_maker - instance of TemplateMaker class, from which to fit
        pseudo data to
    * template_params - dictionary of parameters at which to test the
        pseudo data and find the best match llh
    * minimizer_settings - settings for bfgs minimizer in llh fit
    * save_steps - flag to save the optimizer steps
    * check_octant - boolean to check both octants of theta23

    Returns the list of per-trial dictionaries; each holds the RNG 'seed'
    and the fit results under 'hypo_NMH' and 'hypo_IMH'.
    """
    hypotheses = (('hypo_NMH', True), ('hypo_IMH', False))

    trials = []
    for itrial in range(1, ntrials+1):
        tprofile.info("start trial %d"%itrial)
        logging.info(">"*10 + "Running trial: %05d"%itrial + "<"*10)

        # One trial of results, starting with the seed used for the
        # pseudo data
        seed = get_seed()
        results = {'seed': seed}
        logging.info(" RNG seed: %ld"%results['seed'])

        # Get random map generated from asimov data (or from data_tag).
        fmap = get_random_map(asimov_data, seed=seed)

        for hypo_tag, hypo_normal in hypotheses:
            physics.info(
                "Finding best fit for %s under %s assumption"%(data_tag,hypo_tag))
            with Timer() as t:
                llh_data = find_max_llh_bfgs(
                    fmap, template_maker, template_params,
                    minimizer_settings, save_steps,
                    normal_hierarchy=hypo_normal,
                    check_octant=check_octant)
            tprofile.info("==> elapsed time for optimizer: %s sec"%t.secs)

            # Store the LLH data
            results[hypo_tag] = llh_data

        trials.append(results)
        tprofile.info("stop trial %d"%itrial)

    return trials
def __init__(self,ebins,czbins,reco_weight_file=None,**kwargs):
    """Store the binning and open the reconstruction weight file.

    * ebins: energy bin edges
    * czbins: coszen bin edges
    * reco_weight_file: resource name of the HDF5 file, resolved via
        find_resource

    If the file cannot be opened, the error is logged and the process
    exits with status 1.
    """
    self.ebins = ebins
    self.czbins = czbins
    logging.info("Initializing RecoService...")

    logging.info('Opening file: %s'%(reco_weight_file))
    try:
        # NOTE(review): the handle is not used or closed in the part of the
        # constructor visible here -- confirm against the rest of the method.
        fh = h5py.File(find_resource(reco_weight_file),'r')
    except IOError as e:
        logging.error("Unable to open event data file %s"%reco_weight_file)
        logging.error(e)
        sys.exit(1)
def __init__(self, ebins, czbins, detector_depth=None, earth_model=None,
             prop_height=None, oversample_e=None, oversample_cz=None,
             gpu_id=None, **kwargs):
    """
    Initialize PyCUDA, the (oversampled) binning and the kernel.

    Parameters:
    * ebins: Energy bin edges
    * czbins: cos(zenith) bin edges
    * earth_model: Earth density model used for matter oscillations.
    * detector_depth: Detector depth in km.
    * prop_height: Height in the atmosphere to begin in km.
    * oversample_e, oversample_cz: oversampling factors passed to
        oversample_binning for the energy / coszen axis
    * gpu_id: If running on a system with multiple GPUs, it will choose
        the one with gpu_id. Otherwise, defaults to default context

    Raises IndexError if either bin-edge array is not one-dimensional.
    """
    self.gpu_id = gpu_id
    try:
        import pycuda.autoinit
        self.context = cuda.Device(self.gpu_id).make_context()
        print("Initializing PyCUDA using gpu id: %d"%self.gpu_id)
    except Exception as err:
        # Fall back to the automatically initialized context, e.g. when no
        # gpu_id was given. Only ordinary errors are caught, so that
        # Ctrl-C / SystemExit are no longer swallowed here.
        logging.debug("Explicit GPU selection failed: %s", err)
        import pycuda.autoinit
        print("Auto initializing PyCUDA...")

    logging.info('Instantiating %s'%self.__class__.__name__)
    self.ebins = np.array(ebins)
    self.czbins = np.array(czbins)
    self.prop_height = prop_height

    # Both axes have to be flat arrays of edges
    for ax in [self.ebins, self.czbins]:
        if (len(np.shape(ax)) != 1):
            raise IndexError('Axes must be 1d! '+str(np.shape(ax)))

    report_params(get_params(),['km','','','',''])

    earth_model = find_resource(earth_model)
    self.earth_model = earth_model
    self.FTYPE = np.float64

    # Fine binning and the matching bin centers, per axis
    self.ebins_fine = oversample_binning(self.ebins, oversample_e)
    self.czbins_fine = oversample_binning(self.czbins, oversample_cz)
    self.ecen_fine = get_bin_centers(self.ebins_fine)
    self.czcen_fine = get_bin_centers(self.czbins_fine)

    self.initialize_kernel(detector_depth,**kwargs)
def __init__(self,template_settings,ebins=None,czbins=None,oversample_e=None,
             oversample_cz=None,**kwargs):
    '''
    TemplateMaker class handles all of the setup and calculation of the
    templates for a given binning.

    Parameters:
    * template_settings - dictionary of all template-making settings;
        'osc_code' and 'parametric' are read here, the whole dictionary
        is forwarded to every service constructor
    * ebins - energy bin edges
    * czbins - coszen bin edges
    * oversample_e, oversample_cz - stored for later use

    Raises NotImplementedError if template_settings['osc_code'] is not
    'prob3'.
    '''
    self.ebins = ebins
    self.czbins = czbins
    self.oversample_e = oversample_e
    self.oversample_cz = oversample_cz
    logging.debug("Using %u bins in energy from %.2f to %.2f GeV"%
                  (len(self.ebins)-1,self.ebins[0],self.ebins[-1]))
    logging.debug("Using %u bins in cos(zenith) from %.2f to %.2f"%
                  (len(self.czbins)-1,self.czbins[0],self.czbins[-1]))

    # Instantiate a flux model service
    self.flux_service = HondaFluxService(**template_settings)

    # Oscillated Flux:
    osc_code = template_settings['osc_code']
    if osc_code == 'prob3':
        self.osc_service = Prob3OscillationService(self.ebins,self.czbins,
                                                   **template_settings)
    else:
        # `osc_code` used to be undefined at this point, which turned the
        # intended NotImplementedError into a NameError.
        raise NotImplementedError('OscillationService is only implemented for prob3! osc_code = %s'%osc_code)

    # Aeff/True Event Rate:
    if template_settings['parametric']:
        logging.info(" Using effective area from PARAMETRIZATION...")
        self.aeff_service = AeffServicePar(self.ebins,self.czbins,
                                           **template_settings)
    else:
        logging.info(" Using effective area from MC EVENT DATA...")
        self.aeff_service = AeffServiceMC(self.ebins,self.czbins,
                                          **template_settings)

    # Reco Event Rate:
    self.reco_service = RecoServiceMC(self.ebins,self.czbins,
                                      **template_settings)

    # PID Service:
    self.pid_service = PIDServicePar(self.ebins,self.czbins,
                                     **template_settings)
def plot_pid_stage(nmh, imh, title='', save=False, dpi=150, outdir=""):
    '''
    Plots templates and asymmetry for only the final level stage.

    For each channel ('trck', 'cscd') one figure with three panels is
    made: NMH counts, IMH counts and the asymmetry from get_asymmetry.
    With save=True each figure is written to
    <outdir>/<title>_asym_<chan>.png.
    '''
    channels = ['trck','cscd']
    h_asym = get_asymmetry(nmh, imh, channels)

    logging.info(" Total trck events (NMH): %d"%np.sum(nmh['trck']['map']))
    logging.info(" Total trck events (IMH): %d"%np.sum(imh['trck']['map']))
    logging.info(" Total cscd events (NMH): %d"%np.sum(nmh['cscd']['map']))
    logging.info(" Total cscd events (IMH): %d"%np.sum(imh['cscd']['map']))

    for chan in channels:
        plt.figure(figsize=(16,5))

        # Panel 1: normal hierarchy counts
        plt.subplot(1,3,1)
        show_map(nmh[chan])
        plt.title(title+' NMH, '+chan+' counts',fontsize='large')

        # Panel 2: inverted hierarchy counts
        plt.subplot(1,3,2)
        show_map(imh[chan])
        plt.title(title+' IMH, '+chan +' counts',fontsize='large')

        # Panel 3: asymmetry; the quoted sigma is the quadrature sum over
        # all bins of the asymmetry map
        plt.subplot(1,3,3)
        asym = h_asym[chan]
        sigma = np.sqrt(np.sum(asym['map']**2))
        show_map(asym,cmap='RdBu_r')
        plt.title(title+' '+chan+r' asymmetry, $\sigma$ = %.3f'%sigma,
                  fontsize='large')

        if not save:
            continue
        print("Saving %s chan..."%chan)
        filename = os.path.join(outdir,title+'_asym_'+chan+'.png')
        plt.savefig(filename,dpi=dpi)
def plot_asimov_line(llh_dict, tkey, max_yval, **kwargs):
    """
    Draw a vertical line at the Asimov llr and return that value.

    llh_dict - dictionary of llh data
    tkey - key of the true hierarchy (from asimov or pseudo data set)
    max_yval - maximum yvalue for asimov line.
    kwargs - forwarded to plt.vlines
    """
    validate_key(tkey)

    entry = llh_dict[tkey]
    asimov_data = entry['asimov_data']
    asimov_data_null = entry['asimov_data_null']
    null_fit = entry['llh_null']

    llh_asimov = get_binwise_llh(asimov_data,asimov_data)
    # Sign-flipped last entry of the null-hypothesis llh series
    llh_null = -null_fit['llh'][-1]

    logging.info(" >> llh_asimov: %.4f"%llh_asimov)
    logging.info(" >> llh null: %.4f"%llh_null)
    logging.info("Null hypothesis: ")
    for k,v in entry['llh_null'].items():
        logging.info(" >> %s: %f"%(k,v[-1]))

    # The sign convention of the ratio depends on the true hierarchy
    if 'true_N' in tkey:
        asimov_llr = llh_null - llh_asimov
    else:
        asimov_llr = llh_asimov - llh_null

    vline = plt.vlines(asimov_llr, 0.1, max_yval, colors='k', **kwargs)

    return asimov_llr
def get_template(self,params,return_stages=False):
    '''
    Runs entire template-making chain (flux -> oscillation -> true event
    rate -> reco event rate -> PID), using parameters found in 'params'
    dict.

    Returns the final event-rate maps, or, if 'return_stages' is set to
    True, the output of all five stages as a simple tuple in chain order.
    '''
    stage_flux = get_flux_maps(self.flux_service,self.ebins,self.czbins)

    logging.info("Getting osc prob maps...")
    stage_osc = get_osc_flux(stage_flux,self.osc_service,
                             oversample_e=self.oversample_e,
                             oversample_cz=self.oversample_cz,**params)

    logging.info("Getting event rate true maps...")
    stage_true = get_event_rates(stage_osc,self.aeff_service,**params)

    logging.info("Getting event rate reco maps...")
    stage_reco = get_reco_maps(stage_true,self.reco_service,**params)

    logging.info("Getting pid maps...")
    stage_pid = get_pid_maps(stage_reco,self.pid_service)

    if return_stages:
        return (stage_flux, stage_osc, stage_true, stage_reco, stage_pid)
    return stage_pid
def plot_posterior_params(frames, template_settings, plot_param_info=True,
                          pbins=20, **kwargs):
    """Plot posterior parameter distributions, and related data.

    Every column of the first frame except "hypo" and "pseudo_data" is
    plotted for each frame, four subplots per figure. Returns the list of
    figures and the matching list of file names
    (<pseudo_data>_<hypo>_<n>.png).
    """
    ######################################################
    # Need a new algorithm here. What I want is to calculate the
    # number of figures then number of subfigures for each figure,
    # based on the number of columns.
    ######################################################
    skipped = ["hypo", "pseudo_data"]
    good_columns = [name for name in frames[0].columns if name not in skipped]

    max_plots_per_fig = 4
    nfigs = (len(good_columns) - 1) // max_plots_per_fig + 1
    logging.info("len(good_cols): %d, nfigs: %d" % (len(good_columns), nfigs))

    figs = []
    fig_names = []
    colors = ["b", "r", "g", "k", "c", "m"]
    for frame in frames:
        data_key = frame["pseudo_data"][0]
        hypo_key = frame["hypo"][0]

        ifig = 0
        for icol, col_name in enumerate(good_columns):
            column = frame[col_name]
            slot = icol % max_plots_per_fig

            # Create new fig if needed:
            if slot == 0:
                ifig += 1
                fig = plt.figure(figsize=(10, 10))
                fig_names.append(
                    data_key + "_" + hypo_key + "_" + str(ifig) + ".png")
                figs.append(fig)
                fig.suptitle("Posteriors for %s, %s" % (data_key, hypo_key))

            # Subplot positions are 1-based
            subplot = slot + 1
            if plot_param_info:
                color = "k"
            else:
                color = colors[icol % len(colors)]

            plot_column(
                data_key,
                hypo_key,
                subplot,
                column,
                template_settings,
                color,
                plot_param_info=plot_param_info,
                pbins=pbins,
            )

    return figs, fig_names
def _get_reco_kernels(self, simfile=None, **kwargs):
    """Return the reconstruction kernels for `simfile`.

    * simfile: MC file to build the kernels from. None or self.simfile
        selects the default kernels, which are built once from
        self.simfile and cached in self.kernels. Any other file is
        evaluated on every call and not cached.
    * kwargs: may contain 'e_reco_scale' / 'cz_reco_scale'; both must be
        1 if given.

    Raises ValueError if one of the reco scales differs from 1.
    """
    for reco_scale in ("e_reco_scale", "cz_reco_scale"):
        if reco_scale in kwargs and kwargs[reco_scale] != 1:
            raise ValueError("%s = %.2f, must be 1.0 for RecoServiceMC!"
                             % (reco_scale, kwargs[reco_scale]))

    if simfile not in [self.simfile, None]:
        logging.info("Reconstruction from non-default MC file %s!" % simfile)
        # Must go through the instance: there is no module-level
        # kernel_from_simfile, the bare call raised NameError.
        return self.kernel_from_simfile(simfile=simfile)

    if not hasattr(self, "kernels"):
        # `simfile` is either None or equal to self.simfile here, so always
        # use the stored default instead of passing None on.
        default_simfile = self.simfile
        logging.info("Using file %s for default reconstruction"
                     % (default_simfile))
        self.kernels = self.kernel_from_simfile(simfile=default_simfile)

    return self.kernels
def check_fine_binning(fine_bins, coarse_bins):
    """
    Check whether a fine binning was specified and is actually finer
    than the coarse one.

    Returns False if fine_bins is None and True if the requested binning
    is usable; raises ValueError if it is not finer than coarse_bins.
    """
    # Nothing requested
    if fine_bins is None:
        return False

    if not is_coarser_binning(coarse_bins, fine_bins):
        errmsg = ('Requested oversampled binning is coarser '
                  'than output binning. Aborting.')
        logging.error(errmsg)
        raise ValueError(errmsg)

    # Everything is fine
    logging.info('Using requested binning for oversampling.')
    return True
def make_llr_with_false_h(llr_true_h, llr_false_h, nbins, xlim=15):
    """
    Plot, side by side for 'true_NH' and 'true_IH', the llr distribution
    of the true hierarchy together with the distribution obtained for the
    other-hierarchy best fit.

    In each panel a vertical line marks the median of the true-hierarchy
    distribution and plot_fill shades the false-hierarchy distribution
    relative to it. The rows returned by plot_fill are passed to
    displayStats. Returns the figure.
    """
    true_keys = ['true_NH','true_IH']
    fig = plt.figure(figsize=(15,7))

    # 0) Plot true_h distributions, and get mean llr value
    logging.info(
        " -->Plotting, calculating gaussian parameters for MC True:")
    true_colors = ['b','r']
    for panel, tkey in enumerate(true_keys, 1):
        plt.subplot(1,2,panel)
        if tkey == 'true_NH':
            label = 'LLR(true Normal)'
        else:
            label = 'LLR(true Inverted)'
        hvals, bincen, gfit = plot_llr_distribution(
            llr_true_h[tkey], tkey, nbins, color=true_colors[panel-1],
            label=label)

    logging.info(" -->Plotting for false hierarchy best fit:")
    mc_table = []
    false_colors = ['r','b']
    for panel, tkey in enumerate(true_keys, 1):
        plt.subplot(1,2,panel)
        label=r'H$_0$: Other Hierarchy'
        hvals, bincen, gfit = plot_llr_distribution(
            llr_false_h[tkey], tkey, nbins, color=false_colors[panel-1],
            label=label)

        # Leave some head room above the tallest histogram bar
        max_line = max(hvals)*1.2
        label=("Asimov_%s"%tkey)

        # Reference line at the median of the true-hierarchy distribution
        asimov_llr = llr_true_h[tkey].median()
        vline = plt.vlines(
            asimov_llr, 0.1, max_line ,colors='k')

        mcrow = plot_fill(
            llr_false_h[tkey], tkey, asimov_llr, hvals, bincen, gfit,
            alpha=0.5, hatch='xx', facecolor='black')
        plt.legend(framealpha=0.5,loc='best')

        ax = set_xlim(llr_true_h[tkey],llr_false_h[tkey])
        ax.set_ylim([0,max_line*1.2])

        mc_table.append(mcrow)
        plt.grid(False)

    plt.tight_layout()
    displayStats(mc_table)

    return fig
def get_osc_prob_maps(self, **kwargs):
    """
    Returns an oscillation probability map dictionary calculated
    at the values of the input parameters:
      deltam21,deltam31,theta12,theta13,theta23,deltacp
    for flavor_from to flavor_to, with the binning of ebins,czbins.
    The dictionary is formatted as:
      'nue_maps': {'nue':map,'numu':map,'nutau':map},
      'numu_maps': {...}
      'nue_bar_maps': {...}
      'numu_bar_maps': {...}
    plus the entries 'ebins' and 'czbins'.

    NOTES:
      * expects all angles in [rad]
      * this method doesn't calculate the oscillation probabilities
        itself, but calls get_osc_probLT_dict internally, to get a
        high resolution map of the oscillation probs, which is then
        smoothed onto ebins/czbins with get_smoothed_map.
    """
    # Get the finely binned maps as implemented in the derived class
    logging.info("Retrieving finely binned maps")
    with Timer(verbose=False) as t:
        fine_maps = self.get_osc_probLT_dict(**kwargs)
    print(" ==> elapsed time to get all fine maps: %s sec" % t.secs)

    logging.info("Smoothing fine maps...")
    smoothed_maps = {"ebins": self.ebins, "czbins": self.czbins}

    with Timer(verbose=False) as t:
        for from_nu, tomap_dict in fine_maps.items():
            # Entries whose key contains "vals" (the fine-grid values read
            # below) are not flavour maps
            if "vals" in from_nu:
                continue

            new_tomaps = {}
            for to_nu, pvals in tomap_dict.items():
                logging.debug("Getting smoothed map %s/%s" % (from_nu, to_nu))
                new_tomaps[to_nu] = get_smoothed_map(
                    pvals,
                    fine_maps["evals"],
                    fine_maps["czvals"],
                    self.ebins,
                    self.czbins,
                )
            smoothed_maps[from_nu] = new_tomaps

    profile.debug(" ==> elapsed time to smooth maps: %s sec" % t.secs)

    return smoothed_maps
def plot_posterior_params(frames, template_settings, plot_llh=True,
                          plot_param_info=True, pbins=20, mctrue=False,
                          **kwargs):
    """Plot posterior parameter distributions, and related data.

    * frames: data frames with the columns 'mctrue', 'hypo' and one
        column per plotted quantity
    * template_settings: settings from which the free parameters (normal
        hierarchy) are taken as the columns to plot
    * plot_llh: also plot the 'llh' column
    * plot_param_info, pbins, mctrue: forwarded to plot_column

    Four subplots are placed on each figure. Returns the list of figures
    and the matching list of file names (<mctrue>_<hypo>_<n>.png).
    """
    # A real list is required because 'llh' may be appended below.
    good_columns = list(get_free_params(
        select_hierarchy(template_settings, normal_hierarchy=True)).keys())
    if plot_llh:
        good_columns.append('llh')
    logging.debug("good_columns: %s", good_columns)

    max_plots_per_fig = 4
    # Integer division: number of figures needed for all columns
    nfigs = (len(good_columns)-1)//max_plots_per_fig + 1
    logging.info("len(good_cols): %d, nfigs: %d"%(len(good_columns),nfigs))

    figs = []
    fig_names = []
    colors = ['b','r','g','k','c','m']
    for frame in frames:
        ifig = 0
        true_key = frame['mctrue'][0]
        hypo_key = frame['hypo'][0]

        for icol,col_name in enumerate(good_columns):
            column = frame[col_name]

            # Create new fig if needed:
            if (icol%max_plots_per_fig) == 0:
                ifig += 1
                fig = plt.figure(figsize=(10,10))
                fig_names.append(true_key+"_"+hypo_key+"_"+str(ifig)+".png")
                figs.append(fig)
                fig.suptitle('Posteriors for %s, %s'%(true_key,hypo_key))

            # Subplot positions are 1-based
            subplot = (icol%max_plots_per_fig + 1)
            color = 'k' if plot_param_info else colors[icol%len(colors)]

            plot_column(
                true_key, hypo_key, subplot, column, template_settings,
                color,plot_param_info=plot_param_info,pbins=pbins,
                mctrue=mctrue)

    return figs,fig_names
def get_template(self, params, return_stages=False):
    '''
    Runs entire template-making chain (flux -> oscillation -> true event
    rate -> reco event rate -> PID), using parameters found in 'params'
    dict, and reports the elapsed time of every stage on the profile
    logger.

    Returns the final event-rate maps, or, if 'return_stages' is set to
    True, the output of all five stages as a simple tuple in chain order.
    '''
    logging.info("STAGE 1: Getting Atm Flux maps...")
    with Timer() as t:
        stage_flux = get_flux_maps(self.flux_service, self.ebins,
                                   self.czbins, **params)
    profile.debug("==> elapsed time for flux stage: %s sec"%t.secs)

    logging.info("STAGE 2: Getting osc prob maps...")
    with Timer() as t:
        stage_osc = get_osc_flux(stage_flux, self.osc_service,
                                 oversample_e=self.oversample_e,
                                 oversample_cz=self.oversample_cz,
                                 **params)
    profile.debug("==> elapsed time for oscillations stage: %s sec"%t.secs)

    logging.info("STAGE 3: Getting event rate true maps...")
    with Timer() as t:
        stage_true = get_event_rates(stage_osc, self.aeff_service, **params)
    profile.debug("==> elapsed time for aeff stage: %s sec"%t.secs)

    logging.info("STAGE 4: Getting event rate reco maps...")
    with Timer() as t:
        stage_reco = get_reco_maps(stage_true, self.reco_service, **params)
    profile.debug("==> elapsed time for reco stage: %s sec"%t.secs)

    logging.info("STAGE 5: Getting pid maps...")
    with Timer(verbose=False) as t:
        stage_pid = get_pid_maps(stage_reco, self.pid_service)
    profile.debug("==> elapsed time for pid stage: %s sec"%t.secs)

    if return_stages:
        return (stage_flux, stage_osc, stage_true, stage_reco, stage_pid)
    return stage_pid
def __init__(self, ebins, czbins, detector_depth=None, earth_model=None,
             prop_height=None, **kwargs):
    """
    Parameters needed to instantiate a Prob3OscillationService:
    * ebins: Energy bin edges
    * czbins: cos(zenith) bin edges
    * earth_model: Earth density model used for matter oscillations.
    * detector_depth: Detector depth in km.
    * prop_height: Height in the atmosphere to begin in km.

    Creates the BargerPropagator from the resolved earth model and the
    detector depth and stores it in self.barger_prop.
    """
    OscillationServiceBase.__init__(self, ebins, czbins)
    logging.info('Initializing %s...'%self.__class__.__name__)
    report_params(get_params(),['km','','km'])

    self.prop_height = prop_height

    # Resolve the earth model resource before handing it to the propagator
    earth_model_path = find_resource(earth_model)
    propagator = BargerPropagator(earth_model_path, detector_depth)
    self.barger_prop = propagator
    propagator.UseMassEigenstates(False)
def test_lookup_indices():
    """Unit tests for `lookup_indices` function

    Seven points are looked up in 1D, 2D and 3D linear binnings and the
    returned flat bin indices are compared against hand-computed references.
    """

    #
    # Test a variety of points.
    # As encoded in the reference indices below: a point lying exactly on the
    # upper edge of the last bin (x = 7.0) is counted in that last bin, a point
    # below the binning gets index -1, and a point above the binning gets an
    # index equal to the total number of bins.
    #
    x = np.array([-5, 0.5, 1.5, 7.0, 6.5, 8.0, 6.5], dtype=FTYPE)
    y = np.array([-5, 0.5, 1.5, 1.5, 3.0, 1.5, 2.5], dtype=FTYPE)
    z = np.array([-5, 0.5, 1.5, 1.5, 0.5, 6.0, 0.5], dtype=FTYPE)

    x = SmartArray(x)
    y = SmartArray(y)
    z = SmartArray(z)

    binning_x = OneDimBinning(name="x", num_bins=7, is_lin=True, domain=[0, 7])
    binning_y = OneDimBinning(name="y", num_bins=4, is_lin=True, domain=[0, 4])
    binning_z = OneDimBinning(name="z", num_bins=2, is_lin=True, domain=[0, 2])

    binning_1d = binning_x
    binning_2d = binning_x * binning_y
    binning_3d = binning_x * binning_y * binning_z

    # 1D case: check that each event falls into its predicted bin
    #
    # Values above the last bin edge are assigned an index equal to the
    # number of bins (7 here); values below the first edge are assigned -1
    #
    logging.trace("TEST 1D:")
    logging.trace("Total number of bins: {}".format(7))
    logging.trace("array in 1D: {}".format(x.get()))
    logging.trace("Binning: {}".format(binning_1d.bin_edges[0]))
    indices = lookup_indices([x], binning_1d)
    logging.trace("indices of each array element: {}".format(indices.get()))
    logging.trace("*********************************")
    test = indices.get()
    ref = np.array([-1, 0, 1, 6, 6, 7, 6])
    assert np.array_equal(test, ref), "test={} != ref={}".format(test, ref)

    # 2D case:
    #
    # The binning edges are flattened as follows:
    #   [(x=0, y=0), (x=0, y=1), (x=1, y=0), ...]
    #
    logging.trace("TEST 2D:")
    logging.trace("Total number of bins: {}".format(7 * 4))
    logging.trace("array in 2D: {}".format(list(zip(x.get(), y.get()))))
    logging.trace("Binning: {}".format(binning_2d.bin_edges))
    indices = lookup_indices([x, y], binning_2d)
    logging.trace("indices of each array element: {}".format(indices.get()))
    logging.trace("*********************************")
    test = indices.get()
    ref = np.array([-1, 0, 5, 25, 27, 28, 26])
    assert np.array_equal(test, ref), "test={} != ref={}".format(test, ref)

    # 3D case:
    #
    # the binning edges are flattened as follows:
    #   [(x=0, y=0, z=0), (x=0, y=0, z=1), (x=0, y=1, z=0)...]
    #
    logging.trace("TEST 3D:")
    logging.trace("Total number of bins: {}".format(7 * 4 * 2))
    logging.trace("array in 3D: {}".format(list(zip(x.get(), y.get(), z.get()))))
    logging.trace("Binning: {}".format(binning_3d.bin_edges))
    indices = lookup_indices([x, y, z], binning_3d)
    logging.trace("indices of each array element: {}".format(indices.get()))
    logging.trace("*********************************")
    test = indices.get()
    ref = np.array([-1, 0, 11, 51, 54, 56, 52])
    assert np.array_equal(test, ref), "test={} != ref={}".format(test, ref)

    logging.info("<< PASS : test_lookup_indices >>")
def test_Data():
    """Unit tests for Data class

    Builds `Data` objects from neutrino, muon and noise event files, checks
    the `contains_*` flags after adding them together, round-trips the result
    through JSON and HDF5 files, and finally combines flavour groups with
    `transform_groups`.
    """
    # Instantiate from LEESARD file - located in $PISA_RESOURCES
    file_loc = 'LEESARD/PRD_extend_finalLevel/12550.pckl'
    file_loc2 = 'LEESARD/PRD_extend_finalLevel/14550.pckl'
    f = from_file(file_loc)
    f2 = from_file(file_loc2)
    d = {'nue+nuebar': f}
    d2 = {'numu+numubar': f2}
    data = Data(d)
    data2 = Data(d2)
    logging.debug(str((data.keys())))

    muon_file = 'GRECO/new_style_files/Level7_muongun.12370_15.pckl'
    m = {'muons': from_file(muon_file)}
    m = Data(val=m)
    assert m.contains_muons
    assert not m.contains_neutrinos
    logging.debug(str((m)))
    data = data + m
    assert data.contains_neutrinos
    logging.debug(str((data)))
    if not data.contains_muons:
        raise Exception("data doesn't contain muons.")
    logging.debug(str((data.neutrinos.keys())))

    noise_file = 'GRECO/new_style_files/Level7_VuvuzelaPureNoise_V2.990015.pckl'
    # Load the noise sample from the noise file (not from the muon file)
    n = {'noise': from_file(noise_file)}
    n = Data(val=n)
    assert n.contains_noise
    assert not n.contains_neutrinos
    logging.debug(str((n)))
    data = data + n
    assert data.contains_neutrinos
    logging.debug(str((data)))
    if not data.contains_noise:
        raise Exception("data doesn't contain noise.")
    logging.debug(str((data.neutrinos.keys())))

    # Apply a simple cut
    # data.applyCut('(zenith <= 1.1) & (energy <= 200)')
    # for fi in data.flavint_groups:
    #     assert np.max(data[fi]['zenith']) <= 1.1
    #     assert np.max(data[fi]['energy']) <= 200

    # Apply an "inbounds" cut via a OneDimBinning
    # e_binning = OneDimBinning(
    #     name='energy', num_bins=80, is_log=True, domain=[10, 200]*ureg.GeV
    # )
    # data.keepInbounds(e_binning)
    # for fi in data.flavint_groups:
    #     assert np.min(data[fi]['energy']) >= 10
    #     assert np.max(data[fi]['energy']) <= 200

    # Apply an "inbounds" cut via a MultiDimBinning
    # e_binning = OneDimBinning(
    #     name='energy', num_bins=80, is_log=True, domain=[20, 210]*ureg.GeV
    # )
    # cz_binning = OneDimBinning(
    #     name='zenith', num_bins=40, is_lin=True, domain=[0.1, 1.8*np.pi]
    # )
    # mdb = MultiDimBinning([e_binning, cz_binning])
    # data.keepInbounds(mdb)
    # for fi in data.flavint_groups:
    #     assert np.min(data[fi]['energy']) >= 20
    #     assert np.max(data[fi]['energy']) <= 210
    #     assert np.min(data[fi]['zenith']) >= 0.1
    #     assert np.max(data[fi]['zenith']) <= 1.8*np.pi

    # # Now try to apply a cut that fails on one flav/int (since the field will
    # # be missing) and make sure that the cut did not get applied anywhere in
    # # the end (i.e., it is rolled back)
    # sub_evts = data['nue+nuebar']
    # sub_evts.pop('energy')
    # data['nue+nuebar'] = sub_evts
    # try:
    #     data.applyCut('(energy >= 30) & (energy <= 40)')
    # except Exception:
    #     pass
    # else:
    #     raise Exception('Should not have been able to apply the cut!')
    # for fi in data.flavint_groups:
    #     if fi == NuFlavIntGroup('nue+nuebar'):
    #         continue
    #     assert np.min(data[fi]['energy']) < 30

    # Round-trip through both supported on-disk formats
    data.save('/tmp/test_FlavIntDataGroup.json')
    data.save('/tmp/test_FlavIntDataGroup.hdf5')
    data = Data('/tmp/test_FlavIntDataGroup.json')
    data = Data(val='/tmp/test_FlavIntDataGroup.hdf5')

    d3 = data + data2 + m
    logging.debug(str((d3)))
    d3_com = d3.transform_groups(['nue+nuebar+numu+numubar'])
    logging.debug(str((d3_com)))

    logging.info('<< PASS : test_Data >>')
def _compute_nominal_outputs(self):
    """Load the events from file, perform sanity checks and histogram them
    (into final MapSet).

    Reads `data_file`, `sim_ver` and `bdt_cut` from `self.params`, selects
    the events passing the L4 and L6 flags and the BDT-score cut, and
    histograms them in `self.output_binning`. The result is stored in
    `self.template`; `self.bin_names` and `self.bin_edges` are set as well.

    Raises
    ------
    ValueError
        If `sim_ver` is not one of "4digit", "5digit" or "dima".
    """
    # get params
    data_file_name = self.params.data_file.value
    sim_version = self.params.sim_ver.value
    bdt_cut = self.params.bdt_cut.value.m_as('dimensionless')

    self.bin_names = self.output_binning.names

    # TODO: convert units using e.g. `comp_units` in stages/reco/hist.py
    self.bin_edges = []
    for name in self.bin_names:
        if 'energy' in name:
            bin_edges = self.output_binning[name].bin_edges.to('GeV').magnitude
        else:
            bin_edges = self.output_binning[name].bin_edges.magnitude
        self.bin_edges.append(bin_edges)

    # the rest of this function is PISA v2 legacy code...
    # right now only use burn sample with sim_version = '4digit'
    if sim_version == "4digit":
        Reco_Neutrino_Name = 'IC86_Dunkman_L6_MultiNest8D_PDG_Neutrino'
        Reco_Track_Name = 'IC86_Dunkman_L6_MultiNest8D_PDG_Track'
    elif sim_version in ("5digit", "dima"):
        Reco_Neutrino_Name = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_NumuCC'
        Reco_Track_Name = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_Track'
    else:
        raise ValueError(
            'only allow 4digit, 5digit(H2 model for hole ice) or'
            ' dima (dima p1 and p2 for hole ice)!'
        )

    # The context manager guarantees the file is closed even if a key is
    # missing or the sanity check below fails.
    with h5py.File(find_resource(data_file_name), 'r') as data_file:
        L6_result = np.array(data_file['IC86_Dunkman_L6']['result'])
        dLLH = np.array(data_file['IC86_Dunkman_L6']['delta_LLH'])
        reco_energy_all = np.array(data_file[Reco_Neutrino_Name]['energy'])
        reco_coszen_all = np.array(np.cos(
            data_file[Reco_Neutrino_Name]['zenith']
        ))
        # Track length is read (so a missing field is still reported) but
        # is not used in the histogram below.
        reco_trck_len_all = np.array(data_file[Reco_Track_Name]['length'])

        # sanity check
        santa_doms = data_file['IC86_Dunkman_L6_SANTA_DirectDOMs']['value']
        l3 = data_file['IC86_Dunkman_L3']['value']
        l4 = data_file['IC86_Dunkman_L4']['result']
        l5 = data_file['IC86_Dunkman_L5']['bdt_score']
        assert(np.all(santa_doms>=3) and np.all(l3 == 1) and np.all(l5 >= 0.1))

        # l4==1 was not applied when i3 files were written to hdf5 files, so
        # do it here
        dLLH = dLLH[l4==1]
        reco_energy_all = reco_energy_all[l4==1]
        reco_coszen_all = reco_coszen_all[l4==1]
        l5 = l5[l4==1]
        L6_result = L6_result[l4==1]

    # Keep only events that passed the L6 selection
    dLLH_L6 = dLLH[L6_result==1]
    l5 = l5[L6_result==1]
    reco_energy_L6 = reco_energy_all[L6_result==1]
    reco_coszen_L6 = reco_coszen_all[L6_result==1]

    # Cut: Only keep bdt score >= 0.2 (from MSU latest result, make data/MC
    # agree much better); if use no such further cut, use bdt_cut = 0.1
    logging.info(
        "Cut2, removing events with bdt_score < %s i.e. only keep bdt > %s"
        %(bdt_cut, bdt_cut)
    )
    cut_events = {}
    cut = l5>=bdt_cut
    cut_events['reco_energy'] = reco_energy_L6[cut]
    cut_events['reco_coszen'] = reco_coszen_L6[cut]
    cut_events['pid'] = dLLH_L6[cut]

    # One column per output dimension, in the order of the output binning
    hist, _ = np.histogramdd(
        sample=np.array(
            [cut_events[bin_name] for bin_name in self.bin_names]
        ).T,
        bins=self.bin_edges
    )

    maps = [Map(name=self.output_names[0], hist=hist,
                binning=self.output_binning)]
    self.template = MapSet(maps, name='data')
def run_interpolated_fit(fit_directory, job_idx, skip_successful=False):
    """Fit the hypersurfaces at one grid point and write them back to disk.

    The grid point description is read from
    ``gridpoint_<job_idx, zero-padded to 6 digits>.json.bz2`` and the shared
    fit configuration from ``metadata.json``, both inside `fit_directory`.
    After fitting, the grid point file is rewritten with the fitted
    hypersurfaces under ``"hs_fit"`` and ``"fit_successful"`` set to True.

    Parameters
    ----------
    fit_directory : str
        Existing directory holding the metadata and grid point files.
    job_idx : int
        Flat index of the grid point to process.
    skip_successful : bool
        If True, return immediately when the grid point file already has
        its ``"fit_successful"`` flag set.
    """
    assert os.path.isdir(fit_directory), "fit directory does not exist"

    gridpoint_path = os.path.join(
        fit_directory, f"gridpoint_{job_idx:06d}.json.bz2"
    )
    gridpoint = from_json(gridpoint_path)

    if skip_successful and gridpoint["fit_successful"]:
        logging.info(f"Fit at job index {job_idx} already successful, skipping...")
        return

    metadata = from_json(os.path.join(fit_directory, "metadata.json"))
    param_spec = metadata["interpolation_param_spec"]

    # The nominal dataset holds one pipeline configuration (an OrderedDict).
    # PISA objects nested in a dict are not restored by from_json, so the
    # pipeline configurations are rebuilt from their states here.
    nominal = metadata["nominal_dataset"]
    nominal["pipeline_cfg"] = pipeline_cfg_from_states(nominal["pipeline_cfg"])

    # The systematic datasets are a list of such configurations.
    systematics = metadata["sys_datasets"]
    for sys_set in systematics:
        sys_set["pipeline_cfg"] = pipeline_cfg_from_states(
            sys_set["pipeline_cfg"]
        )

    # dict of param_name : value pairs for this grid point
    point_values = gridpoint["param_values"]

    # Redundant consistency check: the values stored with the grid point must
    # match the grid specification at the index belonging to this job.
    interp_names = metadata["interpolation_param_names"]
    grid_shape = tuple(metadata["grid_shape"])
    grid_idx = list(np.ndindex(grid_shape))[job_idx]
    for axis, name in enumerate(interp_names):
        ms = "Inconsistent parameter values at grid point!"
        assert param_spec[name]["values"][grid_idx[axis]] == point_values[name], ms

    # Move every pipeline to this grid point
    logging.info(f"updating pipelines with parameter values: {point_values}")
    for dataset in [nominal] + systematics:
        for stage_cfg in dataset["pipeline_cfg"].values():
            if "params" in stage_cfg.keys():
                stage_params = stage_cfg["params"]
                for name in interp_names:
                    if name in stage_params.names:
                        stage_params[name].value = point_values[name]

    # These are the parameters of the hypersurface itself, NOT the ones the
    # hypersurfaces are interpolated over.
    hypersurface_params = [
        HypersurfaceParam.from_state(state)
        for state in metadata["hypersurface_params"]
    ]

    # Each Pipeline is created, evaluated and dropped right away on purpose:
    # holding all systematic sets in memory simultaneously is too expensive,
    # especially on the cluster. This way only one dataset is needed at a time.
    nominal["mapset"] = Pipeline(nominal["pipeline_cfg"]).get_outputs()
    for sys_set in systematics:
        sys_set["mapset"] = Pipeline(sys_set["pipeline_cfg"]).get_outputs()

    # Merge maps according to the combine regex, if one was provided
    combine_regex = metadata["combine_regex"]
    if combine_regex is not None:
        nominal["mapset"] = nominal["mapset"].combine_re(combine_regex)
        for sys_set in systematics:
            sys_set["mapset"] = sys_set["mapset"].combine_re(combine_regex)

    fit_kwargs = metadata["hypersurface_fit_kw"]
    log = metadata["log"]  # flag determining whether hs fit is run in log-space or not

    fitted = collections.OrderedDict()
    for map_name in nominal["mapset"].names:
        # The deepcopy is mandatory: without it the hypersurfaces of different
        # maps share parameter objects and overwrite each other's numbers.
        surface = Hypersurface(
            params=copy.deepcopy(hypersurface_params),
            initial_intercept=0. if log else 1.,  # Initial value for intercept
            log=log
        )
        surface.fit(
            nominal_map=nominal["mapset"][map_name],
            nominal_param_values=nominal["sys_params"],
            sys_maps=[sys_set["mapset"][map_name] for sys_set in systematics],
            sys_param_values=[sys_set["sys_params"] for sys_set in systematics],
            norm=True,
            # Is the space or loading time really a problem?
            # keep_maps=False,  # it would take a lot more space otherwise
            **fit_kwargs
        )
        logging.debug("\nFitted hypersurface report:\n%s" % surface)
        fitted[map_name] = surface

    gridpoint["hs_fit"] = fitted
    gridpoint["fit_successful"] = True

    to_json(gridpoint, gridpoint_path)
def get_fisher_matrices(template_settings, grid_settings, IMH=True, NMH=False,
                        dump_all_stages=False, save_templates=False,
                        outdir=None):
    '''
    Main function that runs the Fisher analysis for the chosen hierarchy(ies)
    (inverted by default).

    Returns a dictionary of Fisher matrices, in the format:
    {'IMH': {'cscd': [...],
             'trck': [...],
             'comb': [...],
            },
     'NMH': {'cscd': [...],
             'trck': [...],
             'comb': [...],
            }
    }

    If save_templates=True and no hierarchy is given, only fiducial templates
    will be written out; if one is given, then the templates used to obtain
    the gradients will be written out in addition.

    An empty dict is returned when no hierarchy is chosen and neither
    `dump_all_stages` nor `save_templates` is set. If `outdir` is None while
    something has to be written, the current working directory is used.

    Note that the entries 'hierarchy_nh' and 'hierarchy_ih' are added to
    `template_settings['params']` in place.
    '''
    if outdir is None and (save_templates or dump_all_stages):
        logging.info(
            "No output directory specified. Will save templates to current working directory."
        )
        outdir = os.getcwd()

    tprofile.info("start initializing")

    # Get the parameters
    params = template_settings['params']
    bins = template_settings['binning']

    # Artifically add the hierarchy parameter to the list of parameters
    # The method get_hierarchy_gradients below will know how to deal with it
    params['hierarchy_nh'] = {
        "value": 1.,
        "range": [0., 1.],
        "fixed": False,
        "prior": None
    }
    params['hierarchy_ih'] = {
        "value": 0.,
        "range": [0., 1.],
        "fixed": False,
        "prior": None
    }

    # (tag, is-normal-hierarchy) pairs for which a Fisher matrix is wanted
    chosen_data = []
    if IMH:
        chosen_data.append(('IMH', False))
        logging.info("Fisher matrix will be built for IMH.")
    if NMH:
        chosen_data.append(('NMH', True))
        logging.info("Fisher matrix will be built for NMH.")
    if not chosen_data:
        # In this case, only the fiducial maps (for both hierarchies) will be
        # written
        logging.info("No Fisher matrices will be built.")

    # There is no sense in performing any of the following steps if no Fisher
    # matrices are to be built and no templates are to be saved.
    if not (chosen_data or dump_all_stages or save_templates):
        logging.info("Nothing to be done.")
        return {}

    # Initialise return dict to hold Fisher matrices
    fisher = {
        data_tag: {'cscd': [], 'trck': [], 'comb': []}
        for data_tag, _ in chosen_data
    }

    # Get a template maker with the settings used to initialize
    template_maker = TemplateMaker(get_values(params), **bins)

    tprofile.info("stop initializing\n")

    # Generate fiducial templates for both hierarchies (needed for partial
    # derivatives w.r.t. hierarchy parameter)
    fiducial_maps = {}
    for hierarchy in ['NMH', 'IMH']:

        logging.info("Generating fiducial templates for %s." % hierarchy)

        # Get the fiducial parameter values corresponding to this hierarchy
        fiducial_params = select_hierarchy(
            params, normal_hierarchy=(hierarchy == 'NMH'))

        # Generate fiducial maps, either all of them or only the ultimate one
        tprofile.info("start template calculation")
        with Timer() as t:
            fid_maps = template_maker.get_template(
                get_values(fiducial_params), return_stages=dump_all_stages)
        tprofile.info("==> elapsed time for template: %s sec" % t.secs)

        # With all stages returned, the final (PID) stage is the fifth entry
        fiducial_maps[hierarchy] = fid_maps[4] if dump_all_stages else fid_maps

        # save fiducial map(s)
        fid_path = os.path.join(outdir, "fid_map_" + hierarchy + ".json") \
            if (dump_all_stages or save_templates) else None
        if dump_all_stages:
            # all stages
            stage_names = ("0_unoscillated_flux", "1_oscillated_flux",
                           "2_oscillated_counts", "3_reco", "4_pid")
            stage_maps = dict(zip(stage_names, fid_maps))
            logging.info(
                "Writing fiducial maps (all stages) for %s to %s."
                % (hierarchy, outdir))
            to_json(stage_maps, fid_path)
        elif save_templates:
            # only the final stage
            logging.info(
                "Writing fiducial map (final stage) for %s to %s."
                % (hierarchy, outdir))
            to_json(fiducial_maps[hierarchy], fid_path)

    # Get_gradients and get_hierarchy_gradients will both (temporarily)
    # store the templates used to generate the gradient maps
    store_dir = outdir if save_templates else tempfile.gettempdir()

    # Calculate Fisher matrices for the user-defined cases (NHM true and/or
    # IMH true)
    for data_tag, data_normal in chosen_data:

        logging.info("Running Fisher analysis for %s." % (data_tag))

        # The fiducial params are selected from the hierarchy case that does
        # NOT match the data, as we are varying from this model to find the
        # 'best fit'
        fiducial_params = select_hierarchy(params, not data_normal)

        # Get the free parameters (i.e. those for which the gradients should
        # be calculated)
        free_params = select_hierarchy(get_free_params(params),
                                       not data_normal)
        gradient_maps = {}
        for param in free_params:
            # Special treatment for the hierarchy parameter
            if param == 'hierarchy':
                gradient_maps[param] = get_hierarchy_gradients(
                    data_tag=data_tag,
                    fiducial_maps=fiducial_maps,
                    fiducial_params=fiducial_params,
                    grid_settings=grid_settings,
                    store_dir=store_dir)
            else:
                gradient_maps[param] = get_gradients(
                    data_tag=data_tag,
                    param=param,
                    template_maker=template_maker,
                    fiducial_params=fiducial_params,
                    grid_settings=grid_settings,
                    store_dir=store_dir)

        logging.info("Building Fisher matrix for %s." % (data_tag))

        # Build Fisher matrices for the given hierarchy
        fisher[data_tag] = build_fisher_matrix(
            gradient_maps=gradient_maps,
            fiducial_map=fiducial_maps['IMH']
            if data_normal else fiducial_maps['NMH'],
            template_settings=fiducial_params)

        # If Fisher matrices exist for both channels, add the matrices to
        # obtain the combined one.
        if len(fisher[data_tag]) > 1:
            # Parameter order is important here: use one fixed ordering for
            # the parameter names, best fits and priors.
            param_names = list(gradient_maps.keys())
            summed_matrix = np.array(
                [f.matrix for f in fisher[data_tag].values()]
            ).sum(axis=0)
            fisher[data_tag]['comb'] = FisherMatrix(
                matrix=summed_matrix,
                parameters=param_names,
                best_fits=[
                    fiducial_params[par]['value'] for par in param_names
                ],
                priors=[
                    Prior.from_param(fiducial_params[par])
                    for par in param_names
                ],
            )

    return fisher
# NOTE(review): this is a fragment of a larger script body; `p_name`, `args`,
# `data_maker` and `template_maker` are bound outside of the visible code, so
# the first statement presumably belongs to an enclosing loop -- confirm.
data_maker.params.fix(p_name)

# Configure the analysis from the command-line arguments
analysis = Analysis(data_maker=data_maker, template_maker=template_maker,
                    metric=args.metric, blind=args.blind)
analysis.minimizer_settings = from_file(args.minimizer_settings)
analysis.pseudodata_method = args.pseudo_data
#analysis.randomize_free_params()

# One entry is appended per trial for the 'profile' function in mode 'H0' or
# 'scan'; other function/mode combinations add nothing in the visible code.
results = []
for i in range(args.num_trials):
    logging.info('Running trial %i' % i)
    # Re-seed NumPy's global random generator before each trial
    np.random.seed()
    analysis.generate_psudodata()

    if args.function == 'profile':
        if args.mode == 'H0':
            # Profile with `args.var` at the single value 0
            results.append(
                analysis.profile(args.var, [0.] * ureg.dimensionless,
                                 check_octant=not args.no_check_octant,
                                 pprint=not args.quiet))
        elif args.mode == 'scan':
            # NOTE(review): eval() executes the `args.range` string as Python
            # code; only safe while the command line is trusted.
            results.append(
                analysis.profile(args.var, eval(args.range),
                                 check_octant=not args.no_check_octant,
                                 pprint=not args.quiet))
def run_minimizer(self, pprint=True, skip=False):
    """Minimize the metric over the rescaled free parameters.

    Parameters
    ----------
    pprint : bool
        Passed on to `self._minimizer_callable`; if True the average time
        per minimizer call is also printed after the run.
    skip : bool
        If True, do not minimize: evaluate the metric once at the initial
        values and report those as the best fit.

    Returns
    -------
    best_fit_vals
        Rescaled parameter values at the best fit (the initial values if
        `skip` is True).
    metric_val
        Metric value at the best fit.
    all_metrics : dict
        'llh' and 'chi2' of the summed template with respect to the
        pseudo-data, each including the priors penalty.
    dict_flags : dict
        Minimizer status information and bookkeeping.
    """
    def total_template():
        # Sum all maps of every output into one map and label the first
        # entry 'total'
        maps = self.template_maker.get_outputs()
        maps = [t.combine_wildcard('*') for t in maps]
        maps[0].name = 'total'
        return maps

    # Get initial values
    x0 = self.template_maker.params.free._rescaled_values

    # bfgs steps outside of given bounds by 1 epsilon to evaluate gradients;
    # the step size is stored under 'eps' or 'epsilon' depending on the
    # settings file
    options = self.minimizer_settings['options']['value']
    try:
        epsilon = options['eps']
    except KeyError:
        epsilon = options['epsilon']
    bounds = [(0 + epsilon, 1 - epsilon)] * len(x0)
    logging.info('running the %s optimizer'
                 % self.minimizer_settings['method']['value'])

    # Using scipy.opt.minimize allows a whole host of minimisers to be used
    # This set by the method value in your minimiser settings file
    self.n_minimizer_calls = 0
    if skip:
        best_fit_vals = x0
        metric_val = self._minimizer_callable(x0, False)
        dict_flags = {
            'warnflag': 0,
            'task': 'skip',
            'funcalls': 0,
            'nit': 0,
            'avg_tmp_time': 0,
            'n_minimizer_calls': 0
        }
    else:
        start_t = time.time()
        minim_result = opt.minimize(
            fun=self._minimizer_callable,
            x0=x0,
            args=(pprint, ),
            bounds=bounds,
            method=self.minimizer_settings['method']['value'],
            options=options)
        end_t = time.time()

        # get additional metrics: average wall time per call, in ms
        avg_tmp_time = (end_t - start_t) * 1000. / self.n_minimizer_calls
        if pprint:
            # clear the line
            print('')
            print('\naverage template generation time during minimizer run: %.4f ms'
                  % avg_tmp_time)

        best_fit_vals = minim_result.x
        metric_val = minim_result.fun
        template = total_template()
        dict_flags = {}
        mod_chi2_val = (
            self.pseudodata.metric_total(expected_values=template,
                                         metric='mod_chi2')
            + self.template_maker.params.priors_penalty(metric='mod_chi2'))
        dict_flags['agreement_mod_chi2'] = mod_chi2_val
        dict_flags['warnflag'] = minim_result.status
        dict_flags['task'] = minim_result.message
        # Not every method reports a jacobian
        if 'jac' in minim_result:
            dict_flags['grad'] = minim_result.jac
        dict_flags['funcalls'] = minim_result.nfev
        dict_flags['nit'] = minim_result.nit
        dict_flags['avg_tmp_time'] = avg_tmp_time
        dict_flags['n_minimizer_calls'] = self.n_minimizer_calls
        if dict_flags['warnflag'] > 0:
            logging.warning(str(dict_flags))

    all_metrics = {}
    template = total_template()
    #for metric in ['llh', 'conv_llh', 'barlow_llh','chi2', 'mod_chi2']:
    for metric in ['llh', 'chi2']:
        all_metrics[metric] = (
            self.pseudodata.metric_total(expected_values=template,
                                         metric=metric)
            + self.template_maker.params.priors_penalty(metric=metric))

    return best_fit_vals, metric_val, all_metrics, dict_flags
# NOTE(review): fragment of a larger script body; `args`, `nuDict` and
# `s1_s2_cuts` are bound outside of the visible code, and the loop below
# presumably continues past what is shown here -- confirm.
aeff_list = []
aeff_err_list = []
flavor_list = []
# By default restrict to half the sky (solid angle 2*pi) and cut simulated
# downgoing events
cut_sim_down = True
solid_angle = 2.0*np.pi
if args.all_cz:
    # Then use all sky, don't remove simulated downgoing events:
    cut_sim_down = False
    solid_angle = 4.0*np.pi

# Loop over all neutrino flavours, and get cc Aeff:
for flav,val in nuDict.items():
    logging.info("Loading data for %s..."%flav)
    data = LoadData(args.data_dir,args.geom_str,flav)

    # Copy the shared cuts, then add the interaction-type and particle-type
    # requirements for this flavour
    cc_cuts = list(s1_s2_cuts)
    cc_cuts.append(("I3MCWeightDict","InteractionType",1))
    cc_cuts.append((args.mcnu,"type",val))
    cut_list = get_arb_cuts(data,cc_cuts,mcnu=args.mcnu,cut_sim_down=cut_sim_down)
    logging.info("  NEvents: %d"%np.sum(cut_list))

    # Pick the number of files given on the command line for this flavour
    if 'nue' in flav:
        nfiles = args.ne
    elif 'numu' in flav:
        nfiles = args.nmu
    elif 'nutau' in flav:
        nfiles = args.ntau
    else:
        raise ValueError("Unrecognized flav: %s"%flav)
def test_to_json_from_json():
    """Unit tests for writing various types of objects to and reading from JSON
    files (including bz2-compressed and xor-scrambled files)

    For every floating-point and integer scalar type (Python builtins plus
    all NumPy types of that kind), an array and a scalar are written to and
    read back from '.json', '.json.bz2' and '.json.xor' files, both directly
    and as values of a dictionary.
    """
    # pylint: disable=unused-variable
    from shutil import rmtree
    from pisa.utils.comparisons import recursiveEquality

    proto_float_array = np.array(
        [-np.inf, np.nan, np.inf, -1.1, 0.0, 1.1], dtype=np.float64
    )
    proto_int_array = np.array([-2, -1, 0, 1, 2], dtype=np.int64)
    proto_str_array = np.array(['a', 'ab', 'abc', '', ' '], dtype=str)

    # `np.sctypeDict` is used because the old alias `np.typeDict` no longer
    # exists in current NumPy
    numpy_scalar_types = set(np.sctypeDict.values())
    floating_types = [float] + sorted(
        set(t for t in numpy_scalar_types if issubclass(t, np.floating)),
        key=str,
    )
    integer_types = [int] + sorted(
        set(t for t in numpy_scalar_types if issubclass(t, np.integer)),
        key=str,
    )

    test_info = [
        dict(
            proto_array=proto_float_array,
            dtypes=floating_types,
        ),
        dict(
            proto_array=proto_int_array,
            dtypes=integer_types,
        ),
        # TODO: strings currently do not work
        #dict(
        #    proto_array=proto_str_array,
        #    dtypes=[str, np.str0, np.str_, np.string_],
        #),
    ]

    # One array and one scalar per dtype; dtypes with the same string
    # representation share (and overwrite) the same keys
    test_data = OrderedDict()
    for info in test_info:
        proto_array = info['proto_array']
        for dtype in info['dtypes']:
            typed_array = proto_array.astype(dtype)
            s_dtype = str(np.dtype(dtype))
            test_data["array_" + s_dtype] = typed_array
            test_data["scalar_" + s_dtype] = dtype(typed_array[0])

    temp_dir = tempfile.mkdtemp()
    try:
        for name, obj in test_data.items():
            # Test that the object can be written / read directly
            base_fname = os.path.join(temp_dir, name + '.json')
            for ext in ['', '.bz2', '.xor']:
                fname = base_fname + ext
                to_json(obj, fname)
                loaded_data = from_json(fname)
                if obj.dtype in floating_types:
                    # Floats are compared with a tight relative tolerance;
                    # NaNs count as equal
                    assert np.allclose(
                        loaded_data, obj, rtol=1e-12, atol=0, equal_nan=True
                    ), '{}=\n{}\nloaded=\n{}\nsee file: {}'.format(
                        name, obj, loaded_data, fname)
                else:
                    assert np.all(loaded_data == obj), \
                        '{}=\n{}\nloaded_nda=\n{}\nsee file: {}'.format(
                            name, obj, loaded_data, fname
                        )

            # Test that the same object can be written / read as a value in a
            # dictionary
            orig = OrderedDict([(name, obj), (name + "x", obj)])
            base_fname = os.path.join(temp_dir, 'd.{}.json'.format(name))
            for ext in ['', '.bz2', '.xor']:
                fname = base_fname + ext
                to_json(orig, fname)
                loaded = from_json(fname)
                assert recursiveEquality(loaded, orig), \
                    'orig=\n{}\nloaded=\n{}\nsee file: {}'.format(
                        orig, loaded, fname
                    )
    finally:
        rmtree(temp_dir)

    logging.info('<< PASS : test_to_json_from_json >>')
def load_interpolated_hypersurfaces(input_file):
    '''
    Read interpolated hypersurfaces from a file and build one
    `HypersurfaceInterpolator` per map.

    Analogously to "load_hypersurfaces", the result is a collection keyed by
    map name.

    Parameters
    ----------
    input_file : str
        A JSON input file as produced by fit_hypersurfaces if interpolation
        params were given. The name must end in "json" or "json.bz2".
        It has the form::

            {
                interpolation_param_spec = {
                    'param1': {"values": [val1_1, val1_2, ...], "scales_log": True/False}
                    'param2': {"values": [val2_1, val2_2, ...], "scales_log": True/False}
                    ...
                    'paramN': {"values": [valN_1, valN_2, ...], "scales_log": True/False}
                },
                'hs_fits': [
                    <list of dicts with keys "param_values" and "hs_fit";
                     "hs_fit" maps map names such as 'nue_cc' to
                     hypersurface states>
                ]
            }

    Returns
    -------
    collections.OrderedDict
        dictionary with a :obj:`HypersurfaceInterpolator` for each map

    Raises
    ------
    Exception
        If the file name has an unsupported ending.
    '''
    assert isinstance(input_file, str)

    if not input_file.endswith(("json", "json.bz2")):
        raise Exception("unknown file format")

    logging.info(f"Loading interpolated hypersurfaces from file: {input_file}")

    input_data = from_json(input_file)
    required_keys = {'interpolation_param_spec', 'hs_fits'}
    assert required_keys.issubset(set(input_data.keys())), 'missing keys'

    logging.info("Reading file complete, generating hypersurfaces...")

    # Every entry of input_data['hs_fits'] is a dict with "param_values" and
    # "hs_fit"; the latter maps map names to hypersurface *states*. JSON
    # loading does not turn states back into objects, so that is done here,
    # in place. The first entry fixes the set of map names and all further
    # entries must agree with it.
    map_names = None
    for fit_entry in input_data['hs_fits']:
        states_by_map = fit_entry["hs_fit"]
        if map_names is None:
            map_names = list(states_by_map.keys())
        else:
            assert set(map_names) == set(states_by_map.keys()), "inconsistent maps"
        for name in map_names:
            states_by_map[name] = Hypersurface.from_state(states_by_map[name])

    logging.info(f"Read hypersurface maps: {map_names}")

    # Regroup: from a list of grid points that each hold all maps, to one
    # interpolator per map that holds that map's fit at all grid points.
    output = collections.OrderedDict()
    for name in map_names:
        fits_for_map = []
        for fit_entry in input_data['hs_fits']:
            fits_for_map.append({
                "param_values": fit_entry["param_values"],
                "hs_fit": fit_entry['hs_fit'][name],
            })
        output[name] = HypersurfaceInterpolator(
            input_data['interpolation_param_spec'], fits_for_map
        )

    return output
def test_bootstrap():
    """Unit test for the bootstrap stage.

    Checks that the bootstrapped output depends on the seed, that a seed
    reproduces its map, and that mean and spread over 100 seeds agree with
    the nominal values and standard deviations of the un-bootstrapped map.
    """
    from pisa.core.distribution_maker import DistributionMaker
    from pisa.core.map import Map
    from pisa.utils.config_parser import parse_pipeline_config
    from pisa.utils.comparisons import ALLCLOSE_KW
    from numpy.testing import assert_allclose

    example_cfg = parse_pipeline_config("settings/pipeline/example.cfg")

    # The bootstrap stage has to sit right after the data loading stage
    bootstrap_pipe_cfg = insert_bootstrap_after_data_loader(example_cfg, seed=0)

    logging.debug("bootstrapped pipeline stage order:")
    logging.debug(list(bootstrap_pipe_cfg.keys()))

    # Baseline: the example pipeline without bootstrapping
    map_baseline = DistributionMaker([example_cfg]).get_outputs(return_sum=True)[0]

    # Bootstrapped pipeline, evaluated first with the seed from the config
    dmaker = DistributionMaker([bootstrap_pipe_cfg])
    map_seed0 = dmaker.get_outputs(return_sum=True)[0]

    # Locate the bootstrap stage inside the pipeline
    bootstrap_idx = 0
    for position, candidate in enumerate(dmaker.pipelines[0].stages):
        if candidate.__class__.__name__ == "bootstrap":
            bootstrap_idx = position

    def summed_map_for_seed(seed):
        # To save time, the pipeline is not re-loaded: only the seed is
        # changed and the stage's setup function is called again.
        dmaker.pipelines[0].stages[bootstrap_idx].seed = seed
        dmaker.pipelines[0].stages[bootstrap_idx].setup()
        return dmaker.get_outputs(return_sum=True)[0]

    # Different seeds must give different maps ...
    map_seed1 = summed_map_for_seed(1)
    assert not map_seed0 == map_seed1

    # ... and the same seed must give the same map.
    map_seed0_reprod = summed_map_for_seed(0)
    assert map_seed0 == map_seed0_reprod

    # Quantify the variance of the resulting maps. They should be about the
    # size of the expectation from sum of weights-squared.
    nominal_values = np.stack(
        [summed_map_for_seed(seed).nominal_values for seed in range(100)]
    )

    with np.errstate(divide="ignore", invalid="ignore"):
        # ratio of the bootstrap mean to the baseline nominal values
        bs_nom_ratios = np.mean(nominal_values, axis=0) / map_baseline.nominal_values
        # ratio of the bootstrap spread to the baseline standard deviations
        bs_std_ratios = np.std(nominal_values, axis=0) / map_baseline.std_devs

    # Both must match the baseline expectation up to a small error
    assert np.abs(np.nanmean(bs_nom_ratios) - 1.0) < 0.01
    # the standard deviations are a little harder to match in 100 samples
    assert np.abs(np.nanmean(bs_std_ratios) - 1.0) < 0.02

    logging.info("<< PASS : bootstrap >>")
def test_MutableMultiFileIterator():
    """Unit test for class `MutableMultiFileIterator`

    Three four-line files 'a', 'b' and 'c' are written to a temporary
    directory. Iteration starts in 'a' and switches to the next unused file
    after the second line of the current one; the sequence of lines seen is
    compared against `reference_lines`.
    """
    import shutil
    import tempfile

    prefixes = ['a', 'b', 'c']
    file_len = 4

    reference_lines = [
        # start in file a
        'a0', 'a1',
        # switch to file b after second line of a
        'b0', 'b1',
        # switch to file c after second line of b
        'c0', 'c1', 'c2', 'c3',
        # switch back to b after exhausting c
        'b2', 'b3',
        # switch back to a after exhausting b
        'a2', 'a3'
    ]

    tempdir = tempfile.mkdtemp()
    try:
        # Create test files
        paths = [join(tempdir, prefix) for prefix in prefixes]
        for prefix, path in zip(prefixes, paths):
            with open(path, 'w') as f:
                for i in range(file_len):
                    f.write('%s%d\n' % (prefix, i))
            logging.trace(path)

        actual_lines = []
        with open(paths[0]) as fp:
            file_iter = MutableMultiFileIterator(fp=fp, fpname=paths[0])
            remaining_paths = paths[1:]

            for record in file_iter:
                actual_lines.append(record['line'].strip())
                logging.trace(str(record))
                # The second line of each file ends in '1'
                if record['line'][1:].strip() == '1':
                    if remaining_paths:
                        path = remaining_paths.pop(0)
                        file_iter.switch_to_file(fpname=path)
                    else:
                        for l in str(file_iter.location).split('\n'):
                            logging.trace(l)
    finally:
        # Remove the temporary files whether or not the iteration succeeded
        shutil.rmtree(tempdir)

    if actual_lines != reference_lines:
        raise ValueError('<< FAIL : test_MutableMultiFileIterator >>')

    logging.info('<< PASS : test_MutableMultiFileIterator >>')
def test_hdf():
    """Unit tests for hdf module.

    A nested ``OrderedDict`` of arrays, byte strings and scalars is written
    with `to_hdf` and read back with `from_hdf`, first without and then with
    file-level attributes. After each round trip the top-level keys must
    match and `recursiveEquality` must report the loaded data (and
    attributes) equal to what was written. The temporary directory is always
    removed.
    """
    from shutil import rmtree
    from tempfile import mkdtemp

    data = OrderedDict([
        ('top', OrderedDict([
            ('secondlvl1', OrderedDict([
                ('thirdlvl11', np.linspace(1, 100, 10000).astype(np.float64)),
                ('thirdlvl12', b"this is a string"),
                ('thirdlvl13', b"this is another string"),
                ('thirdlvl14', 1),
                ('thirdlvl15', 1.1),
                ('thirdlvl16', np.float32(1.1)),
                ('thirdlvl17', np.float64(1.1)),
                ('thirdlvl18', np.int8(1)),
                ('thirdlvl19', np.int16(1)),
                ('thirdlvl110', np.int32(1)),
                ('thirdlvl111', np.int64(1)),
                ('thirdlvl112', np.uint8(1)),
                ('thirdlvl113', np.uint16(1)),
                ('thirdlvl114', np.uint32(1)),
                ('thirdlvl115', np.uint64(1)),
            ])),
            ('secondlvl2', OrderedDict([
                ('thirdlvl21', np.linspace(1, 100, 10000).astype(np.float32)),
                ('thirdlvl22', b"this is a string"),
                ('thirdlvl23', b"this is another string"),
            ])),
            ('secondlvl3', OrderedDict([
                # `np.int` was only an alias of the builtin `int` and has been
                # removed from NumPy; the builtin gives the identical dtype.
                ('thirdlvl31', np.array(range(1000)).astype(int)),
                ('thirdlvl32', b"this is a string"),
            ])),
            ('secondlvl4', OrderedDict([
                ('thirdlvl41', np.linspace(1, 100, 10000)),
                ('thirdlvl42', b"this is a string"),
            ])),
            ('secondlvl5', OrderedDict([
                ('thirdlvl51', np.linspace(1, 100, 10000)),
                ('thirdlvl52', b"this is a string"),
            ])),
            ('secondlvl6', OrderedDict([
                ('thirdlvl61', np.linspace(100, 1000, 10000)),
                ('thirdlvl62', b"this is a string"),
            ])),
        ]))
    ])

    temp_dir = mkdtemp()
    try:
        # Round trip without attributes
        fpath = os.path.join(temp_dir, 'to_hdf_noattrs.hdf5')
        to_hdf(data, fpath, overwrite=True, warn=False)
        loaded_data1 = from_hdf(fpath)
        assert data.keys() == loaded_data1.keys()
        assert recursiveEquality(data, loaded_data1), \
            str(data) + "\n" + str(loaded_data1)

        attrs = OrderedDict([
            ('float', 9.98237),
            ('float32', np.float32(1.)),
            ('float64', np.float64(1.)),
            ('pi', np.float64(np.pi)),
            ('string', "string attribute!"),
            ('int', 1),
            ('int8', np.int8(1)),
            ('int16', np.int16(1)),
            ('int32', np.int32(1)),
            ('int64', np.int64(1)),
            ('uint8', np.uint8(1)),
            ('uint16', np.uint16(1)),
            ('uint32', np.uint32(1)),
            ('uint64', np.uint64(1)),
            ('bool', True),
            # `np.bool8` was an alias of `np.bool_` and has been removed from
            # NumPy; the attribute key is kept so the stored file is unchanged.
            ('bool8', np.bool_(True)),
            ('bool_', np.bool_(True)),
        ])

        attr_type_checkers = {
            "float": lambda x: isinstance(x, float),
            "float32": lambda x: x.dtype == np.float32,
            "float64": lambda x: x.dtype == np.float64,
            "pi": lambda x: x.dtype == np.float64,
            "string": lambda x: isinstance(x, string_types),
            "int": lambda x: isinstance(x, int),
            "int8": lambda x: x.dtype == np.int8,
            "int16": lambda x: x.dtype == np.int16,
            "int32": lambda x: x.dtype == np.int32,
            "int64": lambda x: x.dtype == np.int64,
            "uint8": lambda x: x.dtype == np.uint8,
            "uint16": lambda x: x.dtype == np.uint16,
            "uint32": lambda x: x.dtype == np.uint32,
            "uint64": lambda x: x.dtype == np.uint64,
            "bool": lambda x: isinstance(x, bool),
            "bool8": lambda x: x.dtype == np.bool_,
            "bool_": lambda x: x.dtype == np.bool_,
        }

        # Round trip with attributes
        fpath = os.path.join(temp_dir, 'to_hdf_withattrs.hdf5')
        to_hdf(data, fpath, attrs=attrs, overwrite=True, warn=False)
        loaded_data2 = from_hdf(fpath)
        loaded_attrs = loaded_data2.attrs
        assert data.keys() == loaded_data2.keys()
        assert attrs.keys() == loaded_attrs.keys(), \
            '\n' + str(attrs.keys()) + '\n' + str(loaded_attrs.keys())
        assert recursiveEquality(data, loaded_data2)
        assert recursiveEquality(attrs, loaded_attrs)

        # NOTE(review): the checker is applied to the *input* value `val`,
        # while the failure message reports the type of the *loaded*
        # attribute. Confirm whether `loaded_attrs[key]` was meant to be
        # checked here; left unchanged because the types `from_hdf` returns
        # cannot be verified from this test alone.
        for key, val in attrs.items():
            tgt_type_checker = attr_type_checkers[key]
            assert tgt_type_checker(val), \
                "key '%s': val '%s' is type '%s'" % \
                (key, val, type(loaded_attrs[key]))
    finally:
        rmtree(temp_dir)

    logging.info('<< PASS : test_hdf >>')
def _compute_nominal_transforms(self):
    """Build the nominal cross-section transforms.

    For every input flavour and every interaction type, a cross-section map
    is obtained over the true-energy binning, broadcast over all dimensions
    of the input binning, and scaled by
    ``livetime * fid_vol * (ice_p / mr_h20) * (6.022140857e+23 / mol)``.
    For each transform group, and each input name belonging to that group,
    the per-flavint arrays are summed and wrapped in a
    `BinnedTensorTransform`.

    Returns
    -------
    TransformSet
        All transforms created, in the order they were generated.

    """
    logging.info('Updating xsec.genie cross-section histograms...')

    self.load_xsec_splines()
    params = self.params
    livetime = self._ev_param(params['livetime'].value)
    ice_p = self._ev_param(params['ice_p'].value)
    fid_vol = self._ev_param(params['fid_vol'].value)
    mr_h20 = self._ev_param(params['mr_h20'].value)
    x_energy_scale = params['x_energy_scale'].value

    input_binning = self.input_binning
    ebins = input_binning.true_energy
    # Position of the energy dimension (the last matching name wins)
    for dim_idx, dim_name in enumerate(input_binning.names):
        if 'true_energy' in dim_name:
            e_idx = dim_idx

    xsec_transforms = {}
    for flav in self.input_names:
        for int_ in ALL_NUINT_TYPES:
            flavint = flav + '_' + str(int_)
            logging.debug('Obtaining cross-sections for %s', flavint)
            xsec_map = self.xsec.get_map(
                flavint, MultiDimBinning([ebins]),
                x_energy_scale=x_energy_scale
            )

            # One coordinate vector per dimension: the cross-section
            # histogram along energy, plain bin indices everywhere else.
            # Only the energy grid is kept, i.e. the histogram broadcast to
            # the full shape of the input binning.
            num_dims = input_binning.num_dims
            axes = [
                xsec_map.hist if dim == e_idx
                else tuple(range(input_binning.shape[dim]))
                for dim in range(num_dims)
            ]
            xsec_trns = np.meshgrid(*axes, indexing='ij')[e_idx]
            xsec_trns *= (livetime * fid_vol * (ice_p / mr_h20) *
                          (6.022140857e+23 / ureg.mol))
            xsec_transforms[NuFlavInt(flavint)] = xsec_trns

    nominal_transforms = []
    for flavint_group in self.transform_groups:
        flav_names = [str(flav) for flav in flavint_group.flavs]
        for input_name in self.input_names:
            if input_name in flav_names:
                # Sum the arrays of every flavint of the group that has one
                group_arrays = [
                    xsec_transforms[flavint]
                    for flavint in flavint_group.flavints
                    if flavint in xsec_transforms
                ]
                nominal_transforms.append(
                    BinnedTensorTransform(
                        input_names=input_name,
                        output_name=str(flavint_group),
                        input_binning=input_binning,
                        output_binning=self.output_binning,
                        xform_array=reduce(add, group_arrays),
                    )
                )

    return TransformSet(transforms=nominal_transforms)
def prepare_interpolated_fit(
    nominal_dataset, sys_datasets, params, fit_directory, interpolation_param_spec,
    combine_regex=None, log=False, **hypersurface_fit_kw
):
    '''
    Writes steering files for fitting hypersurfaces on a grid of arbitrary parameters.
    The fits can then be run on a cluster with `run_interpolated_fit`.

    Parameters
    ----------
    nominal_dataset : dict
        Definition of the nominal dataset. Specifies the pipeline with which the maps
        can be created, and the values of all systematic parameters used to produce
        the dataset.
        Format must be:
            nominal_dataset = {
                "pipeline_cfg" = <pipeline cfg file (either cfg file path or dict)>),
                "sys_params" = { param_0_name : param_0_value_in_dataset, ..., param_N_name : param_N_value_in_dataset }
            }
        Sys params must correspond to the provided HypersurfaceParam instances provided
        in the `params` arg.

    sys_datasets : list of dicts
        List of dicts, where each dict defines one of the systematics datasets to be
        fitted. The format of each dict is the same as explained for `nominal_dataset`

    params : list of HypersurfaceParams
        List of HypersurfaceParams instances that define the hypersurface. Note that
        this defines ALL hypersurfaces fitted in this function, i.e. only a single
        parameterisation is supported for all maps (this is almost always what you
        want).

    fit_directory : str
        Existing directory in which the fits will be run. The steering files
        (`metadata.json` and one `gridpoint_<job index>.json.bz2` per grid point) are
        written here.

    interpolation_param_spec : collections.OrderedDict
        Specification of parameter grid that hypersurfaces should be interpolated over.
        The dict should have the following form::
            interpolation_param_spec = {
                'param1': {"values": [val1_1, val1_2, ...], "scales_log": True/False}
                'param2': {"values": [val2_1, val2_2, ...], "scales_log": True/False}
                ...
                'paramN': {"values": [valN_1, valN_2, ...], "scales_log": True/False}
            }
        The hypersurfaces will be fit on an N-dimensional rectilinear grid over
        parameters 1 to N. The flag `scales_log` indicates that the interpolation over
        that parameter should happen in log-space. Every entry of "values" must expose
        its magnitude as `.m`.

    combine_regex : list of str, or None
        List of string regex expressions that will be used for merging maps. Used to
        combine similar species. Must be something that can be passed to the
        `MapSet.combine_re` function (see that function's docs for more details).
        Choose `None` if you do not want to perform this merging.

    log : bool
        Stored unchanged in the metadata file; it is not interpreted here.

    hypersurface_fit_kw : kwargs
        kwargs will be passed on to the calls to `Hypersurface.fit`

    Returns
    -------
    n_jobs : int
        Number of grid points, i.e. number of grid point files written.

    Raises
    ------
    ValueError
        If a parameter flagged `scales_log` has a value that is zero or negative.
    '''
    # `collections.Sequence` / `collections.Mapping` were removed in Python 3.10; the
    # ABCs live in `collections.abc`.
    from collections.abc import Mapping, Sequence

    # Take (deep) copies of lists/dicts to avoid modifying the originals
    # Useful for cases where this function is called in a loop (e.g. leave-one-out tests)
    nominal_dataset = copy.deepcopy(nominal_dataset)
    sys_datasets = copy.deepcopy(sys_datasets)
    params = copy.deepcopy(params)

    # Check types
    assert isinstance(sys_datasets, Sequence)
    assert isinstance(params, Sequence)
    assert isinstance(fit_directory, str)
    # there must not be any ambiguity between fitting the hypersurfaces and
    # interpolating them later
    msg = "interpolation params must be specified as a dict with ordered keys"
    assert isinstance(interpolation_param_spec, collections.OrderedDict), msg
    for k, v in interpolation_param_spec.items():
        assert set(v.keys()) == {"values", "scales_log"}
        assert isinstance(v["values"], Sequence)
        # We need to extract the magnitudes from the Quantities to avoid a
        # UnitStrippedWarning. For some reason, doing `np.min(v["values"])` messes up
        # the data structure inside the values in a way that can cause a crash when we
        # try to serialize the values later. Lesson: Stripping units inadvertently can
        # have strange, unforeseen consequences.
        mags = [x.m for x in v["values"]]
        if v["scales_log"] and np.min(mags) <= 0:
            raise ValueError("A log-scaling parameter cannot be equal to or less "
                             "than zero!")

    # Check output format and path
    assert os.path.isdir(fit_directory), "fit directory does not exist"

    # Check formatting of datasets is as expected
    all_datasets = [nominal_dataset] + sys_datasets
    for dataset in all_datasets:
        assert isinstance(dataset, Mapping)
        assert "pipeline_cfg" in dataset
        assert isinstance(dataset["pipeline_cfg"], (str, Mapping))
        assert "sys_params" in dataset
        assert isinstance(dataset["sys_params"], Mapping)

        dataset["pipeline_cfg"] = serialize_pipeline_cfg(dataset["pipeline_cfg"])

    # Check params
    assert len(params) >= 1
    for p in params:
        assert isinstance(p, HypersurfaceParam)

    # Report inputs
    msg = "Hypersurface fit details :\n"
    msg += f" Num params : {len(params)}\n"
    msg += f" Num fit coefficients : {sum([p.num_fit_coeffts for p in params])}\n"
    msg += f" Num datasets : 1 nominal + {len(sys_datasets)} systematics\n"
    msg += f" Nominal values : {nominal_dataset['sys_params']}\n"
    msg += "Hypersurface fits are prepared on the following grid:\n"
    msg += str(interpolation_param_spec)
    logging.info(msg)

    # because we require this to be an OrderedDict, there is no ambiguity in the
    # construction of the mesh here
    param_names = list(interpolation_param_spec.keys())
    grid_shape = tuple(len(v["values"]) for v in interpolation_param_spec.values())

    # We store all information needed to run a fit in metadata
    metadata = collections.OrderedDict(
        interpolation_param_spec=interpolation_param_spec,
        interpolation_param_names=param_names,  # convenience
        grid_shape=grid_shape,  # convenience
        nominal_dataset=nominal_dataset,
        sys_datasets=sys_datasets,
        hypersurface_params=params,
        combine_regex=combine_regex,
        log=log,
        hypersurface_fit_kw=hypersurface_fit_kw
    )

    to_json(metadata, os.path.join(fit_directory, "metadata.json"))

    # we write one JSON file for each grid point
    n_jobs = 0  # stays 0 if the grid is empty (a parameter without any values)
    for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)):
        # Although this is technically redundant, we store the parameter values
        # explicitly for each grid point.
        param_values = {}
        for i, n in enumerate(param_names):
            param_values[n] = interpolation_param_spec[n]["values"][grid_idx[i]]

        gridpoint_data = {
            "param_values": param_values,
            "hs_fit": None,
            "job_idx": job_idx,
            "grid_idx": grid_idx,
            "fit_successful": False
        }
        to_json(gridpoint_data, os.path.join(fit_directory,
                                             f"gridpoint_{job_idx:06d}.json.bz2"))
        n_jobs = job_idx + 1  # zero-indexing

    logging.info(f"Grid fit preparation complete! Total number of jobs: {n_jobs}")
    return n_jobs
def get_combined_xsec(fpath, ver=None):
    """Read cross-section splines from a ROOT file into a CombinedSpline.

    For each neutrino flavour and interaction type, the O16 and H1 splines
    are fetched from the file. Each flavour/interaction pair is wrapped in a
    `Spline` whose evaluation combines the two targets as
    ``0.8879 * O16 + 0.1121 * H1`` (in units of 1e-38 cm^2).

    Parameters
    ----------
    fpath : str
        Resource location of the ROOT file (resolved with `find_resource`).
    ver : optional
        Passed through to `CombinedSpline`.

    Returns
    -------
    CombinedSpline

    """
    # NOTE: ROOT is optional, so it is imported here rather than at module
    # level; this keeps the module importable for e.g. building docs
    import ROOT

    fpath = find_resource(fpath)
    logging.info('Loading GENIE ROOT cross-section file %s', fpath)

    # Name of neutrino flavours in the ROOT file.
    flavs = ('nu_e', 'nu_mu', 'nu_tau', 'nu_e_bar', 'nu_mu_bar', 'nu_tau_bar')
    targets = ('O16', 'H1')

    rfile = ROOT.TFile.Open(fpath, 'read')  # pylint: disable=no-member
    xsec_splines = FlavIntData()
    for flav in flavs:
        for int_ in ALL_NUINT_TYPES:
            xsec_splines[flav, int_] = {}
            for part in targets:
                obj_path = '%s_%s/tot_%s' % (flav, part, str(int_))
                fetched = ROOT.gDirectory.Get(obj_path)  # pylint: disable=no-member
                xsec_splines[flav + str(int_)][part] = fetched
    rfile.Close()

    def eval_spl(spline, binning, out_units=ureg.m**2, x_energy_scale=1,
                 **kwargs):
        """Evaluate the O16/H1 splines at the bin centers and return a Map."""
        init_names = ['true_energy']
        init_units = [ureg.GeV]

        if set(binning.names) != set(init_names):
            raise ValueError('Input binning names {0} does not match '
                             'instantiation binning names '
                             '{1}'.format(binning.names, init_names))

        binning_units = {str(unit) for unit in binning.units}
        expected_units = {str(unit) for unit in init_units}
        if binning_units != expected_units:
            # NOTE(review): the result of `.to(...)` is discarded and the
            # whole `init_units` list is passed; confirm whether an in-place
            # conversion was intended.
            for name in init_names:
                binning[name].to(init_units)

        bin_centers = [x.m for x in binning.weighted_centers][0]

        nu_O16 = np.array([spline['O16'].Eval(e_val) for e_val in bin_centers])
        nu_H1 = np.array([spline['H1'].Eval(e_val) for e_val in bin_centers])

        nu_xsec = ((0.8879 * nu_O16) + (0.1121 * nu_H1)) * 1E-38 * ureg.cm**2

        return Map(hist=nu_xsec.to(out_units).magnitude, binning=binning,
                   **kwargs)

    def validate_spl(binning):
        """Reject binnings whose energy midpoints all exceed 1E3."""
        # NOTE(review): `np.all` only rejects a binning lying entirely above
        # 1E3; confirm whether `np.any` was intended.
        midpoints = binning.true_energy.midpoints.m
        if np.all(midpoints > 1E3):
            raise ValueError('Energy value {0} out of range in array '
                             '{0}'.format(binning.true_energy))

    inXSec = []
    for flav in flavs:
        for int_ in ALL_NUINT_TYPES:
            flavint = NuFlavInt(flav + str(int_))
            inXSec.append(
                Spline(name=str(flavint), spline=xsec_splines[flavint],
                       eval_spl=eval_spl, validate_spl=validate_spl)
            )

    return CombinedSpline(inXSec, interactions=True, ver=ver)
# Load the template settings
template_settings = from_json(args.template_settings)

# This file only holds the number of test points for each parameter (and
# perhaps eventually a non-linearity criterion)
grid_settings = from_json(args.grid_settings)

# Compute the Fisher matrices for the desired hierarchy and fiducial settings
fisher_matrices = get_fisher_matrices(
    template_settings=template_settings,
    grid_settings=grid_settings,
    IMH=args.inverted_truth,
    NMH=args.normal_truth,
    dump_all_stages=args.dump_all_stages,
    save_templates=args.save_templates,
    outdir=args.outdir,
)

# The Fisher matrices are written to disk unconditionally: one file per
# channel, with the combined ('comb') matrix getting the plain base name
for data_tag in fisher_matrices:
    fisher_basename = 'fisher_data_%s' % data_tag
    for chan in fisher_matrices[data_tag]:
        if chan != 'comb':
            outfile = os.path.join(args.outdir,
                                   fisher_basename + '_%s.json' % chan)
            logging.info("%s: writing Fisher matrix for channel %s to %s"
                         % (data_tag, chan, outfile))
        else:
            outfile = os.path.join(args.outdir, fisher_basename + '.json')
            logging.info("%s: writing combined Fisher matrix to %s"
                         % (data_tag, outfile))
        fisher_matrices[data_tag][chan].saveFile(outfile)
def run_unit_tests(path=PISA_PATH, allow_missing=OPTIONAL_MODULES, verbosity=Levels.WARN):
    """Run all tests found at `path` (or recursively below if `path` is a
    directory).

    Each module is imported and each test function is run initially with
    `set_verbosity(verbosity)`, but if an exception is caught, the module is
    re-imported or the test function is re-run with
    `set_verbosity(Levels.TRACE)`, then the traceback from the (original)
    exception emitted is displayed.

    Before running anything, platform information and the versions of all
    required and optional modules are logged.

    Parameters
    ----------
    path : str
        Path to file or directory

    allow_missing : None or sequence of str
        Names of modules whose failure to import is tolerated: an
        `ImportError` whose `name` is in this collection is counted as
        "failed but ok to ignore" instead of as a failure. A single string is
        treated as a one-element list; `None` means nothing is tolerated.

    verbosity : int in pisa.utils.log.Levels

    Raises
    ------
    ImportError
        If a module listed in `REQUIRED_MODULES` cannot be imported.
    Exception
        If any import or test fails not in `allow_missing`

    """
    set_verbosity(verbosity)
    logging.info("%sPlatform information:", PFX)
    logging.info("%s HOSTNAME = %s", PFX, socket.gethostname())
    logging.info("%s FQDN = %s", PFX, socket.getfqdn())
    logging.info("%s OS = %s %s", PFX, platform.system(), platform.release())
    for key, val in cpuinfo.get_cpu_info().items():
        logging.info("%s %s = %s", PFX, key, val)
    logging.info(PFX)
    logging.info("%sModule versions:", PFX)
    for module_name in REQUIRED_MODULES + OPTIONAL_MODULES:
        try:
            module = import_module(module_name)
        except ImportError:
            if module_name in REQUIRED_MODULES:
                raise
            ver = "optional module not installed or not import-able"
        else:
            if hasattr(module, "__version__"):
                ver = module.__version__
            else:
                ver = "?"
        logging.info("%s %s : %s", PFX, module_name, ver)
    logging.info(PFX)

    path = expand(path, absolute=True, resolve_symlinks=True)
    if allow_missing is None:
        allow_missing = []
    elif isinstance(allow_missing, str):
        allow_missing = [allow_missing]

    tests = find_unit_tests(path)

    module_pypaths_succeeded = []
    module_pypaths_failed = []
    module_pypaths_failed_ignored = []
    test_pypaths_succeeded = []
    test_pypaths_failed = []
    test_pypaths_failed_ignored = []

    for rel_file_path, test_func_names in tests.items():
        # Turn e.g. "utils/hdf.py" into the dotted path "pisa.utils.hdf"
        pypath = ["pisa"] + rel_file_path[:-3].split("/")
        parent_pypath = ".".join(pypath[:-1])
        module_name = pypath[-1].replace(".", "_")
        module_pypath = f"{parent_pypath}.{module_name}"

        try:
            set_verbosity(verbosity)
            logging.info(PFX + f"importing {module_pypath}")

            set_verbosity(Levels.WARN)
            module = import_module(module_pypath, package=parent_pypath)

        except Exception as err:
            if (
                isinstance(err, ImportError)
                and hasattr(err, "name")
                and err.name in allow_missing  # pylint: disable=no-member
            ):
                err_name = err.name  # pylint: disable=no-member
                module_pypaths_failed_ignored.append(module_pypath)
                logging.warning(
                    f"{PFX}module {err_name} failed to import wile importing"
                    f" {module_pypath}, but ok to ignore"
                )
                continue

            module_pypaths_failed.append(module_pypath)

            set_verbosity(verbosity)
            msg = f"<< FAILURE IMPORTING : {module_pypath} >>"
            logging.error(PFX + "=" * len(msg))
            logging.error(PFX + msg)
            logging.error(PFX + "=" * len(msg))

            # Reproduce the failure with full output. The full dotted path
            # must be used: importing the bare `module_name` would look for a
            # top-level module of that name and never hit the failing import.
            # Errors are swallowed on purpose; the original exception is
            # reported just below.
            set_verbosity(Levels.TRACE)
            try:
                import_module(module_pypath, package=parent_pypath)
            except Exception:
                pass

            set_verbosity(Levels.TRACE)
            logging.exception(err)

            set_verbosity(verbosity)
            logging.error(PFX + "#" * len(msg))

            continue

        else:
            module_pypaths_succeeded.append(module_pypath)

        for test_func_name in test_func_names:
            test_pypath = f"{module_pypath}.{test_func_name}"
            try:
                set_verbosity(verbosity)
                logging.debug(PFX + f"getattr({module}, {test_func_name})")

                set_verbosity(Levels.WARN)
                test_func = getattr(module, test_func_name)

                # Run the test function
                set_verbosity(verbosity)
                logging.info(PFX + f"{test_pypath}()")

                set_verbosity(Levels.WARN)
                test_func()

            except Exception as err:
                if (
                    isinstance(err, ImportError)
                    and hasattr(err, "name")
                    and err.name in allow_missing  # pylint: disable=no-member
                ):
                    err_name = err.name  # pylint: disable=no-member
                    # Record the test (not its module) as ignored, consistent
                    # with the other `test_pypaths_*` lists
                    test_pypaths_failed_ignored.append(test_pypath)
                    logging.warning(
                        PFX
                        + f"{test_pypath} failed because module {err_name} failed to"
                        + f" load, but ok to ignore"
                    )

                    continue

                test_pypaths_failed.append(test_pypath)

                set_verbosity(verbosity)
                msg = f"<< FAILURE RUNNING : {test_pypath} >>"
                logging.error(PFX + "=" * len(msg))
                logging.error(PFX + msg)
                logging.error(PFX + "=" * len(msg))

                # Reproduce the error with full output (errors swallowed on
                # purpose; the original exception is reported just below)
                set_verbosity(Levels.TRACE)
                try:
                    test_func = getattr(module, test_func_name)
                    with np.printoptions(
                        precision=np.finfo(pisa.FTYPE).precision + 2,
                        floatmode="fixed",
                        sign=" ",
                        linewidth=200,
                    ):
                        test_func()
                except Exception:
                    pass

                set_verbosity(Levels.TRACE)
                logging.exception(err)

                set_verbosity(verbosity)
                logging.error(PFX + "#" * len(msg))

            else:
                test_pypaths_succeeded.append(test_pypath)

            finally:
                # remove references to the test function, e.g. to remove refs
                # to pycuda / numba.cuda contexts so these can be closed
                try:
                    del test_func
                except NameError:
                    pass

        # NOTE: Until we get all GPU code into Numba, need to unload pycuda
        # and/or numba.cuda contexts before a module requiring the other one is
        # to be imported.
        # NOTE: the following causes a traceback to be emitted at the very end
        # of the script, regardless of the exception catching here.
        if (
            pisa.TARGET == "cuda"
            and pycuda is not None
            and hasattr(pycuda, "autoinit")
            and hasattr(pycuda.autoinit, "context")
        ):
            try:
                pycuda.autoinit.context.detach()
            except Exception:
                pass

        # Attempt to unload the imported module
        # TODO: pipeline, etc. fail as isinstance(service, (Stage, PiStage)) is False
        #if module_pypath in sys.modules and module_pypath != "pisa":
        #    del sys.modules[module_pypath]
        #del module

        # TODO: crashes program; subseqeunt calls in same shell crash(!?!?)
        # if pisa.TARGET == 'cuda' and nbcuda is not None:
        #     try:
        #         nbcuda.close()
        #     except Exception:
        #         pass

    # Summarize results

    n_import_successes = len(module_pypaths_succeeded)
    n_import_failures = len(module_pypaths_failed)
    n_import_failures_ignored = len(module_pypaths_failed_ignored)
    n_test_successes = len(test_pypaths_succeeded)
    n_test_failures = len(test_pypaths_failed)
    n_test_failures_ignored = len(test_pypaths_failed_ignored)

    set_verbosity(verbosity)
    logging.info(
        PFX + f"<< IMPORT TESTS : {n_import_successes} imported,"
        f" {n_import_failures} failed,"
        f" {n_import_failures_ignored} failed to import but ok to ignore >>"
    )
    logging.info(
        PFX + f"<< UNIT TESTS : {n_test_successes} succeeded,"
        f" {n_test_failures} failed,"
        f" {n_test_failures_ignored} failed but ok to ignore >>"
    )

    # Exit with error if any failures (import or unit test)

    if module_pypaths_failed or test_pypaths_failed:
        msgs = []
        if module_pypaths_failed:
            msgs.append(
                f"{n_import_failures} module(s) failed to import:\n "
                + ", ".join(module_pypaths_failed)
            )
        if test_pypaths_failed:
            msgs.append(
                f"{n_test_failures} unit test(s) failed:\n "
                + ", ".join(test_pypaths_failed)
            )

        # Note the extra newlines before the exception to make it stand out;
        # and newlines after the exception are due to the pycuda error message
        # that is emitted when we call pycuda.autoinit.context.detach()
        sys.stdout.flush()
        sys.stderr.write("\n\n\n")
        raise Exception("\n".join(msgs) + "\n\n\n")
def test_Events():
    """Unit tests for Events class.

    Exercises construction (empty and from an events HDF5 resource),
    `applyCut`, `keepInbounds` with one- and multi-dimensional binnings, and
    checks that a cut which cannot be evaluated for one flavour/interaction
    leaves all other flavour/interactions uncut.
    """
    from pisa.utils.flavInt import NuFlavInt

    # An empty object must be constructible
    events = Events()

    # Load from a PISA events HDF5 file
    events = Events(
        'events/events__vlvnt__toy_1_to_80GeV_spidx1.0_cz-1_to_1_1e2evts_set0__unjoined__with_fluxes_honda-2015-spl-solmin-aa.hdf5'
    )

    # A simple cut expressed as a string
    events = events.applyCut('(true_coszen <= 0.5) & (true_energy <= 70)')
    for flavint in events.flavints:
        assert np.max(events[flavint]['true_coszen']) <= 0.5
        assert np.max(events[flavint]['true_energy']) <= 70

    # "Inbounds" cut defined by a OneDimBinning
    energy_dim = OneDimBinning(
        name='true_energy', num_bins=80, is_log=True,
        domain=[10, 60] * ureg.GeV
    )
    events = events.keepInbounds(energy_dim)
    for flavint in events.flavints:
        assert np.min(events[flavint]['true_energy']) >= 10
        assert np.max(events[flavint]['true_energy']) <= 60

    # "Inbounds" cut defined by a MultiDimBinning
    energy_dim = OneDimBinning(
        name='true_energy', num_bins=80, is_log=True,
        domain=[20, 50] * ureg.GeV
    )
    coszen_dim = OneDimBinning(
        name='true_coszen', num_bins=40, is_lin=True, domain=[-0.8, 0]
    )
    events = events.keepInbounds(MultiDimBinning([energy_dim, coszen_dim]))
    for flavint in events.flavints:
        assert np.min(events[flavint]['true_energy']) >= 20
        assert np.max(events[flavint]['true_energy']) <= 50
        assert np.min(events[flavint]['true_coszen']) >= -0.8
        assert np.max(events[flavint]['true_coszen']) <= 0

    # Remove a field from one flav/int so that the next cut fails there; the
    # cut must then not have been applied anywhere (i.e. it is rolled back)
    nutaunc_evts = events['nutaunc']
    nutaunc_evts.pop('true_energy')
    events['nutaunc'] = nutaunc_evts

    cut_went_through = True
    try:
        events = events.applyCut('(true_energy >= 30) & (true_energy <= 40)')
    except Exception:
        cut_went_through = False
    if cut_went_through:
        raise Exception('Should not have been able to apply the cut!')

    broken_flavint = NuFlavInt('nutaunc')
    for flavint in events.flavints:
        if flavint != broken_flavint:
            assert np.min(events[flavint]['true_energy']) < 30

    logging.info(
        '<< PASS : test_Events >> (note:'
        ' "[ ERROR] Events object is in an inconsistent state. Reverting cut'
        ' for all flavInts." message above **is expected**.)'
    )
def setup_function(self):
    """Allocate the flux arrays and pre-compute the flux gradients.

    Steps performed:

    * set the data representation to `self.calc_mode`;
    * create, per container, a NaN-filled "nu_flux" array of shape
      (container.size, 2) and a NaN-filled "gradients" array of shape
      (container.size, 2, number of gradient parameters);
    * load the MCEq spline tables from `self.table_file` and check that a
      '+' and a '-' table exists for every name in `self.barr_param_names`;
    * evaluate the splines for every container and gradient parameter,
      writing the results into the "gradients" array.

    Raises
    ------
    AssertionError
        If a required gradient parameter is missing from the spline table.

    """
    self.data.representation = self.calc_mode

    #
    # Init arrays
    #

    # Prepare some array shapes
    gradient_params_shape = (len(self.gradient_param_names), )

    if self.data.is_map:
        # speed up calculation by adding links
        # as nominal flux doesn't depend on the (outgoing) flavour
        self.data.link_containers('nu', [
            'nue_cc', 'numu_cc', 'nutau_cc', 'nue_nc', 'numu_nc', 'nutau_nc'
        ])

        self.data.link_containers('nubar', [
            'nuebar_cc', 'numubar_cc', 'nutaubar_cc', 'nuebar_nc',
            'numubar_nc', 'nutaubar_nc'
        ])

    # Loop over containers
    for container in self.data:

        # Define shapes for containers

        # TODO maybe include toggles for nutau (only needed if prompt
        # considered) and for nu+nubar (only needed if nu->nubar
        # oscillations included) for better speed/memory performance
        # [ N events, 2 flavors in flux, nu vs nubar ]
        # SDB - reduced flavours to 2 (nue, numu) since nutau flux not
        # stored in MCEq splines
        flux_container_shape = (container.size, 2)
        gradients_shape = tuple(
            list(flux_container_shape) + list(gradient_params_shape))

        # `np.nan` is used instead of the `np.NaN` alias, which was removed
        # in NumPy 2.0
        container["nu_flux"] = np.full(flux_container_shape, np.nan, dtype=FTYPE)
        container["gradients"] = np.full(gradients_shape, np.nan, dtype=FTYPE)

    # Also create an array container to hold the gradient parameter values
    # Only want this once, e.g. not once per container
    self.gradient_params = np.empty(gradient_params_shape, dtype=FTYPE)

    #
    # Load MCEq splines
    #

    # Have splines for each Barr parameter, plus +/- versions of each
    # Barr parameter corresponding to mesons/antimesons.

    # For a given Barr parameter, an underlying dictionary have the following
    # keywords: "dnumu", "dnumubar", "dnue", dnuebar"

    # Units are changed to m^-2 in creates_splines.., rather than cm^2 which
    # is the unit of calculation in MCEq!!!!

    # Note that doing this all on CPUs, since the splines reside on the CPUs
    # The actual `compute_function` computation can be done on GPUs though

    # Load the MCEq splines
    spline_file = find_resource(self.table_file)
    logging.info("Loading MCEq spline tables from : %s", spline_file)
    # Encoding is to support pickle files created with python v2.
    # The file is opened in a `with` block so the handle is closed again.
    # NOTE: unpickling executes arbitrary code; only load trusted tables.
    with BZ2File(spline_file) as spline_fobj:
        self.spline_tables_dict = pickle.load(spline_fobj, encoding="latin1")

    # Ensure that the user is not loading an incompatible spline
    for bp in self.barr_param_names:
        bp_p = bp + '+'  # meson
        bp_m = bp + '-'  # antimeson
        assert bp_p in self.spline_tables_dict.keys(), (
            "Gradient parameter '%s' missing from table" % bp_p)
        assert bp_m in self.spline_tables_dict.keys(), (
            "Gradient parameter '%s' missing from table" % bp_m)

    # Loop over containers
    for container in self.data:

        # Grab containers here once to save time
        # TODO make spline generation script store splines directly in
        # terms of energy, not ln(energy)
        true_log_energy = np.log(container["true_energy"])
        true_abs_coszen = np.abs(container["true_coszen"])
        gradients = container["gradients"]
        nubar = container["nubar"]

        #
        # Flux gradients
        #

        # Evaluate splines to get the flux gradients w.r.t the Barr parameter values
        # Need to correctly map nu/nubar and flavor to the output arrays

        # Loop over parameters
        for (
            gradient_param_name,
            gradient_param_idx,
        ) in self.gradient_param_indices.items():

            # nue(bar)
            self._eval_spline(
                true_log_energy=true_log_energy,
                true_abs_coszen=true_abs_coszen,
                spline=self.spline_tables_dict[gradient_param_name]
                ["dnue" if nubar > 0 else "dnuebar"],
                out=gradients[:, 0, gradient_param_idx],
            )

            # numu(bar)
            self._eval_spline(
                true_log_energy=true_log_energy,
                true_abs_coszen=true_abs_coszen,
                spline=self.spline_tables_dict[gradient_param_name]
                ["dnumu" if nubar > 0 else "dnumubar"],
                out=gradients[:, 1, gradient_param_idx],
            )

            # nutau(bar)
            # TODO include nutau flux in splines
            # SDB - there is no nutau flux in splines
            ## gradients[:, 2, gradient_param_idx].fill(0.0)

        # Tell the smart arrays we've changed the flux gradient values on the host
        container.mark_changed("gradients")

    # don't forget to un-link everything again
    self.data.unlink_containers()
def applyCut(self, keep_criteria): """Apply a cut by specifying criteria for keeping events. The cut must be successfully applied to all flav/ints in the events object before the changes are kept, otherwise the cuts are reverted. Parameters ---------- keep_criteria : string Any string interpretable as numpy boolean expression. Returns ------- remaining_events : Events An Events object with the remaining events (deepcopied) and with updated cut metadata including `keep_criteria`. Examples -------- Keep events with true energies in [1, 80] GeV (note that units are not recognized, so have to be handled outside this method) >>> remaining = applyCut("(true_energy >= 1) & (true_energy <= 80)") Do the opposite with "~" inverting the criteria >>> remaining = applyCut("~((true_energy >= 1) & (true_energy <= 80))") Numpy namespace is available for use via `np` prefix >>> remaining = applyCut("np.log10(true_energy) >= 0") """ # TODO(shivesh): function does not pass tests raise NotImplementedError if keep_criteria in self.metadata['cuts']: return assert isinstance(keep_criteria, basestring) fig_to_process = [] if self.contains_neutrinos: fig_to_process += deepcopy(self.flavint_groups) if self.contains_muons: fig_to_process += ['muons'] if self.contains_noise: fig_to_process += ['noise'] logging.info("Applying cut to %s : %s" % (fig_to_process, keep_criteria)) fig_processed = [] remaining_data = {} for fig in fig_to_process: data_dict = self[fig] field_names = data_dict.keys() # TODO: handle unicode: # * translate crit to unicode (easiest to hack but could be # problematic elsewhere) # * translate field names to ascii (probably should be done at # the from_hdf stage?) 
# Replace simple field names with full paths into the data that # lives in this object crit_str = (keep_criteria) for field_name in field_names: crit_str = crit_str.replace( field_name, 'self["%s"]["%s"]' % (fig, field_name)) mask = eval(crit_str) remaining_data[fig] = { k: v[mask] for k, v in self[fig].iteritems() } fig_processed.append(fig) remaining_events = Events() remaining_events.metadata.update(deepcopy(self.metadata)) remaining_events.metadata['cuts'].append(keep_criteria) for fig in fig_to_process: remaining_events[fig] = deepcopy(remaining_data.pop(fig)) return remaining_events
def test_get_random_state():
    """Unit tests for get_random_state function.

    Checks that

    * all supported ways of specifying a random state can be instantiated;
    * the states built from ``RandomState(0)``, ``0``, ``[0]``, ``[0, 0]``,
      ``[0, 0, 0]`` and from the state tuple of the ``0``-seeded state all
      produce the same random numbers;
    * `jumpahead` skips the requested number of draws;
    * ``np.random.RandomState(0)`` still yields the reference values
      recorded below.
    """
    # Instantiate random states in all legal ways
    rstates = {
        0: get_random_state(None),
        1: get_random_state('rand'),
        2: get_random_state('random'),
        3: get_random_state(np.random.RandomState(0)),
        4: get_random_state(0),
        5: get_random_state([
            0,
        ]),
        6: get_random_state([0, 0]),
        7: get_random_state([0, 0, 0]),
    }
    rstates[8] = get_random_state(rstates[4].get_state())

    # rs 3-8 should be identical (rs 0-2 are not seeded with 0 and are skipped)
    ref_id, ref = None, None
    for rs_id, rs in rstates.items():
        if rs_id < 3:
            continue
        if ref is None:
            ref_id = rs_id
            ref = rs.rand(1000)
        else:
            test = rs.rand(1000)
            assert np.array_equal(test, ref), f'rs{rs_id} != rs{ref_id}'

    # Already generated 1k, so generating 2k more gets us 3k; pick off last 1k
    ref = rstates[ref_id].rand(2000)[1000:]
    test = get_random_state(random_state=0, jumpahead=2000).rand(1000)
    # The message now states the jump that is actually tested (2000 draws)
    assert np.array_equal(test, ref), f'jumpahead=2k: rs != rs{ref_id}[2000:3000]'

    # Test stability of random number generator over time; following were
    # retrieved on 2020-03-19 using numpy 1.18.1 via .. ::
    #
    #   np.array2string(
    #       np.random.RandomState(0).rand(100), precision=20, separator=', '
    #   )
    #
    # pylint: disable=bad-whitespace
    ref = np.array([
        0.5488135039273248, 0.7151893663724195, 0.6027633760716439,
        0.5448831829968969, 0.4236547993389047, 0.6458941130666561,
        0.4375872112626925, 0.8917730007820798, 0.9636627605010293,
        0.3834415188257777, 0.7917250380826646, 0.5288949197529045,
        0.5680445610939323, 0.925596638292661, 0.07103605819788694,
        0.08712929970154071, 0.02021839744032572, 0.832619845547938,
        0.7781567509498505, 0.8700121482468192, 0.978618342232764,
        0.7991585642167236, 0.46147936225293185, 0.7805291762864555,
        0.11827442586893322, 0.6399210213275238, 0.1433532874090464,
        0.9446689170495839, 0.5218483217500717, 0.4146619399905236,
        0.26455561210462697, 0.7742336894342167, 0.45615033221654855,
        0.5684339488686485, 0.018789800436355142, 0.6176354970758771,
        0.6120957227224214, 0.6169339968747569, 0.9437480785146242,
        0.6818202991034834, 0.359507900573786, 0.43703195379934145,
        0.6976311959272649, 0.06022547162926983, 0.6667667154456677,
        0.6706378696181594, 0.2103825610738409, 0.1289262976548533,
        0.31542835092418386, 0.3637107709426226, 0.5701967704178796,
        0.43860151346232035, 0.9883738380592262, 0.10204481074802807,
        0.2088767560948347, 0.16130951788499626, 0.6531083254653984,
        0.2532916025397821, 0.4663107728563063, 0.24442559200160274,
        0.15896958364551972, 0.11037514116430513, 0.6563295894652734,
        0.1381829513486138, 0.1965823616800535, 0.3687251706609641,
        0.8209932298479351, 0.09710127579306127, 0.8379449074988039,
        0.09609840789396307, 0.9764594650133958, 0.4686512016477016,
        0.9767610881903371, 0.604845519745046, 0.7392635793983017,
        0.039187792254320675, 0.2828069625764096, 0.1201965612131689,
        0.29614019752214493, 0.11872771895424405, 0.317983179393976,
        0.41426299451466997, 0.06414749634878436, 0.6924721193700198,
        0.5666014542065752, 0.2653894909394454, 0.5232480534666997,
        0.09394051075844168, 0.5759464955561793, 0.9292961975762141,
        0.31856895245132366, 0.6674103799636817, 0.13179786240439217,
        0.7163272041185655, 0.2894060929472011, 0.18319136200711683,
        0.5865129348100832, 0.020107546187493552, 0.8289400292173631,
        0.004695476192547066,
    ])
    test = np.random.RandomState(0).rand(100)
    assert np.array_equal(test, ref), 'random number generator changed!'

    logging.info('<< PASS : test_get_random_state >>')
def test_kde_stash(verbosity=Levels.WARN):
    """Unit test for the hist stashing feature.

    Hist stashing can greatly speed up fits as long as the only free parameters
    are in stages that work on the output histograms, rather than the individual
    events. In particular, it should be strictly equivalent to either scale all
    weights by a factor and then run the KDE, or to first calculate the KDE and
    then scale all the bin counts by the same factor. This test ensures that the
    order of operation really doesn't matter.

    This should apply also to the errors, independent of whether the
    bootstrapping method or the utils.set_variance stage was used to produce
    them.

    Parameters
    ----------
    verbosity
        Logging verbosity handed to `set_verbosity` before any pipeline runs.

    Raises
    ------
    AssertionError
        If one of the scenarios does not scale as expected.
    """
    import pytest
    from numpy.testing import assert_array_equal

    set_verbosity(verbosity)

    data_key = ("data", "toy_event_generator")
    aeff_key = ("aeff", "weight")
    kde_key = ("utils", "kde")
    set_variance_key = ("utils", "set_variance")

    stage_templates = {
        data_key: TEST_CONFIGS.event_generator_cfg,
        aeff_key: TEST_CONFIGS.aeff_cfg,
        kde_key: TEST_CONFIGS.kde_cfg,
        set_variance_key: TEST_CONFIGS.set_variance_cfg,
    }

    def build_cfg(*stage_keys):
        """Return a fresh pipeline config holding the stages in the given order."""
        cfg = deepcopy(TEST_CONFIGS.pipe_cfg)
        # Insertion order matters: it is the order of the stages in the config.
        for stage_key in stage_keys:
            cfg[stage_key] = deepcopy(stage_templates[stage_key])
        return cfg

    def use_binned_aeff(cfg):
        """Switch the aeff stage to binned mode (i.e. multiply bin counts)."""
        cfg[aeff_key]["calc_mode"] = TEST_BINNING
        cfg[aeff_key]["apply_mode"] = TEST_BINNING

    def return_errors(cfg):
        """Make the pipeline return the errors alongside the weights."""
        cfg["pipeline"]["output_key"] = ("weights", "errors")

    def assert_correct_scaling(pipeline_cfg, fixed_errors=False):
        """Run the pipeline and assert that scaling by a factor of two is correct."""
        dmaker = DistributionMaker([pipeline_cfg])
        # `return_sum` is a flag: pass a real boolean, not the string "true"
        out = dmaker.get_outputs(return_sum=True)[0]
        dmaker.pipelines[0].params.weight_scale = 2.0
        out2 = dmaker.get_outputs(return_sum=True)[0]
        if fixed_errors:
            # this is special: We expect that the nominal counts are multiplied,
            # but that the errors stay fixed (applies to set_variance errors)
            assert_array_equal(out.nominal_values * 2.0, out2.nominal_values)
            assert_array_equal(out.std_devs, out2.std_devs)
        else:
            assert out * 2.0 == out2

    ## KDE without errors

    # First aeff, then KDE
    test_cfg = build_cfg(data_key, aeff_key, kde_key)
    assert_correct_scaling(test_cfg)

    # First KDE, then aeff, with stashing
    test_cfg = build_cfg(data_key, kde_key, aeff_key)
    # turn on stashing
    test_cfg[kde_key]["stash_hists"] = True
    use_binned_aeff(test_cfg)
    assert_correct_scaling(test_cfg)

    ## KDE with bootstrap errors

    # First aeff, then KDE with bootstrap
    test_cfg = build_cfg(data_key, aeff_key, kde_key)
    # turn OFF stashing
    test_cfg[kde_key]["stash_hists"] = False
    # turn on bootstrapping
    test_cfg[kde_key]["bootstrap"] = True
    return_errors(test_cfg)
    assert_correct_scaling(test_cfg)

    # First KDE with stashed hists and bootstrap, then aeff
    test_cfg = build_cfg(data_key, kde_key, aeff_key)
    # turn on stashing
    test_cfg[kde_key]["stash_hists"] = True
    # turn on bootstrapping
    test_cfg[kde_key]["bootstrap"] = True
    return_errors(test_cfg)
    # need to change mode to binned
    use_binned_aeff(test_cfg)
    assert_correct_scaling(test_cfg)

    ## KDE with errors calculated using set_variance stage

    # first aeff, then KDE and set_variance
    test_cfg = build_cfg(data_key, aeff_key, kde_key, set_variance_key)
    # turn OFF stashing
    test_cfg[kde_key]["stash_hists"] = False
    # turn OFF bootstrapping
    test_cfg[kde_key]["bootstrap"] = False
    return_errors(test_cfg)
    # The set_variance stage only calculates errors the first time that the
    # pipeline is evaluated, these errors are stored and re-instated on any
    # subsequent evaluations. We expect therefore that only the nominal values
    # scale.
    assert_correct_scaling(test_cfg, fixed_errors=True)

    # first KDE and set_variance, then aeff
    # It is still important that the `set_variance` stage is *last*.
    test_cfg = build_cfg(data_key, kde_key, aeff_key, set_variance_key)
    # turn on stashing
    test_cfg[kde_key]["stash_hists"] = True
    # turn OFF bootstrapping
    test_cfg[kde_key]["bootstrap"] = False
    return_errors(test_cfg)
    # need to change mode to binned
    use_binned_aeff(test_cfg)
    # We ensure that the behavior is the same as it has been when we were not
    # stashing the histograms and used set_variance.
    assert_correct_scaling(test_cfg, fixed_errors=True)

    # Using the wrong order (not putting set_variance last)
    # If set_variance is not the last stage, this breaks. The reason is a
    # slightly silly design of set_variance. It should have been constructed
    # such that the total normalization is always divided out, but it wasn't.
    # The way it is constructed now, it is basically tuned by the scaling factor
    # to work for the given livetime and breaks immediately when that changes.
    test_cfg = build_cfg(data_key, kde_key, set_variance_key, aeff_key)
    # turn on stashing
    test_cfg[kde_key]["stash_hists"] = True
    # turn OFF bootstrapping
    test_cfg[kde_key]["bootstrap"] = False
    return_errors(test_cfg)
    # need to change mode to binned
    use_binned_aeff(test_cfg)
    # With the wrong order, this will fail.
    # FIXME: If someone changes the behavior of set_variance in the future to be
    # more robust, they are welcome to change this unit test.
    with pytest.raises(AssertionError):
        assert_correct_scaling(test_cfg, fixed_errors=True)

    logging.info("<< PASS : kde_stash >>")
def test_BinnedTensorTransform():
    """Unit tests for the BinnedTensorTransform and TransformSet classes.

    Builds two random maps with Poisson errors and three transforms (two with a
    single input, one with two inputs), checks scalar addition on a transform,
    round-trips every transform and then a TransformSet through JSON files in a
    temporary directory, and checks that the TransformSet hash can be read and
    set and that the set can be applied to the input maps.
    """
    binning = MultiDimBinning([
        dict(name='energy', is_log=True, domain=(1, 80) * ureg.GeV, num_bins=10),
        dict(name='coszen', is_lin=True, domain=(-1, 0), num_bins=5),
    ])

    # One random map per flavor, each given Poisson errors right after creation
    flavor_maps = []
    for flavor in ('nue', 'numu'):
        flavor_map = Map(
            name=flavor,
            binning=binning,
            hist=np.random.random(binning.shape),
        )
        flavor_map.set_poisson_errors()
        flavor_maps.append(flavor_map)
    inputs = MapSet(name='inputs', maps=flavor_maps)

    def constant_array(value):
        """Return a new array of the binning's shape filled with `value`."""
        return value * np.ones(binning.shape)

    xform0 = BinnedTensorTransform(
        input_names='nue',
        output_name='nue',
        input_binning=binning,
        output_binning=binning,
        xform_array=constant_array(2),
    )
    xform1 = BinnedTensorTransform(
        input_names=['numu'],
        output_name='numu',
        input_binning=binning,
        output_binning=binning,
        xform_array=constant_array(3),
    )
    xform2 = BinnedTensorTransform(
        input_names=['nue', 'numu'],
        output_name='nue_numu',
        input_binning=binning,
        output_binning=binning,
        xform_array=np.stack([constant_array(2), constant_array(3)], axis=0),
    )

    shifted = xform2 + 2
    assert np.all(shifted.xform_array - xform2.xform_array == 2)

    # JSON round trip of the individual transforms
    scratch_dir = tempfile.mkdtemp()
    try:
        for idx, xform in enumerate([xform0, xform1, xform2]):
            json_path = os.path.join(scratch_dir, str(idx) + '.json')
            xform.to_json(json_path)
            reloaded = BinnedTensorTransform.from_json(json_path)
            assert reloaded == xform, 't=\n%s\nt_=\n%s' % (xform, reloaded)
    finally:
        shutil.rmtree(scratch_dir, ignore_errors=True)

    logging.info('<< PASS : test_BinnedTensorTransform >>')

    xforms = TransformSet(
        name='scaling',
        transforms=[xform0, xform1, xform2],
        hash=9,
    )

    # The hash must be readable and settable
    assert xforms.hash == 9
    xforms.hash = -20
    assert xforms.hash == -20

    _ = xforms.apply(inputs)

    # TODO: get this working above, then test here!
    #xforms2 = xforms * 2

    # JSON round trip of the whole transform set
    scratch_dir = tempfile.mkdtemp()
    try:
        for idx, xform_set in enumerate([xforms]):
            json_path = os.path.join(scratch_dir, str(idx) + '.json')
            xform_set.to_json(json_path)
            reloaded = TransformSet.from_json(json_path)
            assert reloaded == xform_set, 't=\n%s\nt_=\n%s' % (
                xform_set.transforms, reloaded.transforms
            )
    finally:
        shutil.rmtree(scratch_dir, ignore_errors=True)

    logging.info('<< PASS : test_TransformSet >>')
default="event_rate.json", help='''file to store the output''') parser.add_argument('-v', '--verbose', action='count', default=None, help='''set verbosity level''') args = parser.parse_args() #Set verbosity level set_verbosity(args.verbose) #Check binning ebins, czbins = check_binning(args.osc_flux_maps) logging.info("Defining aeff_service...") if args.mc_mode: logging.info(" Using effective area from EVENT DATA...") aeff_service = AeffServiceMC(ebins, czbins, aeff_weight_file=args.weighted_aeff_file) else: logging.info(" Using effective area from PARAMETRIZATION...") aeff_settings = from_json(find_resource(args.settings_file)) aeff_service = AeffServicePar(ebins, czbins, **aeff_settings) event_rate_maps = get_event_rates(args.osc_flux_maps, aeff_service, args.livetime, args.nu_nubar_ratio, args.aeff_scale)
def test_find_index():
    """Unit tests for `find_index` function.

    Correctness is defined as producing the same histogram as numpy.histogramdd
    by using the output of `find_index` (ignoring underflow and overflow values).
    Additionally, -1 should be returned if a value is below the range
    (underflow) or is nan, and num_bins should be returned for a value above the
    range (overflow).

    Raises
    ------
    AssertionError
        If numpy's histogram disagrees with the expected occupancy for a test
        value, or if `find_index` returned an unexpected index for any test
        value (each mismatch is logged as an error first).
    """
    basic_edge_sets = [
        # Negative, positive, integer, non-integer, binary-unrepresentable (0.1) edges
        [-1, -0.5, -0.1, 0, 0.1, 0.5, 1, 2, 3, 4],
        # A single infinite bin: [-np.inf, np.inf]
        [],
        # Half-infinite bins (lower or upper edge) & [-inf, .1, +inf]
        [0.1],
        # Single bin with finite edges & +/-inf-edge(s)-added variants
        [-0.1, 0.1],
    ]
    # Bin edges from above, w/ and w/o +/-inf as left and/or right edges
    infinite_edge_variants = [
        (None, None),
        (-np.inf, None),
        (None, np.inf),
        (-np.inf, np.inf),
    ]

    failures = 0
    for basic_bin_edges in basic_edge_sets:
        for le, re in infinite_edge_variants:
            bin_edges = deepcopy(basic_bin_edges)
            if le is not None:
                bin_edges = [le] + bin_edges
            if re is not None:
                bin_edges = bin_edges + [re]
            # Fewer than two edges do not define a bin
            if len(bin_edges) < 2:
                continue
            logging.debug('bin_edges being tested: %s', bin_edges)
            bin_edges = np.array(bin_edges, dtype=FTYPE)

            num_bins = len(bin_edges) - 1
            underflow_idx = -1
            overflow_idx = num_bins

            #
            # Construct test values to try out
            #

            non_finite_vals = [-np.inf, +np.inf, np.nan]

            # Values within bins (i.e., not on edges)
            inbin_vals = []
            for lower_be, upper_be in zip(bin_edges[:-1], bin_edges[1:]):
                lower_is_finite = np.isfinite(lower_be)
                upper_is_finite = np.isfinite(upper_be)
                if lower_is_finite and upper_is_finite:
                    inbin_val = (lower_be + upper_be) / 2
                elif lower_is_finite:
                    inbin_val = lower_be + 10.5
                elif upper_is_finite:
                    inbin_val = upper_be - 10.5
                else:
                    inbin_val = 10.5
                inbin_vals.append(inbin_val)

            # Values above/below bin edges by one unit of floating point
            # accuracy
            eps = np.finfo(FTYPE).eps  # pylint: disable=no-member
            below_edges_vals = [FTYPE((1 - eps) * be) for be in bin_edges]
            above_edges_vals = [FTYPE((1 + eps) * be) for be in bin_edges]

            test_vals = np.concatenate([
                non_finite_vals,
                bin_edges,
                inbin_vals,
                below_edges_vals,
                above_edges_vals,
            ])
            logging.trace('test_vals = %s', test_vals)

            #
            # Run tests
            #
            for val in test_vals:
                val = FTYPE(val)

                np_histvals, _ = np.histogramdd([val], np.atleast_2d(bin_edges))
                # select first & only dim
                nonzero_indices = np.nonzero(np_histvals)[0]
                num_nonzero = len(nonzero_indices)

                if np.isnan(val) or val < bin_edges[0]:
                    # nan and underflow both map onto the underflow index
                    assert num_nonzero == 0, str(num_nonzero)
                    expected_idx = underflow_idx
                elif val > bin_edges[-1]:
                    assert num_nonzero == 0, str(num_nonzero)
                    expected_idx = overflow_idx
                else:
                    assert num_nonzero == 1, str(num_nonzero)
                    expected_idx = nonzero_indices[0]

                found_idx = find_index(val, bin_edges)

                # Log every mismatch rather than stopping at the first one
                if found_idx != expected_idx:
                    failures += 1
                    msg = 'val={}, edges={}: Expected idx={}, found idx={}'.format(
                        val, bin_edges, expected_idx, found_idx)
                    logging.error(msg)

    assert failures == 0, f"{failures} failures, inspect ERROR messages above for info"

    logging.info('<< PASS : test_find_index >>')
def plot_prior(obj, param=None, x_xform=None, ax1=None, ax2=None, **plt_kwargs):
    """Plot prior for param from template settings, params, or prior filename
    or dict.

    Parameters
    ----------
    obj : str or dict
        If str, interpret as path from which to load a dict.
        If (nested) dict, the (innermost) dict must hold the prior properties:
        either supply `param` to choose which parameter's prior in `obj` to
        plot, or pass a prior dict directly, in which case `param` need not be
        specified.

    param
        Param name to plot; necessary if obj is either pipeline settings or
        params dict. Also used as the x-axis label of both plots.

    x_xform
        Transform to apply to x-values. E.g., to plot against sin^2 theta, use
        x_xform = lambda x: np.sin(x)**2

    ax1, ax2
        Axes onto which to plot LLH and chi-squared, respectively. If none are
        provided, new figures & axes will be created.

    plt_kwargs
        Keyword arguments to pass on to the plot function

    Returns
    -------
    ax1, ax2
        The axes onto which plots were drawn (ax1 = LLH, ax2 = chi^2)

    """
    import matplotlib as mpl
    # Select the 'pdf' backend before pyplot is imported
    mpl.use('pdf')
    import matplotlib.pyplot as plt

    # `basestring` is Python 2 only (NameError on Python 3); `str` is the
    # Python 3 text type
    if isinstance(obj, str):
        obj = from_file(obj)
    # Drill down to the innermost dict, which holds the prior's properties
    if param is not None and param in obj:
        obj = obj[param]
    if 'prior' in obj:
        obj = obj['prior']

    prior = Prior(**obj)
    logging.info('Plotting Prior: %s', prior)

    x0, x1 = prior.valid_range[0], prior.valid_range[1]
    if prior.kind == 'gaussian':
        # Restrict the plotted range to +/- 5 standard deviations
        x0 = max(x0, prior.max_at - 5 * prior.stddev)
        x1 = min(x1, prior.max_at + 5 * prior.stddev)
    # Infinite limits cannot be sampled; fall back to -1 / +1
    if np.isinf(x0):
        x0 = -1
    if np.isinf(x1):
        x1 = +1

    # if prior.units is None, will result in dimensionless quantity
    x = ureg.Quantity(np.linspace(x0, x1, 5000), prior.units)
    llh = prior.llh(x)
    chi2 = prior.chi2(x)

    # The transform only affects the plotted x-values, not the evaluated prior
    if x_xform is not None:
        x = x_xform(x)

    if ax1 is None:
        f = plt.figure()
        ax1 = f.add_subplot(111)
    if ax2 is None:
        f = plt.figure()
        ax2 = f.add_subplot(111)

    ax1.plot(x, llh, **plt_kwargs)
    ax2.plot(x, chi2, **plt_kwargs)

    ax1.set_title(str(prior), fontsize=8, y=1.02)
    ax2.set_title(str(prior), fontsize=8, y=1.02)
    ax1.set_xlabel(param)
    ax2.set_xlabel(param)
    ax1.set_ylabel('LLH')
    ax2.set_ylabel(r'$\Delta\chi^2$')

    return ax1, ax2
def setup_function(self):
    """Allocate the per-container flux arrays and fill them from MCEq splines.

    For every container this creates the NaN-initialised arrays
    "nu_flux_nominal", "nu_flux" and "gradients", loads the spline tables from
    `self.table_file`, then evaluates the splines at each container's
    (|true_coszen|, ln(true_energy)) to fill the flux gradients and, unless
    `self.use_honda_nominal_flux` is set, the nominal flux.

    Raises
    ------
    AssertionError
        If the spline table lacks the '+' or '-' entry of any name in
        `self.barr_param_names`.
    """
    self.data.representation = self.calc_mode

    #
    # Init arrays
    #

    # Prepare some array shapes
    gradient_params_shape = (len(self.gradient_param_names),)

    if self.data.is_map:
        # speed up calculation by adding links
        # as nominal flux doesn't depend on the (outgoing) flavour
        self.data.link_containers('nu', [
            'nue_cc', 'numu_cc', 'nutau_cc',
            'nue_nc', 'numu_nc', 'nutau_nc',
        ])
        self.data.link_containers('nubar', [
            'nuebar_cc', 'numubar_cc', 'nutaubar_cc',
            'nuebar_nc', 'numubar_nc', 'nutaubar_nc',
        ])

    # Flavors in the primary flux; the position in this list is the flavor
    # index used in the flux and gradient arrays.
    # TODO include nutau flux (e.g. prompt) in splines
    flux_flavors = ["nue", "numu"]
    if self.include_nutau_flux:
        flux_flavors.append("nutau")
    num_flux_flavs = len(flux_flavors)

    # Loop over containers
    for container in self.data:

        # TODO Toggles for including both nu and nubar flux (required for CPT
        # violating oscillations)

        # Flux container shape : [ N events, N flavors in primary flux ]
        flux_container_shape = (container.size, num_flux_flavs)

        # Gradients container shape
        gradients_shape = flux_container_shape + gradient_params_shape

        # Create arrays that will be populated in the stage
        # Note that the flux arrays will be chosen as nu or nubar depending
        # on the container (e.g. not simultaneously storing nu and nubar)
        # Would rather use multi-dim arrays here but limited by fact that
        # numba only supports 1/2D versions of numpy functions
        # (`np.nan` rather than the `np.NaN` alias, which NumPy 2.0 removed)
        container["nu_flux_nominal"] = np.full(
            flux_container_shape, np.nan, dtype=FTYPE)
        container["nu_flux"] = np.full(
            flux_container_shape, np.nan, dtype=FTYPE)
        container["gradients"] = np.full(
            gradients_shape, np.nan, dtype=FTYPE)

    # Also create an array container to hold the gradient parameter values
    # Only want this once, e.g. not once per container
    self.gradient_params = np.empty(gradient_params_shape, dtype=FTYPE)

    #
    # Load MCEq splines
    #

    # Have splined both nominal fluxes and gradients in flux w.r.t.
    # Barr parameters, using MCEQ.

    # Have splines for each Barr parameter, plus +/- versions of each
    # Barr parameter corresponding to mesons/antimesons.

    # For a given Barr parameter, the underlying dictionary has the following
    # keywords:
    #     "numu", "numubar", "nue", "nuebar"
    #     derivatives: "dnumu", "dnumubar", "dnue", "dnuebar"
    # Units are changed to m^-2 in creates_splines.., rather than cm^2 which
    # is the unit of calculation in MCEq

    # Note that doing this all on CPUs, since the splines reside on the CPUs
    # The actual `compute_function` computation can be done on GPUs though

    # Load the MCEq splines
    spline_file = find_resource(self.table_file)
    logging.info("Loading MCEq spline tables from : %s", spline_file)
    # NOTE: unpickling can execute arbitrary code, so only load spline tables
    # from a trusted source.
    # Encoding is to support pickle files created with python v2
    # The `with` block closes the compressed file once it has been read.
    with BZ2File(spline_file) as spline_fobj:
        self.spline_tables_dict = pickle.load(spline_fobj, encoding="latin1")

    # Ensure that the user is not loading an incompatible spline
    for bp in self.barr_param_names:
        bp_p = bp + '+'  # meson
        bp_m = bp + '-'  # antimeson
        assert bp_p in self.spline_tables_dict, (
            "Gradient parameter '%s' missing from table" % bp_p)
        assert bp_m in self.spline_tables_dict, (
            "Gradient parameter '%s' missing from table" % bp_m)

    # Loop over containers
    for container in self.data:

        # Grab containers here once to save time
        # TODO make spline generation script store splines directly in
        # terms of energy, not ln(energy)
        true_log_energy = np.log(container["true_energy"])
        true_abs_coszen = np.abs(container["true_coszen"])
        nu_flux_nominal = container["nu_flux_nominal"]
        gradients = container["gradients"]

        # Spline keys carry a "bar" suffix when `nubar` is not positive,
        # e.g. "nue" vs "nuebar"
        # NOTE(review): presumably nubar is +1 for neutrinos and -1 for
        # antineutrinos - confirm against the container definition
        key_suffix = "" if container["nubar"] > 0 else "bar"

        #
        # Nominal flux
        #

        if not self.use_honda_nominal_flux:

            # Evaluate splines to get nominal flux
            # Need to correctly map nu/nubar and flavor to the output arrays

            # Note that nominal flux is stored multiple times (once per Barr
            # parameter). Choose an arbitrary one to get the nominal fluxes
            nominal_splines = self.spline_tables_dict[
                self.gradient_param_names[0]]

            for flav_idx, flavor in enumerate(flux_flavors):
                nu_flux_nominal[:, flav_idx] = nominal_splines[
                    flavor + key_suffix](
                        true_abs_coszen,
                        true_log_energy,
                        grid=False,
                    )

            # Tell the smart arrays we've changed the nominal flux values on
            # the host
            container.mark_changed("nu_flux_nominal")

        #
        # Flux gradients
        #

        # Evaluate splines to get the flux gradients w.r.t the Barr parameter
        # values
        # Need to correctly map nu/nubar and flavor to the output arrays

        # Loop over parameters
        for (
            gradient_param_name,
            gradient_param_idx,
        ) in self.gradient_param_indices.items():
            param_splines = self.spline_tables_dict[gradient_param_name]

            # Derivative splines use the flavor key prefixed with "d"
            for flav_idx, flavor in enumerate(flux_flavors):
                gradients[:, flav_idx, gradient_param_idx] = param_splines[
                    "d" + flavor + key_suffix](
                        true_abs_coszen,
                        true_log_energy,
                        grid=False,
                    )

        # Tell the smart arrays we've changed the flux gradient values on the
        # host
        container.mark_changed("gradients")

    # don't forget to un-link everything again
    self.data.unlink_containers()
def scan_allsyst(template_settings, steps, hypo_param_selections, outdir,
                 minimizer_settings, metric, debug_mode):
    """Scan each systematic (i.e., each non-fixed param) on its own.

    Every free param of the hypothesis maker is scanned separately; the result
    of each scan is written to its own JSON file in `outdir` and the param is
    set back to the value it had before its scan.

    Parameters
    ----------
    template_settings
        Passed to `DistributionMaker` to build the hypothesis maker.
    steps
        Forwarded to `Analysis.scan`; also formatted as an integer into the
        output file names.
    hypo_param_selections
        Param selections applied to the hypothesis maker and forwarded to
        `Analysis.scan`.
    outdir
        Output directory; user and environment variables are expanded and the
        directory is created via `mkdir`.
    minimizer_settings
        Resource loaded with `from_file`; the loaded settings are forwarded to
        `Analysis.scan`.
    metric
        Forwarded to `Analysis.scan`; also formatted as a string into the
        output file names.
    debug_mode
        Forwarded to `Analysis.scan`.

    Returns
    -------
    results : OrderedDict
        Keys are param names, values are the scan results

    Raises
    ------
    IOError
        If the output file for a param already exists.

    """
    outdir = expanduser(expandvars(outdir))
    mkdir(outdir, warn=False)

    hypo_maker = DistributionMaker(template_settings)
    hypo_maker.select_params(hypo_param_selections)

    # The distribution at the selected params is the "data" for every scan
    data_dist = hypo_maker.get_outputs(return_sum=True)
    minimizer_settings = from_file(minimizer_settings)

    analysis = Analysis()

    results = OrderedDict()  # pylint: disable=redefined-outer-name
    free_params = (p for p in hypo_maker.params if not p.is_fixed)
    for param in free_params:
        logging.info('Scanning %s', param.name)
        nominal_value = param.value

        fname = '{:s}_{:d}_steps_{:s}_scan.json'.format(param.name, steps, metric)
        outfile = join(outdir, fname)
        # Refuse to clobber the results of an earlier scan
        if isfile(outfile):
            raise IOError(
                '`outfile` "{}" exists, not overwriting.'.format(outfile)
            )

        scan_result = analysis.scan(
            data_dist=data_dist,
            hypo_maker=hypo_maker,
            hypo_param_selections=hypo_param_selections,
            metric=metric,
            param_names=param.name,
            steps=steps,
            only_points=None,
            outer=True,
            profile=False,
            minimizer_settings=minimizer_settings,
            outfile=outfile,
            debug_mode=debug_mode,
        )
        results[param.name] = scan_result
        to_file(scan_result, outfile)

        # Put the param back where it was before moving on to the next one
        param.value = nominal_value

        logging.info('Done scanning param "%s"', param.name)

    logging.info('Done.')

    return results
def test_Prior():
    """Unit tests for Prior class.

    Constructs one prior of each kind (uniform, gaussian, linterp, spline),
    checks the spline prior against a direct `splev` evaluation, and verifies
    that out-of-range values and mismatched units are rejected.
    """
    def assert_raises(expected, call):
        """Invoke `call` and fail unless it raises `expected`."""
        try:
            call()
        except expected:
            return
        assert False

    uniform = Prior(kind='uniform', llh_offset=1.5)
    gaussian = Prior(kind='gaussian', mean=10, stddev=1)

    x = np.linspace(-10, 10, 100)
    y = x**2
    linterp = Prior(kind='linterp', param_vals=x * ureg.meter, llh_vals=y)

    param_vals = np.linspace(-10, 10, 100)
    llh_vals = x**2
    tck = splrep(param_vals, llh_vals)
    knots, coeffs, deg = tck
    spline = Prior(
        kind='spline',
        knots=knots * ureg.foot,
        coeffs=coeffs,
        deg=deg,
    )

    # The spline prior must reproduce a direct evaluation of the same spline
    param_upsamp = np.linspace(-10, 10, 1000) * ureg.foot
    llh_upsamp = splev(param_upsamp, tck=(knots, coeffs, deg), ext=2)
    assert all(spline.llh(param_upsamp) == llh_upsamp)

    # Asking for param value outside of range should fail
    assert_raises(ValueError, lambda: linterp.llh(-1000 * ureg.mile))
    assert_raises(ValueError, lambda: linterp.chi2(-1000 * ureg.km))
    assert_raises(ValueError, lambda: spline.llh(-1000 * ureg.meter))
    assert_raises(ValueError, lambda: spline.chi2(+1000 * ureg.meter))

    # Asking for param value when units were used should fail
    assert_raises(TypeError, lambda: spline.llh(10))

    # ... or vice versa
    assert_raises(
        (TypeError, pint.DimensionalityError),
        lambda: gaussian.llh(10 * ureg.meter),
    )

    logging.info('<< PASS : test_Prior >>')