def __init__(
        self, ebins, czbins, detector_depth=None, earth_model=None, prop_height=None, osc_precision=5e-4, **kwargs
    ):
        """
        Parameters needed to instantiate a NucraftOscillationService:
        * ebins: Energy bin edges
        * czbins: cos(zenith) bin edges
        * earth_model: Earth density model used for matter oscillations.
        * detector_depth: Detector depth in km.
        * prop_height: Height in the atmosphere (km) at which to begin
                       propagation.
                       Default: 'sample', which samples heights from a
                       parametrization of the atmospheric interaction model
                       presented in "Path length distributions of atmospheric
                       neutrinos", Gaisser and Stanev, PhysRevD.57.1977
        * osc_precision: Numerical precision for oscillation probabilities
        """
        OscillationServiceBase.__init__(self, ebins, czbins)
        logging.info("Initializing %s..." % self.__class__.__name__)

        print get_params()
        report_params(get_params(), ["km", "", "", "km"])

        self.prop_height = prop_height  # km above spherical Earth surface
        # print "\n\n self.prop_height: ",self.prop_height
        """ height_mode = 0 ensures that interaction takes place at chosen height """
        """ whereas height_mode = 1 samples single altitude from distribution """
        self.height_mode = 3 if self.prop_height == "sample" else 0
        logging.debug("NuCraft height mode: %d" % self.height_mode)
        self.detector_depth = detector_depth  # km below spherical Earth surface
        self.num_prec = osc_precision
        self.get_earth_model(earth_model)
Example #2
def build_fisher_matrix(gradient_maps, fiducial_map, template_settings):

  # fix the ordering of parameters
  params = gradient_maps.keys()
  fisher = {}
  for chan in gradient_maps[params[0]]:

    #Find non-empty bins in flattened map
    nonempty = np.nonzero(fiducial_map[chan]['map'].flatten())
    logging.info("Using %u non-empty bins of %u"%(len(nonempty[0]),
                                                  len(fiducial_map[chan]['map'].flatten())))

    #get gradients as calculated above for non-zero bins
    gradients = np.array([gradient_maps[par][chan]['map'].flatten()[nonempty] for par in params])
    # get error estimate from best-fit bin count for non-zero bins
    sigmas = np.sqrt(fiducial_map[chan]['map'].flatten()[nonempty])

    #Loop over all parameters per bin (simple transpose) and calculate the
    #Fisher matrix per bin by taking the outer product of all gradients in
    #that bin. The result is the sum of the matrices over all bins.
    fmatrix = np.zeros((len(params), len(params)))
    for bin_gradients, bin_sigma in zip(gradients.T,sigmas.flatten()):
      fmatrix += np.outer(bin_gradients, bin_gradients)/bin_sigma**2
    
    #And construct the fisher matrix object
    fisher[chan] = FisherMatrix(matrix=fmatrix,
                                   parameters=params,  #order is important here!
                                   best_fits=[template_settings[par]['value'] for par in params],
                                   priors=[template_settings[par]['prior'] for par in params],
                                   )

  #Return all fisher matrices
  return fisher
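
In effect, for each channel the loop above accumulates

    F_ij = sum_b (1 / sigma_b^2) * (dN_b / dp_i) * (dN_b / dp_j)

over all non-empty bins b, where N_b is the fiducial (best-fit) bin count, sigma_b = sqrt(N_b) its Poisson error, and dN_b / dp_i are the gradient maps passed in.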
Example #3
    def __init__(self, flux_file=None, smooth=0.05, **params):
        logging.info("Loading atmospheric flux table %s" %flux_file)

        #Load the data table
        table = np.loadtxt(open_resource(flux_file)).T

        #Columns in Honda files are ordered as energy followed by the primaries
        cols = ['energy']+primaries

        flux_dict = dict(zip(cols, table))
        for key in flux_dict.iterkeys():

            #There are 20 lines per zenith range
            flux_dict[key] = np.array(np.split(flux_dict[key], 20))
            if not key=='energy':
                flux_dict[key] = flux_dict[key].T

        #Set the zenith and energy range
        flux_dict['energy'] = flux_dict['energy'][0]
        flux_dict['coszen'] = np.linspace(0.95, -0.95, 20)

        #Now get a spline representation of the flux table.
        logging.debug('Make spline representation of flux')
        # do this in log of energy and log of flux (more stable)
        logE, C = np.meshgrid(np.log10(flux_dict['energy']), flux_dict['coszen'])

        self.spline_dict = {}
        for nutype in primaries:
            #Get the logarithmic flux
            log_flux = np.log10(flux_dict[nutype]).T
            #Get a spline representation
            spline =  bisplrep(logE, C, log_flux, s=smooth)
            #and store
            self.spline_dict[nutype] = spline
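
For orientation, a minimal sketch of how such a spline could later be evaluated for a scalar or sorted 1-D energy/coszen (bisplev is the scipy.interpolate counterpart of the bisplrep call above; the function name and dict access are illustrative, not necessarily this service's actual API):

    from scipy.interpolate import bisplev
    import numpy as np

    def evaluate_flux(spline_dict, prim, energy, coszen):
        # The spline was fit in (log10(E), coszen) against log10(flux),
        # so evaluate in the same space and undo the log10 afterwards
        log_flux = bisplev(np.log10(energy), coszen, spline_dict[prim])
        return 10**log_flux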
Example #4
 def get_osc_probLT_dict(self,deltam21=None,deltam31=None,theta12=None, 
                         theta13=None,theta23=None,deltacp=None,**kwargs):
     """
     Returns an oscillation probability map dictionary calculated 
     at the values of the input parameters:
       deltam21,deltam31,theta12,theta13,theta23,deltacp
     for flavor_from to flavor_to, with the binning of ebins,czbins.
     The dictionary is formatted as:
       'nue_maps': {'nue':map,'numu':map,'nutau':map},
       'numu_maps': {...}
       'nue_bar_maps': {...}
       'numu_bar_maps': {...}
     """
 
     ########################################################################
     ### TRUE ALGORITHM WHEN WE DECIDE ON HOW TO HANDLE OSC PROB DATA     ###
     # step 1: identify where the data is located: on disk or on server?    #
     # step 2: downsample these maps if not already done, for ebins, czbins #
     # step 3: do interpolation in oscillation parameters to arrive at the  #
     #         maps for (deltam21,deltam31,theta12,theta13,theta23,deltacp) #
     # return dictionary of smoothed, interpolated map.                     #
     ########################################################################
     
     if deltam31 > 0.0:
       filename = os.path.join(self.datadir,'oscProbLT_dm31_0.246_th23_38.645.hdf5') 
     else:
        filename = os.path.join(self.datadir,'oscProbLT_dm31_-0.238_th23_38.645.hdf5')
     logging.info("Loading file: %s"%filename)
     osc_probLT_dict = get_osc_probLT_dict_hdf5(filename)
     
     return osc_probLT_dict
Example #5
    def check_kernels(self, kernels):
        """Test whether the reco kernels have the correct shape."""
        # check axes
        logging.debug('Checking binning of reconstruction kernels')
        for kernel_axis, own_axis in [(kernels['ebins'], self.ebins),
                                      (kernels['czbins'], self.czbins)]:
            if not utils.is_equal_binning(kernel_axis, own_axis):
                raise ValueError("Binning of reconstruction kernel doesn't "
                                 "match the event maps!")

        # check shape of kernels
        logging.debug('Checking shape of reconstruction kernels')
        shape = (len(self.ebins)-1, len(self.czbins)-1,
                 len(self.ebins)-1, len(self.czbins)-1)
        for flavour in kernels:
            if flavour in ['ebins', 'czbins']:
                continue
            for interaction in kernels[flavour]:
                if not np.shape(kernels[flavour][interaction]) == shape:
                    raise IndexError(
                        'Reconstruction kernel for %s/%s has wrong shape: '
                        'expected %s, got %s' % (flavour, interaction, str(shape),
                                                 str(np.shape(kernels[flavour][interaction])))
                    )

        logging.info('Reconstruction kernels are sane')
        return True
Example #6
    def __init__(self,ebins,czbins,particle_ID=None,**kwargs):

        logging.info('Initializing PIDServicePar...')

        #Evaluate the functions at the bin centers
        ecen = get_bin_centers(ebins)
        czcen = get_bin_centers(czbins)

        self.pid_maps = {}
        for signature in particle_ID.keys():
            #Generate the functions
            to_trck_func = eval(particle_ID[signature]['trck'])
            to_cscd_func = eval(particle_ID[signature]['cscd'])

            #Make maps from the functions evaluated at the bin centers
            _,to_trck_map = np.meshgrid(czcen, to_trck_func(ecen))
            _,to_cscd_map = np.meshgrid(czcen, to_cscd_func(ecen))

            for label,pidmap in [('Track',to_trck_map),('Cascade',to_cscd_map)]:
                if (pidmap < 0).any():
                    raise ValueError('%s PID probabilities cannot be negative!'
                        ' Investigate parameterization'%label)

            self.pid_maps[signature] = {'trck':to_trck_map,
                                        'cscd':to_cscd_map}
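
The 'trck' and 'cscd' entries of particle_ID are expected to be strings that eval() turns into callables of energy. An illustrative, purely hypothetical settings fragment (assuming numpy is available as np where the strings are evaluated):

    particle_ID = {
        'numu_cc': {
            # hypothetical energy-dependent track/cascade ID probabilities (sum to 1, never negative)
            'trck': "lambda E: 0.5*(1.0 + np.tanh((np.log10(E) - 0.8)/0.4))",
            'cscd': "lambda E: 0.5*(1.0 - np.tanh((np.log10(E) - 0.8)/0.4))",
        },
    }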
Example #7
def get_hierarchy_gradients(data_tag, fiducial_maps, fiducial_params,
                            grid_settings, store_dir):
  """
  Use the hierarchy interpolation between the two fiducial maps to obtain the
  gradients.
  """
  logging.info("Working on parameter hierarchy.")

  steps = get_steps('hierarchy', grid_settings, fiducial_params)

  hmap = {step:{'trck':{},'cscd':{}} for step in steps}

  for h in steps:
    for channel in ['trck','cscd']:
      # Superpose bin counts
      hmap[h][channel]['map'] = fiducial_maps['NMH'][channel]['map']*h + fiducial_maps['IMH'][channel]['map']*(1.-h)
      # Obtain binning from one of the maps, since identical by construction (cf. FisherAnalysis)
      hmap[h][channel]['ebins'] = fiducial_maps['NMH'][channel]['ebins']
      hmap[h][channel]['czbins'] = fiducial_maps['NMH'][channel]['czbins']

  # TODO: give hmap the same structure as pmaps?
  # Get_derivative_map works even if 'params' and 'ebins','czbins' not in 'data'

  # Store the maps used to calculate partial derivatives
  if store_dir != tempfile.gettempdir():
    logging.info("Writing maps for parameter 'hierarchy' to %s"%store_dir)
  to_json(hmap,os.path.join(store_dir,"hierarchy_"+data_tag+".json"))

  gradient_map = get_derivative_map(hmap, fiducial_params['hierarchy'],degree=2)

  return gradient_map
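
Since each superposed map is linear in h,

    N_b(h) = h * N_b(NMH) + (1 - h) * N_b(IMH),

the exact derivative is dN_b/dh = N_b(NMH) - N_b(IMH); get_derivative_map recovers it numerically from a polynomial fit (degree=2) over the h steps, just as it does for the other parameters in get_gradients below.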
Example #8
    def __init__(self,ebins,czbins,aeff_egy_par,aeff_coszen_par,**params):
        '''
        Parameters:
        * aeff_egy_par - 1D effective area vs. energy parameterization for each
          flavor, stored in a text file (.dat)
        * aeff_coszen_par - 1D coszen parameterization for each flavor, given
          as a json string
        '''
        logging.info('Initializing AeffServicePar...')

        self.ebins = ebins
        self.czbins = czbins


        ## Load the info from .dat files into a dict...
        ## Parametric approach treats all NC events the same
        aeff2d_nc = self.get_aeff_flavor('NC',aeff_egy_par,aeff_coszen_par)
        aeff2d_nc_bar = self.get_aeff_flavor('NC_bar',aeff_egy_par,aeff_coszen_par)

        self.aeff_dict = {}
        logging.info("Creating effective area parametric dict...")
        for flavor in ['nue','nue_bar','numu','numu_bar','nutau','nutau_bar']:
            flavor_dict = {}
            logging.debug("Working on %s effective areas"%flavor)

            aeff2d = self.get_aeff_flavor(flavor,aeff_egy_par,aeff_coszen_par)

            flavor_dict['cc'] = aeff2d
            flavor_dict['nc'] = aeff2d_nc_bar if 'bar' in flavor else aeff2d_nc

            self.aeff_dict[flavor] = flavor_dict

        return
Example #9
def sampleHypercube(n_dim, n_samp, rand_set_id=0, crit='m', iterations=5,
                    rdata_dir='~/cowen/data/random'):
    """Load (if file exists) or generate samples from within hypercube using
    Latin hypercube sampling

    Requires pyDOE to generate new samples.
    """
    fname = samplesFilename(n_dim=n_dim,
                            n_samp=n_samp,
                            rand_set_id=rand_set_id,
                            crit=crit,
                            iterations=iterations)
    rdata_dir = os.path.expandvars(os.path.expanduser(rdata_dir))
    fpath = os.path.join(rdata_dir, fname)

    if os.path.exists(fpath):
        samps = fileio.from_file(fpath)
    else:
        logging.info('File not found. Generating new set of samples & saving'
                     ' result to "%s"', fpath)
        import pyDOE
        mkdir(rdata_dir)
        # Set a deterministic random state based upon the critical hypercube
        # sampling parameters specified
        n_bad_seeds(n_dim, n_samp, rand_set_id)
        samps = pyDOE.lhs(n=n_dim, samples=n_samp, criterion=crit,
                          iterations=iterations)
        fileio.to_file(samps, fpath)
    return samps
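
A minimal usage sketch (pyDOE.lhs returns an (n_samp, n_dim) array of points in the unit hypercube; generating a new set requires pyDOE, and the cache file is written under rdata_dir as above):

    samps = sampleHypercube(n_dim=3, n_samp=50, rand_set_id=0)
    assert samps.shape == (50, 3)   # Latin-hypercube coordinates in [0, 1]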
Example #10
def oversample_binning(coarse_bins, factor):
    """
    Oversample bin edges (coarse_bins) by the given factor
    """

    if is_linear(coarse_bins):
        logging.info('Oversampling linear output binning by factor %i.'
                %factor)
        fine_bins = np.linspace(coarse_bins[0], coarse_bins[-1],
                                factor*(len(coarse_bins)-1)+1)
    elif is_logarithmic(coarse_bins):
        logging.info('Oversampling logarithmic output binning by factor %i.'
                %factor)
        fine_bins = np.logspace(np.log10(coarse_bins[0]),
                                np.log10(coarse_bins[-1]),
                                factor*(len(coarse_bins)-1)+1)
    else:
        logging.warn('Irregular binning detected! Evenly oversampling '
                     'by factor %i'%factor)
        fine_bins = np.array([])
        for i, upper_edge in enumerate(coarse_bins[1:]):
            fine_bins = np.append(fine_bins,
                                  np.linspace(coarse_bins[i], upper_edge,
                                              factor, endpoint=False))

    return fine_bins
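
For example, oversampling linear edges by a factor of 2 (a direct consequence of the np.linspace call above):

    # oversample_binning(np.array([0., 1., 2.]), 2)  ->  array([0. , 0.5, 1. , 1.5, 2. ])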
Example #11
    def __init__(self, ebins, czbins, detector_depth=None, earth_model=None,
                 prop_height=None, oversample_e=None,oversample_cz=None,
                 **kwargs):
        """
        \params:
          * ebins: Energy bin edges
          * czbins: cos(zenith) bin edges
          * earth_model: Earth density model used for matter oscillations.
          * detector_depth: Detector depth in km.
          * prop_height: Height in the atmosphere (km) at which to begin propagation.
        """

        logging.info('Instantiating %s'%self.__class__.__name__)
        self.ebins = np.array(ebins)
        self.czbins = np.array(czbins)
        for ax in [self.ebins, self.czbins]:
            if (len(np.shape(ax)) != 1):
                raise IndexError('Axes must be 1d! '+str(np.shape(ax)))

        report_params(get_params(),['km','','','','km'])

        earth_model = find_resource(earth_model)
        self.earth_model = earth_model
        self.FTYPE = np.float64

        self.ebins_fine = oversample_binning(self.ebins, oversample_e)
        self.czbins_fine = oversample_binning(self.czbins, oversample_cz)
        self.ecen_fine = get_bin_centers(self.ebins_fine)
        self.czcen_fine = get_bin_centers(self.czbins_fine)

        self.initialize_kernel()

        return
Example #12
def get_gradients(data_tag, param, template_maker, fiducial_params,
                  grid_settings, store_dir):
  """
  Use the template maker to create all the templates needed to obtain the gradients.
  """
  logging.info("Working on parameter %s."%param)

  steps = get_steps(param, grid_settings, fiducial_params)

  pmaps = {}

  # Generate one template for each value of the parameter in question and store in pmaps
  for param_value in steps:

      # Make the template corresponding to the current value of the parameter
      with Timer() as t:
          maps = template_maker.get_template(
              get_values(dict(fiducial_params,**{param:dict(fiducial_params[param],
                                                            **{'value': param_value})})))
      tprofile.info("==> elapsed time for template: %s sec"%t.secs)

      pmaps[param_value] = maps

  # Store the maps used to calculate partial derivatives
  if store_dir != tempfile.gettempdir():
    logging.info("Writing maps for parameter %s to %s"%(param,store_dir))

  to_json(pmaps, os.path.join(store_dir,param+"_"+data_tag+".json"))

  gradient_map = get_derivative_map(pmaps,fiducial_params[param],degree=2)

  return gradient_map
Example #13
def mkdir(d, mode=0750):
    try:
        os.makedirs(os.path.expandvars(os.path.expanduser(d)), mode=mode)
    except OSError as err:
        if err[0] != 17:  # errno 17 == EEXIST: directory already exists
            raise err
    else:
        logging.info('Created directory: ' + d + '\n')
Example #14
 def kernel_from_simfile(self, simfile=None, **kwargs):
     logging.info("Opening file: %s" % (simfile))
     try:
         fh = h5py.File(find_resource(simfile), "r")
     except IOError, e:
         logging.error("Unable to open event data file %s" % simfile)
         logging.error(e)
         sys.exit(1)
Example #15
    def __init__(self,ebins,czbins,aeff_weight_file=None,**kwargs):
        self.ebins = ebins
        self.czbins = czbins
        logging.info('Initializing AeffServiceMC...')

        logging.info('Opening file: %s'%(aeff_weight_file))
        try:
            fh = h5py.File(find_resource(aeff_weight_file),'r')
        except IOError,e:
            logging.error("Unable to open aeff_weight_file %s"%aeff_weight_file)
            logging.error(e)
            sys.exit(1)
Example #16
def get_llh_hypothesis(
        data_tag, asimov_data, ntrials, template_maker, template_params,
        minimizer_settings, save_steps, check_octant):
    """
    Runs the llh fitter ntrials number of times, pulling pseudo data sets from
    asimov_data.

    \Params:
      * data_tag - hierarchy type assumed true for the data.
      * asimov_data - Asimov (unfluctuated) data from which to generate
        Poisson-fluctuated pseudo data
      * ntrials - number of trials to run for each hierarchy hypothesis
      * template_maker - instance of the TemplateMaker class, with which to
        fit the pseudo data
      * template_params - dictionary of parameters at which to test the
        pseudo data and find the best-match llh
      * minimizer_settings - settings for the bfgs minimizer in the llh fit
      * save_steps - flag to save the optimizer steps
      * check_octant - boolean to check both octants of theta23

    \returns - list of trials, each holding a dictionary of llh results.
    """

    trials = []
    for itrial in xrange(1,ntrials+1):
        results = {} # one trial of results

        tprofile.info("start trial %d"%itrial)
        logging.info(">"*10 + "Running trial: %05d"%itrial + "<"*10)

        results['seed'] = get_seed()
        logging.info("  RNG seed: %ld"%results['seed'])
        # Get random map generated from asimov data (or from data_tag).
        fmap = get_random_map(asimov_data, seed=results['seed'])

        for hypo_tag, hypo_normal in [('hypo_NMH',True),('hypo_IMH',False)]:

            physics.info(
                "Finding best fit for %s under %s assumption"%(data_tag,hypo_tag))
            with Timer() as t:
                llh_data = find_max_llh_bfgs(
                    fmap, template_maker, template_params,
                    minimizer_settings, save_steps,
                    normal_hierarchy=hypo_normal, check_octant=check_octant)
            tprofile.info("==> elapsed time for optimizer: %s sec"%t.secs)

            # Store the LLH data
            results[hypo_tag] = llh_data

        trials += [results]
        tprofile.info("stop trial %d"%itrial)

    return trials
Example #17
    def __init__(self,ebins,czbins,reco_weight_file=None,**kwargs):
        self.ebins = ebins
        self.czbins = czbins

        logging.info("Initializing RecoService...")

        logging.info('Opening file: %s'%(reco_weight_file))
        try:
            fh = h5py.File(find_resource(reco_weight_file),'r')
        except IOError,e:
            logging.error("Unable to open event data file %s"%reco_weight_file)
            logging.error(e)
            sys.exit(1)
Example #18
    def __init__(self, ebins, czbins, detector_depth=None, earth_model=None,
                 prop_height=None, oversample_e=None,oversample_cz=None,gpu_id=None,
                 **kwargs):
        """
        \params:
          * ebins: Energy bin edges
          * czbins: cos(zenith) bin edges
          * earth_model: Earth density model used for matter oscillations.
          * detector_depth: Detector depth in km.
          * prop_height: Height in the atmosphere (km) at which to begin propagation.
          * gpu_id: If running on a system with multiple GPUs, select the
            device with this id. Otherwise, the default context is used.
        """

        self.gpu_id = gpu_id
        try:
            # cuda refers to pycuda.driver; imported here alongside autoinit
            import pycuda.driver as cuda
            import pycuda.autoinit
            self.context = cuda.Device(self.gpu_id).make_context()
            print "Initializing PyCUDA using gpu id: %d"%self.gpu_id
        except Exception:
            import pycuda.autoinit
            print "Auto initializing PyCUDA..."

        #mfree,mtot = cuda.mem_get_info()
        #print "free memory: %s mb",mfree/1.0e6
        #print "tot memory:  %s mb",mtot/1.0e6
        #raw_input("PAUSED...")

        logging.info('Instantiating %s'%self.__class__.__name__)
        self.ebins = np.array(ebins)
        self.czbins = np.array(czbins)
        self.prop_height = prop_height
        for ax in [self.ebins, self.czbins]:
            if (len(np.shape(ax)) != 1):
                raise IndexError('Axes must be 1d! '+str(np.shape(ax)))

        report_params(get_params(),['km','','','',''])

        earth_model = find_resource(earth_model)
        self.earth_model = earth_model
        self.FTYPE = np.float64

        self.ebins_fine = oversample_binning(self.ebins, oversample_e)
        self.czbins_fine = oversample_binning(self.czbins, oversample_cz)
        self.ecen_fine = get_bin_centers(self.ebins_fine)
        self.czcen_fine = get_bin_centers(self.czbins_fine)

        self.initialize_kernel(detector_depth,**kwargs)

        return
Example #19
    def __init__(self,template_settings,ebins=None,czbins=None,oversample_e=None,
                 oversample_cz=None,**kwargs):
        '''
        TemplateMaker class handles all of the setup and calculation of the
        templates for a given binning.

        Parameters:
        * template_settings - dictionary of all template-making settings
        * ebins - energy bin edges
        * czbins - coszen bin edges
        '''

        self.ebins = ebins
        self.czbins = czbins
        self.oversample_e = oversample_e
        self.oversample_cz = oversample_cz
        logging.debug("Using %u bins in energy from %.2f to %.2f GeV"%
                      (len(self.ebins)-1,self.ebins[0],self.ebins[-1]))
        logging.debug("Using %u bins in cos(zenith) from %.2f to %.2f"%
                      (len(self.czbins)-1,self.czbins[0],self.czbins[-1]))

        #Instantiate a flux model service
        self.flux_service = HondaFluxService(**template_settings)

        # Oscillated Flux:
        if template_settings['osc_code']=='prob3':
            self.osc_service = Prob3OscillationService(self.ebins,self.czbins,
                                                       **template_settings)
        else:
            raise NotImplementedError('OscillationService is only implemented for prob3! osc_code = %s'%template_settings['osc_code'])

        # Aeff/True Event Rate:
        if template_settings['parametric']:
            logging.info(" Using effective area from PARAMETRIZATION...")
            self.aeff_service = AeffServicePar(self.ebins,self.czbins,
                                               **template_settings)
        else:
            logging.info(" Using effective area from MC EVENT DATA...")
            self.aeff_service = AeffServiceMC(self.ebins,self.czbins,
                                              **template_settings)

        # Reco Event Rate:
        self.reco_service = RecoServiceMC(self.ebins,self.czbins,
                                          **template_settings)

        # PID Service:
        self.pid_service = PIDServicePar(self.ebins,self.czbins,
                                         **template_settings)

        return
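
A usage sketch, mirroring how this class is driven in get_fisher_matrices further down (template_settings supplies the 'params' and 'binning' dictionaries):

    template_maker = TemplateMaker(get_values(params), **bins)
    fiducial_params = select_hierarchy(params, normal_hierarchy=True)
    final_map = template_maker.get_template(get_values(fiducial_params))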
Example #20
def plot_pid_stage(nmh, imh, title='', save=False, dpi=150, outdir=""):
    '''
    Plots templates and asymmetry for only the final level stage
    '''

    h_asym = get_asymmetry(nmh, imh, ['trck','cscd'])

    logging.info("  Total trck events (NMH): %d"%np.sum(nmh['trck']['map']))
    logging.info("  Total trck events (IMH): %d"%np.sum(imh['trck']['map']))
    logging.info("  Total cscd events (NMH): %d"%np.sum(nmh['cscd']['map']))
    logging.info("  Total cscd events (IMH): %d"%np.sum(imh['cscd']['map']))

    for chan in ['trck','cscd']:
        plt.figure(figsize=(16,5))

        plt.subplot(1,3,1)
        show_map(nmh[chan])
        plt.title(title+' NMH, '+chan+' counts',fontsize='large')

        plt.subplot(1,3,2)
        show_map(imh[chan])
        plt.title(title+' IMH, '+chan +' counts',fontsize='large')

        plt.subplot(1,3,3)
        sigma = np.sqrt(np.sum(h_asym[chan]['map']**2))
        show_map(h_asym[chan],cmap='RdBu_r')
        plt.title(title+' '+chan+r' asymmetry, $\sigma$ = %.3f'%sigma,
                  fontsize='large')

        if save:
            print "Saving %s chan..."%chan
            filename = os.path.join(outdir,title+'_asym_'+chan+'.png')
            plt.savefig(filename,dpi=dpi)

    return
Example #21
def plot_asimov_line(llh_dict, tkey, max_yval, **kwargs):
    """
    llh_dict  - dictionary of llh data
    tkey      - key of the true hierarchy (from asimov or pseudo data set)
    max_yval  - maximum yvalue for asimov line.
    """

    validate_key(tkey)

    asimov_data = llh_dict[tkey]['asimov_data']
    asimov_data_null = llh_dict[tkey]['asimov_data_null']

    llh_asimov = get_binwise_llh(asimov_data,asimov_data)
    llh_null = -llh_dict[tkey]['llh_null']['llh'][-1]

    logging.info("  >> llh_asimov: %.4f"%llh_asimov)
    logging.info("  >> llh null: %.4f"%llh_null)
    logging.info("Null hypothesis: ")
    for k,v in llh_dict[tkey]['llh_null'].items():
        logging.info("  >> %s: %f"%(k,v[-1]))

    asimov_llr = (llh_null - llh_asimov if 'true_N' in tkey
                  else llh_asimov - llh_null)
    vline = plt.vlines(
        asimov_llr, 0.1, max_yval ,colors='k',**kwargs)

    return asimov_llr
Example #22
    def get_template(self,params,return_stages=False):
        '''
        Runs entire template-making chain, using parameters found in
        'params' dict. If 'return_stages' is set to True, returns
        output from each stage as a simple tuple.
        '''

        flux_maps = get_flux_maps(self.flux_service,self.ebins,self.czbins)

        logging.info("Getting osc prob maps...")
        osc_flux_maps = get_osc_flux(flux_maps,self.osc_service,
            oversample_e=self.oversample_e,oversample_cz=self.oversample_cz,**params)

        logging.info("Getting event rate true maps...")
        event_rate_maps = get_event_rates(osc_flux_maps,self.aeff_service, **params)

        logging.info("Getting event rate reco maps...")
        event_rate_reco_maps = get_reco_maps(event_rate_maps,self.reco_service,
                                             **params)

        logging.info("Getting pid maps...")
        final_event_rate = get_pid_maps(event_rate_reco_maps,self.pid_service)

        if not return_stages:
            return final_event_rate

        # Otherwise, return all stages as a simple tuple
        return (flux_maps, osc_flux_maps, event_rate_maps, event_rate_reco_maps,
                final_event_rate)
Example #23
def plot_posterior_params(frames, template_settings, plot_param_info=True, pbins=20, **kwargs):
    """Plot posterior parameter distributions, and related data"""

    ######################################################
    # Need a new algorithm here. What I want is to calculate the
    # number of figures then number of subfigures for each figure,
    # based on the number of columns.
    ######################################################

    good_columns = [col for col in frames[0].columns if col not in ["hypo", "pseudo_data"]]
    max_plots_per_fig = 4
    nfigs = (len(good_columns) - 1) / max_plots_per_fig + 1
    logging.info("len(good_cols): %d, nfigs: %d" % (len(good_columns), nfigs))

    figs = []
    fig_names = []
    colors = ["b", "r", "g", "k", "c", "m"]
    for frame in frames:
        ifig = 0
        data_key = frame["pseudo_data"][0]
        hypo_key = frame["hypo"][0]

        for icol, col_name in enumerate(good_columns):
            column = frame[col_name]
            # Create new fig if needed:
            if (icol % max_plots_per_fig) == 0:
                ifig += 1
                fig = plt.figure(figsize=(10, 10))
                fig_names.append(data_key + "_" + hypo_key + "_" + str(ifig) + ".png")
                figs.append(fig)
                fig.suptitle("Posteriors for %s, %s" % (data_key, hypo_key))
                # fontsize='large')

            # Why is this not adding subplot?...
            subplot = icol % max_plots_per_fig + 1
            color = "k" if plot_param_info else colors[icol % len(colors)]

            plot_column(
                data_key,
                hypo_key,
                subplot,
                column,
                template_settings,
                color,
                plot_param_info=plot_param_info,
                pbins=pbins,
            )

    return figs, fig_names
Example #24
    def _get_reco_kernels(self, simfile=None, **kwargs):

        for reco_scale in ["e_reco_scale", "cz_reco_scale"]:
            if reco_scale in kwargs and kwargs[reco_scale] != 1:
                raise ValueError("%s = %.2f, must be 1.0 for RecoServiceMC!" % (reco_scale, kwargs[reco_scale]))

        if simfile not in [self.simfile, None]:
            logging.info("Reconstruction from non-default MC file %s!" % simfile)
            return self.kernel_from_simfile(simfile=simfile)

        if not hasattr(self, "kernels"):
            logging.info("Using file %s for default reconstruction" % (simfile))
            self.kernels = self.kernel_from_simfile(simfile=simfile)

        return self.kernels
Example #25
def check_fine_binning(fine_bins, coarse_bins):
    """
    This function checks whether the specified fine binning exists and
    is actually finer than the coarse one.
    """
    if fine_bins is not None:
        if is_coarser_binning(coarse_bins, fine_bins):
            logging.info('Using requested binning for oversampling.')
            #everything is fine
            return True
        else:
            errmsg = 'Requested oversampled binning is coarser ' + \
                    'than output binning. Aborting.'
            logging.error(errmsg)
            raise ValueError(errmsg)
    return False
Example #26
def make_llr_with_false_h(llr_true_h, llr_false_h, nbins, xlim=15):
    fig = plt.figure(figsize=(15,7))

    # 0) Plot true_h distributions, and get mean llr value
    logging.info(
        "  -->Plotting, calculating gaussian parameters for MC True:")
    colors = ['b','r']
    for ii,tkey in enumerate(['true_NH','true_IH']):
        plt.subplot(1,2,ii+1)
        #label = r'$\mathcal{L}$( %s | IMH)/$\mathcal{L}$( %s | NMH)'%(tkey,tkey)
        label = 'LLR(true Normal)' if tkey == 'true_NH' else 'LLR(true Inverted)'
        hvals, bincen, gfit = plot_llr_distribution(
            llr_true_h[tkey], tkey, nbins, color=colors[ii], label=label)



    logging.info("  -->Plotting  for false hierarchy best fit:")
    mc_table = []
    colors = ['r','b']
    for ii,tkey in enumerate(['true_NH','true_IH']):

        plt.subplot(1,2,ii+1)
        label=r'H$_0$: Other Hierarchy'
        hvals, bincen, gfit = plot_llr_distribution(
            llr_false_h[tkey], tkey, nbins, color=colors[ii], label=label)
        max_line = max(hvals)*1.2
        label=("Asimov_%s"%tkey)

        asimov_llr = llr_true_h[tkey].median()

        vline = plt.vlines(
            asimov_llr, 0.1, max_line ,colors='k')

        mcrow = plot_fill(
            llr_false_h[tkey], tkey, asimov_llr, hvals, bincen, gfit,
            alpha=0.5, hatch='xx', facecolor='black')
        plt.legend(framealpha=0.5,loc='best')

        ax = set_xlim(llr_true_h[tkey],llr_false_h[tkey])
        ax.set_ylim([0,max_line*1.2])
        mc_table.append(mcrow)
        plt.grid(False)


    plt.tight_layout()
    displayStats(mc_table)
    return fig
Example #27
    def get_osc_prob_maps(self, **kwargs):
        """
        Returns an oscillation probability map dictionary calculated
        at the values of the input parameters:
          deltam21,deltam31,theta12,theta13,theta23,deltacp
        for flavor_from to flavor_to, with the binning of ebins,czbins.
        The dictionary is formatted as:
          'nue_maps': {'nue':map,'numu':map,'nutau':map},
          'numu_maps': {...}
          'nue_bar_maps': {...}
          'numu_bar_maps': {...}
        NOTES:
          * expects all angles in [rad]
          * this method doesn't calculate the oscillation probabilities
            itself, but calls get_osc_probLT_dict internally to get a
            high-resolution map of the oscillation probs, which is then
            smoothed onto the output binning
        """

        # Get the finely binned maps as implemented in the derived class
        logging.info("Retrieving finely binned maps")
        with Timer(verbose=False) as t:
            fine_maps = self.get_osc_probLT_dict(**kwargs)
        print "       ==> elapsed time to get all fine maps: %s sec" % t.secs

        logging.info("Smoothing fine maps...")
        smoothed_maps = {}
        smoothed_maps["ebins"] = self.ebins
        smoothed_maps["czbins"] = self.czbins

        with Timer(verbose=False) as t:
            for from_nu, tomap_dict in fine_maps.items():
                if "vals" in from_nu:
                    continue
                new_tomaps = {}
                for to_nu, pvals in tomap_dict.items():
                    logging.debug("Getting smoothed map %s/%s" % (from_nu, to_nu))

                    new_tomaps[to_nu] = get_smoothed_map(
                        pvals, fine_maps["evals"], fine_maps["czvals"], self.ebins, self.czbins
                    )

                smoothed_maps[from_nu] = new_tomaps

        profile.debug("       ==> elapsed time to smooth maps: %s sec" % t.secs)

        return smoothed_maps
Example #28
def plot_posterior_params(frames, template_settings, plot_llh=True,
                          plot_param_info=True, pbins=20, mctrue=False,
                          **kwargs):
    """Plot posterior parameter distributions, and related data"""

    good_columns = get_free_params(
        select_hierarchy(template_settings, normal_hierarchy=True)).keys()

    #good_columns = [col for col in frames[0].columns
    #                if col not in ['hypo','mctrue']]
    if plot_llh: good_columns.append('llh')
    print "good_columns: \n",good_columns

    max_plots_per_fig = 4
    nfigs = (len(good_columns)-1)/max_plots_per_fig + 1
    logging.info("len(good_cols): %d, nfigs: %d"%(len(good_columns),nfigs))

    figs = []
    fig_names = []
    colors = ['b','r','g','k','c','m']
    for frame in frames:
        ifig = 0
        true_key = frame['mctrue'][0]
        hypo_key = frame['hypo'][0]

        for icol,col_name in enumerate(good_columns):
            column = frame[col_name]
            # Create new fig if needed:
            if (icol%max_plots_per_fig) == 0:
                ifig += 1
                fig = plt.figure(figsize=(10,10))
                fig_names.append(true_key+"_"+hypo_key+"_"+str(ifig)+".png")
                figs.append(fig)
                fig.suptitle('Posteriors for %s, %s'%(true_key,hypo_key))
                             #fontsize='large')

            # Why is this not adding subplot?...
            subplot = (icol%max_plots_per_fig + 1)
            color = 'k' if plot_param_info else colors[icol%len(colors)]

            plot_column(
                true_key, hypo_key, subplot, column, template_settings,
                color,plot_param_info=plot_param_info,pbins=pbins,
                mctrue=mctrue)

    return figs,fig_names
Example #29
    def get_template(self, params, return_stages=False):
        '''
        Runs entire template-making chain, using parameters found in
        'params' dict. If 'return_stages' is set to True, returns
        output from each stage as a simple tuple.
        '''

        logging.info("STAGE 1: Getting Atm Flux maps...")
        with Timer() as t:
            flux_maps = get_flux_maps(self.flux_service, self.ebins,
                                      self.czbins, **params)
        profile.debug("==> elapsed time for flux stage: %s sec"%t.secs)

        logging.info("STAGE 2: Getting osc prob maps...")
        with Timer() as t:
            osc_flux_maps = get_osc_flux(flux_maps, self.osc_service,
                                         oversample_e=self.oversample_e,
                                         oversample_cz=self.oversample_cz,
                                         **params)
        profile.debug("==> elapsed time for oscillations stage: %s sec"%t.secs)

        logging.info("STAGE 3: Getting event rate true maps...")
        with Timer() as t:
            event_rate_maps = get_event_rates(osc_flux_maps,
                                              self.aeff_service, **params)
        profile.debug("==> elapsed time for aeff stage: %s sec"%t.secs)

        logging.info("STAGE 4: Getting event rate reco maps...")
        with Timer() as t:
            event_rate_reco_maps = get_reco_maps(event_rate_maps,
                                                 self.reco_service,
                                                 **params)
        profile.debug("==> elapsed time for reco stage: %s sec"%t.secs)

        logging.info("STAGE 5: Getting pid maps...")
        with Timer(verbose=False) as t:
            final_event_rate = get_pid_maps(event_rate_reco_maps,
                                            self.pid_service)
        profile.debug("==> elapsed time for pid stage: %s sec"%t.secs)

        if not return_stages:
            return final_event_rate

        # Otherwise, return all stages as a simple tuple
        return (flux_maps, osc_flux_maps, event_rate_maps,
                event_rate_reco_maps, final_event_rate)
Example #30
    def __init__(self, ebins, czbins, detector_depth=None, earth_model=None,
                 prop_height=None, **kwargs):
        """
        Parameters needed to instantiate a Prob3OscillationService:
        * ebins: Energy bin edges
        * czbins: cos(zenith) bin edges
        * earth_model: Earth density model used for matter oscillations.
        * detector_depth: Detector depth in km.
        * prop_height: Height in the atmosphere to begin in km.
        """
        OscillationServiceBase.__init__(self, ebins, czbins)
        logging.info('Initializing %s...'%self.__class__.__name__)

        report_params(get_params(),['km','','km'])

        self.prop_height = prop_height
        earth_model = find_resource(earth_model)
        self.barger_prop = BargerPropagator(earth_model, detector_depth)
        self.barger_prop.UseMassEigenstates(False)
Example #31
def test_lookup_indices():
    """Unit tests for `lookup_indices` function"""

    #
    # Test a variety of points.
    # Points falling exactly on a bin boundary are included in the bin above;
    # a point exactly on the uppermost edge falls into the last bin.
    #
    n_evts = 100

    x = np.array([-5, 0.5, 1.5, 7.0, 6.5, 8.0, 6.5], dtype=FTYPE)
    y = np.array([-5, 0.5, 1.5, 1.5, 3.0, 1.5, 2.5], dtype=FTYPE)
    z = np.array([-5, 0.5, 1.5, 1.5, 0.5, 6.0, 0.5], dtype=FTYPE)

    w = np.ones(n_evts, dtype=FTYPE)

    x = SmartArray(x)
    y = SmartArray(y)
    z = SmartArray(z)

    w = SmartArray(w)

    binning_x = OneDimBinning(name="x", num_bins=7, is_lin=True, domain=[0, 7])
    binning_y = OneDimBinning(name="y", num_bins=4, is_lin=True, domain=[0, 4])
    binning_z = OneDimBinning(name="z", num_bins=2, is_lin=True, domain=[0, 2])

    binning_1d = binning_x
    binning_2d = binning_x * binning_y
    binning_3d = binning_x * binning_y * binning_z

    # 1D case: check that each event falls into its predicted bin
    #
    # Values below the first bin edge are assigned index -1 (underflow);
    # values above the last bin edge are assigned the total number of bins
    # (overflow)
    #
    logging.trace("TEST 1D:")
    logging.trace("Total number of bins: {}".format(7))
    logging.trace("array in 1D: {}".format(x.get()))
    logging.trace("Binning: {}".format(binning_1d.bin_edges[0]))
    indices = lookup_indices([x], binning_1d)
    logging.trace("indices of each array element: {}".format(indices.get()))
    logging.trace("*********************************")
    test = indices.get()
    ref = np.array([-1, 0, 1, 6, 6, 7, 6])
    assert np.array_equal(test, ref), "test={} != ref={}".format(test, ref)

    # 2D case:
    #
    # The binning edges are flattened as follows:
    #   [(x=0, y=0), (x=0, y=1), (x=1, y=0), ...]
    #
    logging.trace("TEST 2D:")
    logging.trace("Total number of bins: {}".format(7 * 4))
    logging.trace("array in 2D: {}".format(list(zip(x.get(), y.get()))))
    logging.trace("Binning: {}".format(binning_2d.bin_edges))
    indices = lookup_indices([x, y], binning_2d)
    logging.trace("indices of each array element: {}".format(indices.get()))
    logging.trace("*********************************")
    test = indices.get()
    ref = np.array([-1, 0, 5, 25, 27, 28, 26])
    assert np.array_equal(test, ref), "test={} != ref={}".format(test, ref)

    # 3D case:
    #
    # the binning edges are flattened as follows:
    #   [(x=0, y=0, z=0), (x=0, y=0, z=1), (x=0, y=1, z=0)...]
    #
    logging.trace("TEST 3D:")
    logging.trace("Total number of bins: {}".format(7 * 4 * 2))
    logging.trace("array in 3D: {}".format(list(zip(x.get(), y.get(),
                                                    z.get()))))
    logging.trace("Binning: {}".format(binning_3d.bin_edges))
    indices = lookup_indices([x, y, z], binning_3d)
    logging.trace("indices of each array element: {}".format(indices.get()))
    logging.trace("*********************************")
    test = indices.get()
    ref = np.array([-1, 0, 11, 51, 54, 56, 52])
    assert np.array_equal(test, ref), "test={} != ref={}".format(test, ref)

    logging.info("<< PASS : test_lookup_indices >>")
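
The reference indices above follow C-order (row-major) flattening of the bin grid, i.e. for the 3-D case index = (ix * n_y + iy) * n_z + iz. For instance (x, y, z) = (1.5, 1.5, 1.5) falls into bins (ix, iy, iz) = (1, 1, 1), giving (1*4 + 1)*2 + 1 = 11, the third entry of the 3-D reference array.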
Example #32
def test_Data():
    """Unit tests for Data class"""
    # Instantiate from LEESARD file - located in $PISA_RESOURCES
    file_loc = 'LEESARD/PRD_extend_finalLevel/12550.pckl'
    file_loc2 = 'LEESARD/PRD_extend_finalLevel/14550.pckl'
    f = from_file(file_loc)
    f2 = from_file(file_loc2)
    d = {'nue+nuebar': f}
    d2 = {'numu+numubar': f2}
    data = Data(d)
    data2 = Data(d2)
    logging.debug(str((data.keys())))

    muon_file = 'GRECO/new_style_files/Level7_muongun.12370_15.pckl'
    m = {'muons': from_file(muon_file)}
    m = Data(val=m)
    assert m.contains_muons
    assert not m.contains_neutrinos
    logging.debug(str((m)))
    data = data + m
    assert data.contains_neutrinos
    logging.debug(str((data)))
    if not data.contains_muons:
        raise Exception("data doesn't contain muons.")
    logging.debug(str((data.neutrinos.keys())))

    noise_file = 'GRECO/new_style_files/Level7_VuvuzelaPureNoise_V2.990015.pckl'
    n = {'noise': from_file(noise_file)}
    n = Data(val=n)
    assert n.contains_noise
    assert not n.contains_neutrinos
    logging.debug(str((n)))
    data = data + n
    assert data.contains_neutrinos
    logging.debug(str((data)))
    if not data.contains_noise:
        raise Exception("data doesn't contain noise.")
    logging.debug(str((data.neutrinos.keys())))

    # Apply a simple cut
    # data.applyCut('(zenith <= 1.1) & (energy <= 200)')
    # for fi in data.flavint_groups:
    #     assert np.max(data[fi]['zenith']) <= 1.1
    #     assert np.max(data[fi]['energy']) <= 200

    # Apply an "inbounds" cut via a OneDimBinning
    # e_binning = OneDimBinning(
    #     name='energy', num_bins=80, is_log=True, domain=[10, 200]*ureg.GeV
    # )
    # data.keepInbounds(e_binning)
    # for fi in data.flavint_groups:
    #     assert np.min(data[fi]['energy']) >= 10
    #     assert np.max(data[fi]['energy']) <= 200

    # Apply an "inbounds" cut via a MultiDimBinning
    # e_binning = OneDimBinning(
    #     name='energy', num_bins=80, is_log=True, domain=[20, 210]*ureg.GeV
    # )
    # cz_binning = OneDimBinning(
    #     name='zenith', num_bins=40, is_lin=True, domain=[0.1, 1.8*np.pi]
    # )
    # mdb = MultiDimBinning([e_binning, cz_binning])
    # data.keepInbounds(mdb)
    # for fi in data.flavint_groups:
    #     assert np.min(data[fi]['energy']) >= 20
    #     assert np.max(data[fi]['energy']) <= 210
    #     assert np.min(data[fi]['zenith']) >= 0.1
    #     assert np.max(data[fi]['zenith']) <= 1.8*np.pi

    # # Now try to apply a cut that fails on one flav/int (since the field will
    # # be missing) and make sure that the cut did not get applied anywhere in
    # # the end (i.e., it is rolled back)
    # sub_evts = data['nue+nuebar']
    # sub_evts.pop('energy')
    # data['nue+nuebar'] = sub_evts
    # try:
    #     data.applyCut('(energy >= 30) & (energy <= 40)')
    # except Exception:
    #     pass
    # else:
    #     raise Exception('Should not have been able to apply the cut!')
    # for fi in data.flavint_groups:
    #     if fi == NuFlavIntGroup('nue+nuebar'):
    #         continue
    #     assert np.min(data[fi]['energy']) < 30

    data.save('/tmp/test_FlavIntDataGroup.json')
    data.save('/tmp/test_FlavIntDataGroup.hdf5')
    data = Data('/tmp/test_FlavIntDataGroup.json')
    data = Data(val='/tmp/test_FlavIntDataGroup.hdf5')

    d3 = data + data2 + m
    logging.debug(str((d3)))
    d3_com = d3.transform_groups(['nue+nuebar+numu+numubar'])
    logging.debug(str((d3_com)))

    logging.info('<< PASS : test_Data >>')
Example #33
    def _compute_nominal_outputs(self):
        """load the evnts from file, perform sanity checks and histogram them
        (into final MapSet)

        """
        # get params
        data_file_name = self.params.data_file.value
        sim_version = self.params.sim_ver.value
        bdt_cut = self.params.bdt_cut.value.m_as('dimensionless')

        self.bin_names = self.output_binning.names

        # TODO: convert units using e.g. `comp_units` in stages/reco/hist.py
        self.bin_edges = []
        for name in self.bin_names:
            if 'energy' in  name:
                bin_edges = self.output_binning[name].bin_edges.to('GeV').magnitude
            else:
                bin_edges = self.output_binning[name].bin_edges.magnitude
            self.bin_edges.append(bin_edges)

        # the rest of this function is PISA v2 legacy code...
        # right now only use burn sample with sim_version = '4digit'
        #print "sim_version == ", sim_version
        if sim_version == "4digit":
            Reco_Neutrino_Name = 'IC86_Dunkman_L6_MultiNest8D_PDG_Neutrino'
            Reco_Track_Name = 'IC86_Dunkman_L6_MultiNest8D_PDG_Track'
        elif sim_version == "5digit" or sim_version=="dima":
            Reco_Neutrino_Name = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_NumuCC'
            Reco_Track_Name = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_Track'
        else:
            raise ValueError(
                'only allow 4digit, 5digit(H2 model for hole ice) or'
                ' dima (dima p1 and p2 for hole ice)!'
            )

        data_file = h5py.File(find_resource(data_file_name), 'r')
        L6_result = np.array(data_file['IC86_Dunkman_L6']['result'])
        dLLH = np.array(data_file['IC86_Dunkman_L6']['delta_LLH'])
        reco_energy_all = np.array(data_file[Reco_Neutrino_Name]['energy'])
        reco_coszen_all = np.array(np.cos(
            data_file[Reco_Neutrino_Name]['zenith']
        ))
        reco_trck_len_all = np.array(data_file[Reco_Track_Name]['length'])
        #print "before L6 cut, no. of burn sample = ", len(reco_coszen_all)

        # sanity check
        santa_doms = data_file['IC86_Dunkman_L6_SANTA_DirectDOMs']['value']
        l3 = data_file['IC86_Dunkman_L3']['value']
        l4 = data_file['IC86_Dunkman_L4']['result']
        l5 = data_file['IC86_Dunkman_L5']['bdt_score']
        assert(np.all(santa_doms>=3) and np.all(l3 == 1) and np.all(l5 >= 0.1))

        # l4==1 was not applied when i3 files were written to hdf5 files, so do
        # it here
        dLLH = dLLH[l4==1]
        reco_energy_all = reco_energy_all[l4==1]
        reco_coszen_all = reco_coszen_all[l4==1]
        l5 = l5[l4==1]
        L6_result = L6_result[l4==1]
        data_file.close()

        dLLH_L6 = dLLH[L6_result==1]
        l5 = l5[L6_result==1]
        reco_energy_L6 = reco_energy_all[L6_result==1]
        reco_coszen_L6 = reco_coszen_all[L6_result==1]
        #print "after L6 cut, no. of burn sample = ", len(reco_coszen_L6)

        # Cut: Only keep bdt score >= 0.2 (from MSU latest result, make data/MC
        # agree much better); if use no such further cut, use bdt_cut = 0.1
        logging.info(
            "Cut2, removing events with bdt_score < %s, i.e. only keeping"
            " events with bdt_score >= %s" % (bdt_cut, bdt_cut)
        )
        cut_events = {}
        cut = l5>=bdt_cut
        cut_events['reco_energy'] = reco_energy_L6[cut]
        cut_events['reco_coszen'] = reco_coszen_L6[cut]
        cut_events['pid'] = dLLH_L6[cut]

        hist, _ = np.histogramdd(sample = np.array(
            [cut_events[bin_name] for bin_name in self.bin_names]
        ).T, bins=self.bin_edges)

        maps = [Map(name=self.output_names[0], hist=hist,
                    binning=self.output_binning)]
        self.template = MapSet(maps, name='data')
Example #34
def run_interpolated_fit(fit_directory, job_idx, skip_successful=False):
    """Run the hypersurface fit for a grid point.
    
    If `skip_successful` is true, do not run if the `fit_successful` flag is already
    True.
    """
    
    assert os.path.isdir(fit_directory), "fit directory does not exist"
    
    gridpoint_json = os.path.join(fit_directory, f"gridpoint_{job_idx:06d}.json.bz2")
    gridpoint_data = from_json(gridpoint_json)

    if skip_successful and gridpoint_data["fit_successful"]:
        logging.info(f"Fit at job index {job_idx} already successful, skipping...")
        return

    metadata = from_json(os.path.join(fit_directory, "metadata.json"))
    
    interpolation_param_spec = metadata["interpolation_param_spec"]
    
    # this is a pipeline configuration in the form of an OrderedDict
    nominal_dataset = metadata["nominal_dataset"]
    # Why can we still not load PISA objects from JSON that are inside a dict?! Grrr...
    nominal_dataset["pipeline_cfg"] = pipeline_cfg_from_states(
        nominal_dataset["pipeline_cfg"]
    )
    # this is a list of pipeline configurations
    sys_datasets = metadata["sys_datasets"]
    for sys_dataset in sys_datasets:
        sys_dataset["pipeline_cfg"] = pipeline_cfg_from_states(
            sys_dataset["pipeline_cfg"]
        )
    # this is a dict of param_name : value pairs
    param_values = gridpoint_data["param_values"]
    # we do a redundant check to make sure the parameter values at this grid point are
    # correct
    interpolation_param_names = metadata["interpolation_param_names"]
    grid_shape = tuple(metadata["grid_shape"])
    # the grid point index of this job
    grid_idx = list(np.ndindex(grid_shape))[job_idx]
    for i, n in enumerate(interpolation_param_names):
        ms = "Inconsistent parameter values at grid point!"
        assert interpolation_param_spec[n]["values"][grid_idx[i]] == param_values[n], ms
    
    # now we need to adjust the values of the parameter in all pipelines for this point
    logging.info(f"updating pipelines with parameter values: {param_values}")
    for dataset in [nominal_dataset] + sys_datasets:
        for stage_cfg in dataset["pipeline_cfg"].values():
            if "params" not in stage_cfg.keys(): continue
            for param in interpolation_param_names:
                if param in stage_cfg["params"].names:
                    stage_cfg["params"][param].value = param_values[param]
    
    # these are the parameters of the hypersurface, NOT the ones we interpolate them
    # over!
    hypersurface_params = []
    for param_state in metadata["hypersurface_params"]:
        hypersurface_params.append(HypersurfaceParam.from_state(param_state))
    
    # We create Pipeline objects, get their outputs and then forget about the Pipeline
    # object on purpose! The memory requirement to hold all systematic sets at the same
    # time is just too large, especially on the cluster. The way we do it below we
    # only need enough memory for one dataset at a time.
    nominal_dataset["mapset"] = Pipeline(nominal_dataset["pipeline_cfg"]).get_outputs()
    for sys_dataset in sys_datasets:
        sys_dataset["mapset"] = Pipeline(sys_dataset["pipeline_cfg"]).get_outputs()
    
    # Merge maps according to the combine regex, if one was provided
    combine_regex = metadata["combine_regex"]
    if combine_regex is not None:
        nominal_dataset["mapset"] = nominal_dataset["mapset"].combine_re(combine_regex)
        for sys_dataset in sys_datasets:
            sys_dataset["mapset"] = sys_dataset["mapset"].combine_re(combine_regex)

    hypersurface_fit_kw = metadata["hypersurface_fit_kw"]
    hypersurfaces = collections.OrderedDict()
    log = metadata["log"]  # flag determining whether hs fit is run in log-space or not
    for map_name in nominal_dataset["mapset"].names:
        nominal_map = nominal_dataset["mapset"][map_name]
        nominal_param_values = nominal_dataset["sys_params"]

        sys_maps = [sys_dataset["mapset"][map_name] for sys_dataset in sys_datasets]
        sys_param_values = [sys_dataset["sys_params"] for sys_dataset in sys_datasets]

        hypersurface = Hypersurface(
            # Yes, this MUST be a deepcopy! Otherwise weird memory overwrites happen
            # and all the numbers get jumbled across the hypersurfaces of different maps
            params=copy.deepcopy(hypersurface_params),
            initial_intercept=0. if log else 1.,  # Initial value for intercept
            log=log
        )

        hypersurface.fit(
            nominal_map=nominal_map,
            nominal_param_values=nominal_param_values,
            sys_maps=sys_maps,
            sys_param_values=sys_param_values,
            norm=True,
            # Is the space or loading time really a problem?
            # keep_maps=False,  # it would take a lot more space otherwise
            **hypersurface_fit_kw
        )

        logging.debug("\nFitted hypersurface report:\n%s" % hypersurface)
        hypersurfaces[map_name] = hypersurface

    gridpoint_data["hs_fit"] = hypersurfaces
    gridpoint_data["fit_successful"] = True
    
    to_json(gridpoint_data, gridpoint_json)
Example #35
def get_fisher_matrices(template_settings,
                        grid_settings,
                        IMH=True,
                        NMH=False,
                        dump_all_stages=False,
                        save_templates=False,
                        outdir=None):
    '''
  Main function that runs the Fisher analysis for the chosen hierarchy(ies) (inverted by default).

  Returns a dictionary of Fisher matrices, in the format:
  {'IMH': {'cscd': [...],
          'trck': [...],
          'comb': [...],
          },
  'NMH': {'cscd': [...],
          'trck': [...],
          'comb': [...],
         }
  }

  If save_templates=True and no hierarchy is given, only fiducial templates will be written out;
  if one is given, then the templates used to obtain the gradients will be written out in
  addition.
  '''
    if outdir is None and (save_templates or dump_all_stages):
        logging.info(
            "No output directory specified. Will save templates to current working directory."
        )
        outdir = os.getcwd()

    tprofile.info("start initializing")

    # Get the parameters
    params = template_settings['params']
    bins = template_settings['binning']

    # Artificially add the hierarchy parameter to the list of parameters
    # The method get_hierarchy_gradients below will know how to deal with it
    params['hierarchy_nh'] = {
        "value": 1.,
        "range": [0., 1.],
        "fixed": False,
        "prior": None
    }
    params['hierarchy_ih'] = {
        "value": 0.,
        "range": [0., 1.],
        "fixed": False,
        "prior": None
    }

    chosen_data = []
    if IMH:
        chosen_data.append(('IMH', False))
        logging.info("Fisher matrix will be built for IMH.")
    if NMH:
        chosen_data.append(('NMH', True))
        logging.info("Fisher matrix will be built for NMH.")
    if chosen_data == []:
        # In this case, only the fiducial maps (for both hierarchies) will be written
        logging.info("No Fisher matrices will be built.")

    # There is no sense in performing any of the following steps if no Fisher matrices are to be built
    # and no templates are to be saved.
    if chosen_data != [] or dump_all_stages or save_templates:

        # Initialise return dict to hold Fisher matrices
        fisher = {
            data_tag: {
                'cscd': [],
                'trck': [],
                'comb': []
            }
            for data_tag, data_normal in chosen_data
        }

        # Get a template maker with the settings used to initialize
        template_maker = TemplateMaker(get_values(params), **bins)

        tprofile.info("stop initializing\n")

        # Generate fiducial templates for both hierarchies (needed for partial derivatives
        # w.r.t. hierarchy parameter)
        fiducial_maps = {}
        for hierarchy in ['NMH', 'IMH']:

            logging.info("Generating fiducial templates for %s." % hierarchy)

            # Get the fiducial parameter values corresponding to this hierarchy
            fiducial_params = select_hierarchy(
                params, normal_hierarchy=(hierarchy == 'NMH'))

            # Generate fiducial maps, either all of them or only the ultimate one
            tprofile.info("start template calculation")
            with Timer() as t:
                fid_maps = template_maker.get_template(
                    get_values(fiducial_params), return_stages=dump_all_stages)
            tprofile.info("==> elapsed time for template: %s sec" % t.secs)

            fiducial_maps[
                hierarchy] = fid_maps[4] if dump_all_stages else fid_maps

            # save fiducial map(s)
            # all stages
            if dump_all_stages:
                stage_names = ("0_unoscillated_flux", "1_oscillated_flux",
                               "2_oscillated_counts", "3_reco", "4_pid")
                stage_maps = {}
                for stage in xrange(0, len(fid_maps)):
                    stage_maps[stage_names[stage]] = fid_maps[stage]
                logging.info(
                    "Writing fiducial maps (all stages) for %s to %s." %
                    (hierarchy, outdir))
                to_json(stage_maps,
                        os.path.join(outdir, "fid_map_" + hierarchy + ".json"))
            # only the final stage
            elif save_templates:
                logging.info(
                    "Writing fiducial map (final stage) for %s to %s." %
                    (hierarchy, outdir))
                to_json(fiducial_maps[hierarchy],
                        os.path.join(outdir, "fid_map_" + hierarchy + ".json"))

        # Get_gradients and get_hierarchy_gradients will both (temporarily)
        # store the templates used to generate the gradient maps
        store_dir = outdir if save_templates else tempfile.gettempdir()

        # Calculate Fisher matrices for the user-defined cases (NMH true and/or IMH true)
        for data_tag, data_normal in chosen_data:

            logging.info("Running Fisher analysis for %s." % (data_tag))

            # The fiducial params are selected from the hierarchy case that does NOT match
            # the data, as we are varying from this model to find the 'best fit'
            fiducial_params = select_hierarchy(params, not data_normal)

            # Get the free parameters (i.e. those for which the gradients should be calculated)
            free_params = select_hierarchy(get_free_params(params),
                                           not data_normal)
            gradient_maps = {}
            for param in free_params.keys():
                # Special treatment for the hierarchy parameter
                if param == 'hierarchy':
                    gradient_maps[param] = get_hierarchy_gradients(
                        data_tag=data_tag,
                        fiducial_maps=fiducial_maps,
                        fiducial_params=fiducial_params,
                        grid_settings=grid_settings,
                        store_dir=store_dir)
                else:
                    gradient_maps[param] = get_gradients(
                        data_tag=data_tag,
                        param=param,
                        template_maker=template_maker,
                        fiducial_params=fiducial_params,
                        grid_settings=grid_settings,
                        store_dir=store_dir)

            logging.info("Building Fisher matrix for %s." % (data_tag))

            # Build Fisher matrices for the given hierarchy
            fisher[data_tag] = build_fisher_matrix(
                gradient_maps=gradient_maps,
                fiducial_map=fiducial_maps['IMH']
                if data_normal else fiducial_maps['NMH'],
                template_settings=fiducial_params)

            # If Fisher matrices exist for both channels, add the matrices to obtain
            # the combined one.
            if len(fisher[data_tag].keys()) > 1:
                fisher[data_tag]['comb'] = FisherMatrix(
                    matrix=np.array([
                        f.matrix for f in fisher[data_tag].itervalues()
                    ]).sum(axis=0),
                    parameters=gradient_maps.keys(),  #order is important here!
                    best_fits=[
                        fiducial_params[par]['value']
                        for par in gradient_maps.keys()
                    ],
                    priors=[
                        Prior.from_param(fiducial_params[par])
                        for par in gradient_maps.keys()
                    ],
                )
        return fisher

    else:
        logging.info("Nothing to be done.")
        return {}
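A minimal access sketch for the returned dictionary described in the docstring above (with template_settings and grid_settings loaded e.g. via from_json); note the 'comb' entry is only filled when more than one channel was fitted, so this is illustrative rather than canonical usage.

fisher = get_fisher_matrices(template_settings, grid_settings, IMH=True)
combined_imh = fisher['IMH']['comb']  # FisherMatrix summed over the 'cscd' and 'trck' channels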
Beispiel #36
            data_maker.params.fix(p_name)

        analysis = Analysis(data_maker=data_maker,
                            template_maker=template_maker,
                            metric=args.metric,
                            blind=args.blind)

        analysis.minimizer_settings = from_file(args.minimizer_settings)
        analysis.pseudodata_method = args.pseudo_data

        #analysis.randomize_free_params()

        results = []

        for i in range(args.num_trials):
            logging.info('Running trial %i' % i)
            np.random.seed()
            analysis.generate_psudodata()

            if args.function == 'profile':
                if args.mode == 'H0':
                    results.append(
                        analysis.profile(args.var, [0.] * ureg.dimensionless,
                                         check_octant=not args.no_check_octant,
                                         pprint=not args.quiet))
                elif args.mode == 'scan':
                    results.append(
                        analysis.profile(args.var,
                                         eval(args.range),
                                         check_octant=not args.no_check_octant,
                                         pprint=not args.quiet))
Beispiel #37
    def run_minimizer(self, pprint=True, skip=False):
        # Get initial values
        x0 = self.template_maker.params.free._rescaled_values

        # bfgs steps outside of given bounds by 1 epsilon to evaluate gradients
        try:
            epsilon = self.minimizer_settings['options']['value']['eps']
        except:
            epsilon = self.minimizer_settings['options']['value']['epsilon']
        bounds = [(0 + epsilon, 1 - epsilon)] * len(x0)
        logging.info('running the %s optimizer' %
                     self.minimizer_settings['method']['value'])

        # Using scipy.opt.minimize allows a whole host of minimisers to be used
        # This set by the method value in your minimiser settings file
        self.n_minimizer_calls = 0
        if skip:
            best_fit_vals = x0
            metric_val = self._minimizer_callable(x0, False)
            dict_flags = {
                'warnflag': 0,
                'task': 'skip',
                'funcalls': 0,
                'nit': 0,
                'avg_tmp_time': 0,
                'n_minimizer_calls': 0
            }
        else:
            start_t = time.time()
            minim_result = opt.minimize(
                fun=self._minimizer_callable,
                x0=x0,
                args=(pprint, ),
                bounds=bounds,
                method=self.minimizer_settings['method']['value'],
                options=self.minimizer_settings['options']['value'])

            # get additional metrics:
            end_t = time.time()
            if pprint:
                # clear the line
                print ''
            print '\naverage template generation time during minimizer run: %.4f ms' % (
                (end_t - start_t) * 1000. / self.n_minimizer_calls)
            avg_tmp_time = (end_t - start_t) * 1000. / self.n_minimizer_calls
            best_fit_vals = minim_result.x
            metric_val = minim_result.fun
            template = self.template_maker.get_outputs()
            template = [t.combine_wildcard('*') for t in template]
            template[0].name = 'total'
            dict_flags = {}
            mod_chi2_val = (
                self.pseudodata.metric_total(expected_values=template,
                                             metric='mod_chi2') +
                self.template_maker.params.priors_penalty(metric='mod_chi2'))
            dict_flags['agreement_mod_chi2'] = mod_chi2_val
            dict_flags['warnflag'] = minim_result.status
            dict_flags['task'] = minim_result.message
            if 'jac' in minim_result:
                dict_flags['grad'] = minim_result.jac
            dict_flags['funcalls'] = minim_result.nfev
            dict_flags['nit'] = minim_result.nit
            dict_flags['avg_tmp_time'] = avg_tmp_time
            dict_flags['n_minimizer_calls'] = self.n_minimizer_calls
            if dict_flags['warnflag'] > 0:
                logging.warning(str(dict_flags))

        all_metrics = {}
        template = self.template_maker.get_outputs()
        template = [t.combine_wildcard('*') for t in template]
        template[0].name = 'total'
        #for metric in ['llh', 'conv_llh', 'barlow_llh','chi2', 'mod_chi2']:
        for metric in ['llh', 'chi2']:
            all_metrics[metric] = self.pseudodata.metric_total(
                expected_values=template,
                metric=metric) + self.template_maker.params.priors_penalty(
                    metric=metric)

        return best_fit_vals, metric_val, all_metrics, dict_flags
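A self-contained sketch (separate from the class above) of the bounds trick used in run_minimizer: L-BFGS-B estimates gradients by finite differences and may step `eps` outside the stated bounds, so the rescaled parameters are constrained to [eps, 1 - eps]. The toy objective and values are illustrative only.

import numpy as np
import scipy.optimize as opt

def toy_objective(x):
    # simple convex function with its minimum at 0.3 in every dimension
    return np.sum((x - 0.3) ** 2)

eps = 1e-8
x0 = np.full(3, 0.5)                     # rescaled free parameters start at 0.5
bounds = [(0 + eps, 1 - eps)] * len(x0)  # shrink the unit interval by one epsilon per side
result = opt.minimize(fun=toy_objective, x0=x0, bounds=bounds,
                      method="L-BFGS-B", options={"eps": eps})
print(result.x, result.fun)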
Beispiel #38
aeff_list = []
aeff_err_list = []
flavor_list = []

cut_sim_down = True
solid_angle = 2.0*np.pi
if args.all_cz:
    # Then use all sky, don't remove simulated downgoing events:
    cut_sim_down = False
    solid_angle = 4.0*np.pi

# Loop over all neutrino flavours, and get cc Aeff:
for flav,val in nuDict.items():

    logging.info("Loading data for %s..."%flav)
    data = LoadData(args.data_dir,args.geom_str,flav)

    cc_cuts = list(s1_s2_cuts)
    cc_cuts.append(("I3MCWeightDict","InteractionType",1))
    cc_cuts.append((args.mcnu,"type",val))

    cut_list = get_arb_cuts(data,cc_cuts,mcnu=args.mcnu,cut_sim_down=cut_sim_down)

    logging.info("  NEvents: %d"%np.sum(cut_list))

    if 'nue' in flav: nfiles = args.ne
    elif 'numu' in flav: nfiles = args.nmu
    elif 'nutau' in flav: nfiles = args.ntau
    else: raise ValueError("Unrecognized flav: %s"%flav)
Beispiel #39
def test_to_json_from_json():
    """Unit tests for writing various types of objects to and reading from JSON
    files (including bz2-compressed and xor-scrambled files)"""
    # pylint: disable=unused-variable
    from shutil import rmtree
    import sys
    from pisa.utils.comparisons import recursiveEquality

    proto_float_array = np.array([-np.inf, np.nan, np.inf, -1.1, 0.0, 1.1],
                                 dtype=np.float64)
    proto_int_array = np.array([-2, -1, 0, 1, 2], dtype=np.int64)
    proto_str_array = np.array(['a', 'ab', 'abc', '', ' '], dtype=str)

    floating_types = [float] + sorted(
        set(t for _, t in np.typeDict.items() if issubclass(t, np.floating)),
        key=str,
    )
    integer_types = [int] + sorted(
        set(t for _, t in np.typeDict.items() if issubclass(t, np.integer)),
        key=str,
    )

    test_info = [
        dict(
            proto_array=proto_float_array,
            dtypes=floating_types,
        ),
        dict(
            proto_array=proto_int_array,
            dtypes=integer_types,
        ),
        # TODO: strings currently do not work
        #dict(
        #    proto_array=proto_str_array,
        #    dtypes=[str, np.str0, np.str_, np.string_],
        #),
    ]

    test_data = OrderedDict()
    for info in test_info:
        proto_array = info['proto_array']
        for dtype in info['dtypes']:
            typed_array = proto_array.astype(dtype)
            s_dtype = str(np.dtype(dtype))
            test_data["array_" + s_dtype] = typed_array
            test_data["scalar_" + s_dtype] = dtype(typed_array[0])

    temp_dir = tempfile.mkdtemp()
    try:
        for name, obj in test_data.items():
            # Test that the object can be written / read directly
            base_fname = os.path.join(temp_dir, name + '.json')
            for ext in ['', '.bz2', '.xor']:
                fname = base_fname + ext
                to_json(obj, fname)
                loaded_data = from_json(fname)
                if obj.dtype in floating_types:
                    assert np.allclose(
                        loaded_data, obj, rtol=1e-12, atol=0, equal_nan=True
                    ), '{}=\n{}\nloaded=\n{}\nsee file: {}'.format(
                        name, obj, loaded_data, fname)
                else:
                    assert np.all(loaded_data == obj), \
                        '{}=\n{}\nloaded_nda=\n{}\nsee file: {}'.format(
                            name, obj, loaded_data, fname
                        )

            # Test that the same object can be written / read as a value in a
            # dictionary
            orig = OrderedDict([(name, obj), (name + "x", obj)])
            base_fname = os.path.join(temp_dir, 'd.{}.json'.format(name))
            for ext in ['', '.bz2', '.xor']:
                fname = base_fname + ext
                to_json(orig, fname)
                loaded = from_json(fname)
                assert recursiveEquality(loaded, orig), \
                    'orig=\n{}\nloaded=\n{}\nsee file: {}'.format(
                        orig, loaded, fname
                    )
    finally:
        rmtree(temp_dir)

    logging.info('<< PASS : test_to_json_from_json >>')
def load_interpolated_hypersurfaces(input_file):
    '''
    Load a set of interpolated hypersurfaces from a file.

    Analogously to "load_hypersurfaces", this function returns a
    collection with a HypersurfaceInterpolator object for each Map.

    Parameters
    ----------
    input_file : str
        A JSON input file as produced by fit_hypersurfaces if interpolation params
        were given. It has the form::
            {
                interpolation_param_spec = {
                    'param1': {"values": [val1_1, val1_2, ...], "scales_log": True/False}
                    'param2': {"values": [val2_1, val2_2, ...], "scales_log": True/False}
                    ...
                    'paramN': {"values": [valN_1, valN_2, ...], "scales_log": True/False}
                },
                'hs_fits': [
                    <list of dicts where keys are map names such as 'nue_cc' and values
                    are hypersurface states>
                ]
            }

    Returns
    -------
    collections.OrderedDict
        dictionary with a :obj:`HypersurfaceInterpolator` for each map
    '''
    assert isinstance(input_file, str)

    if input_file.endswith("json") or input_file.endswith("json.bz2"):
        logging.info(f"Loading interpolated hypersurfaces from file: {input_file}")
        input_data = from_json(input_file)
        assert set(['interpolation_param_spec', 'hs_fits']).issubset(
            set(input_data.keys())), 'missing keys'
        map_names = None
        # input_data['hs_fits'] is a list of dicts, each dict contains "param_values"
        # and "hs_fit"
        logging.info("Reading file complete, generating hypersurfaces...")
        for hs_fit_dict in input_data['hs_fits']:
            # this is still not the actual Hypersurface, but a dict with the (linked)
            # maps and the HS fit for the map...
            hs_state_maps = hs_fit_dict["hs_fit"]
            if map_names is None:
                map_names = list(hs_state_maps.keys())
            else:
                assert set(map_names) == set(hs_state_maps.keys()), "inconsistent maps"
            # When data is recovered from JSON, the object states are not automatically
            # converted to the corresponding objects, so we need to do it manually here.
            for map_name in map_names:
                hs_state_maps[map_name] = Hypersurface.from_state(hs_state_maps[map_name])

        logging.info(f"Read hypersurface maps: {map_names}")
        
        # Now we have a list of dicts where the map names are on the lower level.
        # We need to convert this into a dict of HypersurfaceInterpolator objects.
        output = collections.OrderedDict()
        for m in map_names:
            hs_fits = [{"param_values": fd["param_values"], "hs_fit": fd['hs_fit'][m]} for fd in input_data['hs_fits']]
            output[m] = HypersurfaceInterpolator(input_data['interpolation_param_spec'], hs_fits)
    else:
        raise Exception("unknown file format")
    return output
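A minimal usage sketch; the file path is hypothetical, and what one does with each returned HypersurfaceInterpolator depends on that class's actual interface.

interp_hs = load_interpolated_hypersurfaces("hypersurface_fits.json.bz2")  # hypothetical path
for map_name, interpolator in interp_hs.items():
    print(map_name, type(interpolator).__name__)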
Beispiel #41
def test_bootstrap():
    """Unit test for the bootstrap stage."""

    from pisa.core.distribution_maker import DistributionMaker
    from pisa.core.map import Map
    from pisa.utils.config_parser import parse_pipeline_config
    from pisa.utils.comparisons import ALLCLOSE_KW

    from numpy.testing import assert_allclose

    example_cfg = parse_pipeline_config("settings/pipeline/example.cfg")

    # We need to insert the bootstrap stage right after the data loading stage
    bootstrap_pipe_cfg = insert_bootstrap_after_data_loader(example_cfg, seed=0)

    logging.debug("bootstrapped pipeline stage order:")
    logging.debug(list(bootstrap_pipe_cfg.keys()))

    # get a baseline
    dmaker = DistributionMaker([example_cfg])
    map_baseline = dmaker.get_outputs(return_sum=True)[0]

    # Make sure that different seeds produce different maps, and that the same seed will
    # produce the same map.
    dmaker = DistributionMaker([bootstrap_pipe_cfg])
    map_seed0 = dmaker.get_outputs(return_sum=True)[0]

    # find key of bootstrap stage
    bootstrap_idx = 0
    for i, stage in enumerate(dmaker.pipelines[0].stages):
        if stage.__class__.__name__ == "bootstrap":
            bootstrap_idx = i

    # without re-loading the entire pipeline, we set the seed and call the setup function
    # to save time for the test
    dmaker.pipelines[0].stages[bootstrap_idx].seed = 1
    dmaker.pipelines[0].stages[bootstrap_idx].setup()

    map_seed1 = dmaker.get_outputs(return_sum=True)[0]

    assert not map_seed0 == map_seed1

    dmaker.pipelines[0].stages[bootstrap_idx].seed = 0
    dmaker.pipelines[0].stages[bootstrap_idx].setup()
    map_seed0_reprod = dmaker.get_outputs(return_sum=True)[0]

    assert map_seed0 == map_seed0_reprod

    # Quantify the variance of the resulting maps. They should be about the size of the
    # expectation from sum of weights-squared.

    nominal_values = []
    for i in range(100):
        dmaker.pipelines[0].stages[bootstrap_idx].seed = i
        dmaker.pipelines[0].stages[bootstrap_idx].setup()
        map_bootstrap = dmaker.get_outputs(return_sum=True)[0]
        nominal_values.append(map_bootstrap.nominal_values)

    nominal_values = np.stack(nominal_values)
    with np.errstate(divide="ignore", invalid="ignore"):
        # calculate the ratio between the bootstrap nominal and the baseline nominal
        bs_nom_ratios = np.mean(nominal_values, axis=0) / map_baseline.nominal_values
        # and the standard deviation ratio as well
        bs_std_ratios = np.std(nominal_values, axis=0) / map_baseline.std_devs
        # assert that both nominal and standard deviation match the expectation from
        # baseline up to a small error
        assert np.abs(np.nanmean(bs_nom_ratios) - 1.0) < 0.01
        # the standard deviations are a little harder to match in 100 samples
        assert np.abs(np.nanmean(bs_std_ratios) - 1.0) < 0.02

    logging.info("<< PASS : bootstrap >>")
Beispiel #42
def test_MutableMultiFileIterator():
    """Unit test for class `MutableMultiFileIterator`"""
    import shutil
    import tempfile

    prefixes = ['a', 'b', 'c']
    file_len = 4

    reference_lines = [
        # start in file a
        'a0',
        'a1',
        # switch to file b after second line of a
        'b0',
        'b1',
        # switch to file c after second line of b
        'c0',
        'c1',
        'c2',
        'c3',
        # switch back to b after exhausting c
        'b2',
        'b3',
        # switch back to a after exhausting b
        'a2',
        'a3'
    ]

    tempdir = tempfile.mkdtemp()
    try:
        # Create test files
        paths = [join(tempdir, prefix) for prefix in prefixes]
        for prefix, path in zip(prefixes, paths):
            with open(path, 'w') as f:
                for i in range(file_len):
                    f.write('%s%d\n' % (prefix, i))
            logging.trace(path)

        actual_lines = []
        with open(paths[0]) as fp:
            file_iter = MutableMultiFileIterator(fp=fp, fpname=paths[0])

            remaining_paths = paths[1:]

            for record in file_iter:
                actual_lines.append(record['line'].strip())
                logging.trace(str(record))
                if record['line'][1:].strip() == '1':
                    if remaining_paths:
                        path = remaining_paths.pop(0)
                        file_iter.switch_to_file(fpname=path)
                    else:
                        for l in str(file_iter.location).split('\n'):
                            logging.trace(l)
    except:
        shutil.rmtree(tempdir)
        raise

    if actual_lines != reference_lines:
        raise ValueError('<< FAIL : test_MutableMultiFileIterator >>')

    logging.info('<< PASS : test_MutableMultiFileIterator >>')
Beispiel #43
def test_hdf():
    """Unit tests for hdf module"""
    from shutil import rmtree
    from tempfile import mkdtemp

    data = OrderedDict([
        ('top', OrderedDict([
            ('secondlvl1', OrderedDict([
                ('thirdlvl11', np.linspace(1, 100, 10000).astype(np.float64)),
                ('thirdlvl12', b"this is a string"),
                ('thirdlvl13', b"this is another string"),
                ('thirdlvl14', 1),
                ('thirdlvl15', 1.1),
                ('thirdlvl16', np.float32(1.1)),
                ('thirdlvl17', np.float64(1.1)),
                ('thirdlvl18', np.int8(1)),
                ('thirdlvl19', np.int16(1)),
                ('thirdlvl110', np.int32(1)),
                ('thirdlvl111', np.int64(1)),
                ('thirdlvl112', np.uint8(1)),
                ('thirdlvl113', np.uint16(1)),
                ('thirdlvl114', np.uint32(1)),
                ('thirdlvl115', np.uint64(1)),
            ])),
            ('secondlvl2', OrderedDict([
                ('thirdlvl21', np.linspace(1, 100, 10000).astype(np.float32)),
                ('thirdlvl22', b"this is a string"),
                ('thirdlvl23', b"this is another string"),
            ])),
            ('secondlvl3', OrderedDict([
                ('thirdlvl31', np.array(range(1000)).astype(np.int)),
                ('thirdlvl32', b"this is a string"),
            ])),
            ('secondlvl4', OrderedDict([
                ('thirdlvl41', np.linspace(1, 100, 10000)),
                ('thirdlvl42', b"this is a string"),
            ])),
            ('secondlvl5', OrderedDict([
                ('thirdlvl51', np.linspace(1, 100, 10000)),
                ('thirdlvl52', b"this is a string"),
            ])),
            ('secondlvl6', OrderedDict([
                ('thirdlvl61', np.linspace(100, 1000, 10000)),
                ('thirdlvl62', b"this is a string"),
            ])),
        ]))
    ])

    temp_dir = mkdtemp()
    try:
        fpath = os.path.join(temp_dir, 'to_hdf_noattrs.hdf5')
        to_hdf(data, fpath, overwrite=True, warn=False)
        loaded_data1 = from_hdf(fpath)
        assert data.keys() == loaded_data1.keys()
        assert recursiveEquality(data, loaded_data1), \
                str(data) + "\n" + str(loaded_data1)

        attrs = OrderedDict([
            ('float', 9.98237),
            ('float32', np.float32(1.)),
            ('float64', np.float64(1.)),
            ('pi', np.float64(np.pi)),

            ('string', "string attribute!"),

            ('int', 1),
            ('int8', np.int8(1)),
            ('int16', np.int16(1)),
            ('int32', np.int32(1)),
            ('int64', np.int64(1)),

            ('uint8', np.uint8(1)),
            ('uint16', np.uint16(1)),
            ('uint32', np.uint32(1)),
            ('uint64', np.uint64(1)),

            ('bool', True),
            ('bool8', np.bool8(True)),
            ('bool_', np.bool_(True)),
        ])

        attr_type_checkers = {
            "float": lambda x: isinstance(x, float),
            "float32": lambda x: x.dtype == np.float32,
            "float64": lambda x: x.dtype == np.float64,
            "pi": lambda x: x.dtype == np.float64,

            "string": lambda x: isinstance(x, string_types),

            "int": lambda x: isinstance(x, int),
            "int8": lambda x: x.dtype == np.int8,
            "int16": lambda x: x.dtype == np.int16,
            "int32": lambda x: x.dtype == np.int32,
            "int64": lambda x: x.dtype == np.int64,

            "uint8": lambda x: x.dtype == np.uint8,
            "uint16": lambda x: x.dtype == np.uint16,
            "uint32": lambda x: x.dtype == np.uint32,
            "uint64": lambda x: x.dtype == np.uint64,

            "bool": lambda x: isinstance(x, bool),
            "bool8": lambda x: x.dtype == np.bool8,
            "bool_": lambda x: x.dtype == np.bool_,
        }

        fpath = os.path.join(temp_dir, 'to_hdf_withattrs.hdf5')
        to_hdf(data, fpath, attrs=attrs, overwrite=True, warn=False)
        loaded_data2 = from_hdf(fpath)
        loaded_attrs = loaded_data2.attrs
        assert data.keys() == loaded_data2.keys()
        assert attrs.keys() == loaded_attrs.keys(), \
                '\n' + str(attrs.keys()) + '\n' + str(loaded_attrs.keys())
        assert recursiveEquality(data, loaded_data2)
        assert recursiveEquality(attrs, loaded_attrs)

        for key, val in attrs.items():
            tgt_type_checker = attr_type_checkers[key]
            assert tgt_type_checker(loaded_attrs[key]), \
                    "key '%s': val '%s' is type '%s'" % \
                    (key, val, type(loaded_attrs[key]))
    finally:
        rmtree(temp_dir)

    logging.info('<< PASS : test_hdf >>')
Beispiel #44
    def _compute_nominal_transforms(self):
        """Compute cross-section transforms."""
        logging.info('Updating xsec.genie cross-section histograms...')

        self.load_xsec_splines()
        livetime = self._ev_param(self.params['livetime'].value)
        ice_p = self._ev_param(self.params['ice_p'].value)
        fid_vol = self._ev_param(self.params['fid_vol'].value)
        mr_h20 = self._ev_param(self.params['mr_h20'].value)
        x_energy_scale = self.params['x_energy_scale'].value

        input_binning = self.input_binning

        ebins = input_binning.true_energy
        for idx, name in enumerate(input_binning.names):
            if 'true_energy' in name:
                e_idx = idx

        xsec_transforms = {}
        for flav in self.input_names:
            for int_ in ALL_NUINT_TYPES:
                flavint = flav + '_' + str(int_)
                logging.debug('Obtaining cross-sections for %s', flavint)
                xsec_map = self.xsec.get_map(flavint,
                                             MultiDimBinning([ebins]),
                                             x_energy_scale=x_energy_scale)
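                # The helper below broadcasts the 1D cross-section histogram over the
                # other input dimensions: np.meshgrid with indexing='ij' tiles each
                # input across the full binning shape, and element `e_idx` of the
                # result carries the cross-section values along the energy axis.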

                def func(idx):
                    if idx == e_idx:
                        return xsec_map.hist
                    return tuple(range(input_binning.shape[idx]))

                num_dims = input_binning.num_dims
                xsec_trns = np.meshgrid(*map(func, range(num_dims)),
                                        indexing='ij')[e_idx]
                xsec_trns *= (livetime * fid_vol * (ice_p / mr_h20) *
                              (6.022140857e+23 / ureg.mol))
                xsec_transforms[NuFlavInt(flavint)] = xsec_trns

        nominal_transforms = []
        for flavint_group in self.transform_groups:
            flav_names = [str(flav) for flav in flavint_group.flavs]
            for input_name in self.input_names:
                if input_name not in flav_names:
                    continue

                xform_array = []
                for flavint in flavint_group.flavints:
                    if flavint in xsec_transforms:
                        xform_array.append(xsec_transforms[flavint])
                xform_array = reduce(add, xform_array)

                xform = BinnedTensorTransform(
                    input_names=input_name,
                    output_name=str(flavint_group),
                    input_binning=input_binning,
                    output_binning=self.output_binning,
                    xform_array=xform_array)
                nominal_transforms.append(xform)

        return TransformSet(transforms=nominal_transforms)
def prepare_interpolated_fit(
    nominal_dataset, sys_datasets, params, fit_directory, interpolation_param_spec,
    combine_regex=None, log=False, **hypersurface_fit_kw
):
    '''
    Writes steering files for fitting hypersurfaces on a grid of arbitrary parameters.
    The fits can then be run on a cluster with `run_interpolated_fit`.

    Parameters
    ----------
    nominal_dataset : dict
        Definition of the nominal dataset. Specifies the pipeline with which the maps
        can be created, and the values of all systematic parameters used to produce the
        dataset.
        Format must be:
            nominal_dataset = {
                "pipeline_cfg" = <pipeline cfg file (either cfg file path or dict)>),
                "sys_params" = { param_0_name : param_0_value_in_dataset, ..., param_N_name : param_N_value_in_dataset }
            }
        Sys params must correspond to the provided HypersurfaceParam instances provided
        in the `params` arg.

    sys_datasets : list of dicts
        List of dicts, where each dict defines one of the systematics datasets to be
        fitted. The format of each dict is the same as explained for `nominal_dataset`

    params : list of HypersurfaceParam
        List of HypersurfaceParam instances that define the hypersurface. Note that
        this defines ALL hypersurfaces fitted in this function, i.e. only a single
        parameterisation is supported for all maps (this is almost always what you want).

    fit_directory : str
        Directory in which the fits will be run. Steering files for the fits to be run
        will be stored here.

    combine_regex : list of str, or None
        List of string regex expressions that will be used for merging maps. Used to
        combine similar species. Must be something that can be passed to the
        `MapSet.combine_re` function (see that function's docs for more details). Choose
        `None` if you do not want to perform this merging.
    
    interpolation_param_spec : collections.OrderedDict
        Specification of parameter grid that hypersurfaces should be interpolated over.
        The dict should have the following form::
            interpolation_param_spec = {
                'param1': {"values": [val1_1, val1_2, ...], "scales_log": True/False}
                'param2': {"values": [val2_1, val2_2, ...], "scales_log": True/False}
                ...
                'paramN': {"values": [valN_1, valN_2, ...], "scales_log": True/False}
            }
        The hypersurfaces will be fit on an N-dimensional rectilinear grid over
        parameters 1 to N. The flag `scales_log` indicates that the interpolation over
        that parameter should happen in log-space.

    hypersurface_fit_kw : kwargs
        kwargs will be passed on to the calls to `Hypersurface.fit`
    '''

    # Take (deep) copies of lists/dicts to avoid modifying the originals
    # Useful for cases where this function is called in a loop (e.g. leave-one-out tests)
    nominal_dataset = copy.deepcopy(nominal_dataset)
    sys_datasets = copy.deepcopy(sys_datasets)
    params = copy.deepcopy(params)

    # Check types
    assert isinstance(sys_datasets, collections.Sequence)
    assert isinstance(params, collections.Sequence)
    assert isinstance(fit_directory, str)
    # there must not be any ambiguity between fitting the hypersurfaces and 
    # interpolating them later
    msg = "interpolation params must be specified as a dict with ordered keys"
    assert isinstance(interpolation_param_spec, collections.OrderedDict), msg
    for k, v in interpolation_param_spec.items():
        assert set(v.keys()) == {"values", "scales_log"}
        assert isinstance(v["values"], collections.Sequence)
        # We need to extract the magnitudes from the Quantities to avoid a
        # UnitStrippedWarning. For some reason, doing `np.min(v["values"])` messes up
        # the data structure inside the values in a way that can cause a crash when we
        # try to serialize the values later. Lesson: Stripping units inadvertently can
        # have strange, unforeseen consequences.
        mags = [x.m for x in v["values"]]
        if v["scales_log"] and np.min(mags) <= 0:
            raise ValueError("A log-scaling parameter cannot be equal to or less "
                "than zero!")
    
    # Check output format and path
    assert os.path.isdir(fit_directory), "fit directory does not exist"
    
    # Check formatting of datasets is as expected
    all_datasets = [nominal_dataset] + sys_datasets
    for dataset in all_datasets:
        assert isinstance(dataset, collections.Mapping)
        assert "pipeline_cfg" in dataset
        assert isinstance(dataset["pipeline_cfg"], (str, collections.Mapping))
        assert "sys_params" in dataset
        assert isinstance(dataset["sys_params"], collections.Mapping)
        
        dataset["pipeline_cfg"] = serialize_pipeline_cfg(dataset["pipeline_cfg"])

    # Check params
    assert len(params) >= 1
    for p in params:
        assert isinstance(p, HypersurfaceParam)

    # Report inputs
    msg = "Hypersurface fit details :\n"
    msg += f"  Num params            : {len(params)}\n"
    msg += f"  Num fit coefficients  : {sum([p.num_fit_coeffts for p in params])}\n"
    msg += f"  Num datasets          : 1 nominal + {len(sys_datasets)} systematics\n"
    msg += f"  Nominal values        : {nominal_dataset['sys_params']}\n"
    msg += "Hypersurface fits are prepared on the following grid:\n"
    msg += str(interpolation_param_spec)
    logging.info(msg)

    # because we require this to be an OrderedDict, there is no ambiguity in the
    # construction of the mesh here
    param_names = list(interpolation_param_spec.keys())
    grid_shape = tuple(len(v["values"]) for v in interpolation_param_spec.values())

    # We store all information needed to run a fit in metadata
    metadata = collections.OrderedDict(
        interpolation_param_spec=interpolation_param_spec,
        interpolation_param_names=param_names,  # convenience
        grid_shape=grid_shape,  # convenience
        nominal_dataset=nominal_dataset,
        sys_datasets=sys_datasets,
        hypersurface_params=params,
        combine_regex=combine_regex,
        log=log,
        hypersurface_fit_kw=hypersurface_fit_kw
    )
    
    to_json(metadata, os.path.join(fit_directory, "metadata.json"))
    
    # we write one JSON file for each grid point
    for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)):
        # Although this is technically redundant, we store the parameter values
        # explicitly for each grid point.
        param_values = {}
        for i, n in enumerate(param_names):
            param_values[n] = interpolation_param_spec[n]["values"][grid_idx[i]]

        gridpoint_data = {
            "param_values": param_values,
            "hs_fit": None,
            "job_idx": job_idx,
            "grid_idx": grid_idx,
            "fit_successful": False
        }
        to_json(gridpoint_data, os.path.join(fit_directory,
            f"gridpoint_{job_idx:06d}.json.bz2"))

    logging.info(f"Grid fit preparation complete! Total number of jobs: {job_idx+1}")
    return job_idx + 1  # job_idx is zero-indexed, so this is the total number of jobs
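A schematic of how the steering inputs described in the docstring might be assembled; the pipeline paths, systematic-parameter names, and grid values are placeholders, and construction of the HypersurfaceParam instances is omitted because it depends on that class's API.

import collections
from pisa import ureg

nominal_dataset = {
    "pipeline_cfg": "settings/pipeline/example.cfg",              # placeholder path
    "sys_params": {"dom_eff": 1.0, "hole_ice": 25.0},             # placeholder values
}
sys_datasets = [
    {"pipeline_cfg": "settings/pipeline/example_domeff_0.9.cfg",  # placeholder path
     "sys_params": {"dom_eff": 0.9, "hole_ice": 25.0}},
]
# Grid over which the fitted hypersurfaces will later be interpolated; the values are
# pint Quantities because their magnitudes are extracted during the checks above.
interpolation_param_spec = collections.OrderedDict(
    bulk_ice_abs={"values": [0.9 * ureg.dimensionless,
                             1.0 * ureg.dimensionless,
                             1.1 * ureg.dimensionless],
                  "scales_log": False},
)
# n_jobs = prepare_interpolated_fit(
#     nominal_dataset, sys_datasets, params=hypersurface_params,  # list of HypersurfaceParam
#     fit_directory="/path/to/fit_dir", interpolation_param_spec=interpolation_param_spec,
#     combine_regex=["nue.*_cc", "numu.*_cc"], log=False,
# )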
Beispiel #46
    def get_combined_xsec(fpath, ver=None):
        """Load the cross-section values from a ROOT file and instantiate a
        CombinedSpline object."""
        # NOTE: ROOT import here as it is optional but still want to import
        # module for e.g. building docs
        import ROOT

        fpath = find_resource(fpath)
        logging.info('Loading GENIE ROOT cross-section file %s', fpath)

        # Name of neutrino flavours in the ROOT file.
        flavs = ('nu_e', 'nu_mu', 'nu_tau', 'nu_e_bar', 'nu_mu_bar',
                 'nu_tau_bar')

        rfile = ROOT.TFile.Open(fpath, 'read')  # pylint: disable=no-member
        xsec_splines = FlavIntData()
        for flav in flavs:
            for int_ in ALL_NUINT_TYPES:
                xsec_splines[flav, int_] = {}
                for part in ('O16', 'H1'):
                    str_repr = flav + '_' + part + '/' + 'tot_' + str(int_)
                    xsec_splines[flav + str(int_)][part] = \
                        ROOT.gDirectory.Get(str_repr) # pylint: disable=no-member
        rfile.Close()

        def eval_spl(spline,
                     binning,
                     out_units=ureg.m**2,
                     x_energy_scale=1,
                     **kwargs):
            init_names = ['true_energy']
            init_units = [ureg.GeV]

            if set(binning.names) != set(init_names):
                raise ValueError('Input binning names {0} does not match '
                                 'instantiation binning names '
                                 '{1}'.format(binning.names, init_names))

            if set(map(str, binning.units)) != set(map(str, init_units)):
                for name in init_names:
                    binning[name].to(init_units)

            bin_centers = [x.m for x in binning.weighted_centers][0]

            nu_O16, nu_H1 = [], []
            for e_val in bin_centers:
                nu_O16.append(spline['O16'].Eval(e_val))
                nu_H1.append(spline['H1'].Eval(e_val))

            nu_O16, nu_H1 = map(np.array, (nu_O16, nu_H1))
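            # Combine the per-nucleus cross-sections weighted by the approximate mass
            # fractions of oxygen and hydrogen in water (H2O)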
            nu_xsec = ((0.8879 * nu_O16) +
                       (0.1121 * nu_H1)) * 1E-38 * ureg.cm**2

            nu_xsec_hist = nu_xsec.to(out_units).magnitude
            return Map(hist=nu_xsec_hist, binning=binning, **kwargs)

        def validate_spl(binning):
            if np.all(binning.true_energy.midpoints.m > 1E3):
                raise ValueError('Energy values out of range in binning '
                                 '{0}'.format(binning.true_energy))

        inXSec = []
        for flav in flavs:
            for int_ in ALL_NUINT_TYPES:
                flavint = NuFlavInt(flav + str(int_))
                xsec = Spline(name=str(flavint),
                              spline=xsec_splines[flavint],
                              eval_spl=eval_spl,
                              validate_spl=validate_spl)
                inXSec.append(xsec)

        return CombinedSpline(inXSec, interactions=True, ver=ver)
Beispiel #47
    # Read the template settings
    template_settings = from_json(args.template_settings)

    # This file only contains the number of test points for each parameter (and
    # perhaps eventually a non-linearity criterion)
    grid_settings = from_json(args.grid_settings)

    # Get the Fisher matrices for the desired hierarchy and fiducial settings
    fisher_matrices = get_fisher_matrices(template_settings=template_settings,
                                          grid_settings=grid_settings,
                                          IMH=args.inverted_truth,
                                          NMH=args.normal_truth,
                                          dump_all_stages=args.dump_all_stages,
                                          save_templates=args.save_templates,
                                          outdir=args.outdir)

    # Fisher matrices are saved in any case
    for data_tag in fisher_matrices:
        fisher_basename = 'fisher_data_%s' % data_tag
        for chan in fisher_matrices[data_tag]:
            if chan == 'comb':
                outfile = os.path.join(args.outdir, fisher_basename + '.json')
                logging.info("%s: writing combined Fisher matrix to %s" %
                             (data_tag, outfile))
            else:
                outfile = os.path.join(args.outdir,
                                       fisher_basename + '_%s.json' % chan)
                logging.info("%s: writing Fisher matrix for channel %s to %s" %
                             (data_tag, chan, outfile))
            fisher_matrices[data_tag][chan].saveFile(outfile)
Beispiel #48
def run_unit_tests(path=PISA_PATH,
                   allow_missing=OPTIONAL_MODULES,
                   verbosity=Levels.WARN):
    """Run all tests found at `path` (or recursively below if `path` is a
    directory).

    Each module is imported and each test function is run initially with
    `set_verbosity(verbosity)`, but if an exception is caught, the module is
    re-imported or the test function is re-run with
    `set_verbosity(Levels.TRACE)`, then the traceback from the (original)
    exception emitted is displayed.

    Parameters
    ----------
    path : str
        Path to file or directory

    allow_missing : None or sequence of str

    verbosity : int in pisa.utils.log.Levels

    Raises
    ------
    Exception
        If any import or test fails not in `allow_missing`

    """
    set_verbosity(verbosity)
    logging.info("%sPlatform information:", PFX)
    logging.info("%s  HOSTNAME = %s", PFX, socket.gethostname())
    logging.info("%s  FQDN = %s", PFX, socket.getfqdn())
    logging.info("%s  OS = %s %s", PFX, platform.system(), platform.release())
    for key, val in cpuinfo.get_cpu_info().items():
        logging.info("%s  %s = %s", PFX, key, val)
    logging.info(PFX)
    logging.info("%sModule versions:", PFX)
    for module_name in REQUIRED_MODULES + OPTIONAL_MODULES:
        try:
            module = import_module(module_name)
        except ImportError:
            if module_name in REQUIRED_MODULES:
                raise
            ver = "optional module not installed or not import-able"
        else:
            if hasattr(module, "__version__"):
                ver = module.__version__
            else:
                ver = "?"
        logging.info("%s  %s : %s", PFX, module_name, ver)
    logging.info(PFX)

    path = expand(path, absolute=True, resolve_symlinks=True)
    if allow_missing is None:
        allow_missing = []
    elif isinstance(allow_missing, str):
        allow_missing = [allow_missing]

    tests = find_unit_tests(path)

    module_pypaths_succeeded = []
    module_pypaths_failed = []
    module_pypaths_failed_ignored = []
    test_pypaths_succeeded = []
    test_pypaths_failed = []
    test_pypaths_failed_ignored = []

    for rel_file_path, test_func_names in tests.items():
        pypath = ["pisa"] + rel_file_path[:-3].split("/")
        parent_pypath = ".".join(pypath[:-1])
        module_name = pypath[-1].replace(".", "_")
        module_pypath = f"{parent_pypath}.{module_name}"

        try:
            set_verbosity(verbosity)
            logging.info(PFX + f"importing {module_pypath}")

            set_verbosity(Levels.WARN)
            module = import_module(module_pypath, package=parent_pypath)

        except Exception as err:
            if (isinstance(err, ImportError) and hasattr(err, "name")
                    and err.name in allow_missing  # pylint: disable=no-member
                ):
                err_name = err.name  # pylint: disable=no-member
                module_pypaths_failed_ignored.append(module_pypath)
                logging.warning(
                    f"{PFX}module {err_name} failed to import wile importing"
                    f" {module_pypath}, but ok to ignore")
                continue

            module_pypaths_failed.append(module_pypath)

            set_verbosity(verbosity)
            msg = f"<< FAILURE IMPORTING : {module_pypath} >>"
            logging.error(PFX + "=" * len(msg))
            logging.error(PFX + msg)
            logging.error(PFX + "=" * len(msg))

            # Reproduce the failure with full output
            set_verbosity(Levels.TRACE)
            try:
                import_module(module_pypath, package=parent_pypath)
            except Exception:
                pass

            set_verbosity(Levels.TRACE)
            logging.exception(err)

            set_verbosity(verbosity)
            logging.error(PFX + "#" * len(msg))

            continue

        else:
            module_pypaths_succeeded.append(module_pypath)

        for test_func_name in test_func_names:
            test_pypath = f"{module_pypath}.{test_func_name}"
            try:
                set_verbosity(verbosity)
                logging.debug(PFX + f"getattr({module}, {test_func_name})")

                set_verbosity(Levels.WARN)
                test_func = getattr(module, test_func_name)

                # Run the test function
                set_verbosity(verbosity)
                logging.info(PFX + f"{test_pypath}()")

                set_verbosity(Levels.WARN)
                test_func()

            except Exception as err:
                if (isinstance(err, ImportError) and hasattr(err, "name")
                        and err.name in allow_missing  # pylint: disable=no-member
                    ):
                    err_name = err.name  # pylint: disable=no-member
                    test_pypaths_failed_ignored.append(module_pypath)
                    logging.warning(
                        PFX +
                        f"{test_pypath} failed because module {err_name} failed to"
                        + f" load, but ok to ignore")

                    continue

                test_pypaths_failed.append(test_pypath)
                set_verbosity(verbosity)
                msg = f"<< FAILURE RUNNING : {test_pypath} >>"
                logging.error(PFX + "=" * len(msg))
                logging.error(PFX + msg)
                logging.error(PFX + "=" * len(msg))

                # Reproduce the error with full output

                set_verbosity(Levels.TRACE)
                try:
                    test_func = getattr(module, test_func_name)
                    with np.printoptions(
                            precision=np.finfo(pisa.FTYPE).precision + 2,
                            floatmode="fixed",
                            sign=" ",
                            linewidth=200,
                    ):
                        test_func()
                except Exception:
                    pass

                set_verbosity(Levels.TRACE)
                logging.exception(err)

                set_verbosity(verbosity)
                logging.error(PFX + "#" * len(msg))

            else:
                test_pypaths_succeeded.append(test_pypath)

            finally:
                # remove references to the test function, e.g. to remove refs
                # to pycuda / numba.cuda contexts so these can be closed
                try:
                    del test_func
                except NameError:
                    pass

        # NOTE: Until we get all GPU code into Numba, need to unload pycuda
        # and/or numba.cuda contexts before a module requiring the other one is
        # to be imported.
        # NOTE: the following causes a traceback to be emitted at the very end
        # of the script, regardless of the exception catching here.
        if (pisa.TARGET == "cuda" and pycuda is not None
                and hasattr(pycuda, "autoinit")
                and hasattr(pycuda.autoinit, "context")):
            try:
                pycuda.autoinit.context.detach()
            except Exception:
                pass

        # Attempt to unload the imported module
        # TODO: pipeline, etc. fail as isinstance(service, (Stage, PiStage)) is False
        #if module_pypath in sys.modules and module_pypath != "pisa":
        #    del sys.modules[module_pypath]
        #del module

        # TODO: crashes program; subsequent calls in same shell crash(!?!?)
        # if pisa.TARGET == 'cuda' and nbcuda is not None:
        #    try:
        #        nbcuda.close()
        #    except Exception:
        #        pass

    # Summarize results

    n_import_successes = len(module_pypaths_succeeded)
    n_import_failures = len(module_pypaths_failed)
    n_import_failures_ignored = len(module_pypaths_failed_ignored)
    n_test_successes = len(test_pypaths_succeeded)
    n_test_failures = len(test_pypaths_failed)
    n_test_failures_ignored = len(test_pypaths_failed_ignored)

    set_verbosity(verbosity)
    logging.info(
        PFX + f"<< IMPORT TESTS : {n_import_successes} imported,"
        f" {n_import_failures} failed,"
        f" {n_import_failures_ignored} failed to import but ok to ignore >>")
    logging.info(PFX + f"<< UNIT TESTS : {n_test_successes} succeeded,"
                 f" {n_test_failures} failed,"
                 f" {n_test_failures_ignored} failed but ok to ignore >>")

    # Exit with error if any failures (import or unit test)

    if module_pypaths_failed or test_pypaths_failed:
        msgs = []
        if module_pypaths_failed:
            msgs.append(
                f"{n_import_failures} module(s) failed to import:\n  " +
                ", ".join(module_pypaths_failed))
        if test_pypaths_failed:
            msgs.append(f"{n_test_failures} unit test(s) failed:\n  " +
                        ", ".join(test_pypaths_failed))

        # Note the extra newlines before the exception to make it stand out;
        # and newlines after the exception are due to the pycuda error message
        # that is emitted when we call pycuda.autoinit.context.detach()
        sys.stdout.flush()
        sys.stderr.write("\n\n\n")
        raise Exception("\n".join(msgs) + "\n\n\n")
Beispiel #49
def test_Events():
    """Unit tests for Events class"""
    from pisa.utils.flavInt import NuFlavInt
    # Instantiate empty object
    events = Events()

    # Instantiate from PISA events HDF5 file
    events = Events(
        'events/events__vlvnt__toy_1_to_80GeV_spidx1.0_cz-1_to_1_1e2evts_set0__unjoined__with_fluxes_honda-2015-spl-solmin-aa.hdf5'
    )

    # Apply a simple cut
    events = events.applyCut('(true_coszen <= 0.5) & (true_energy <= 70)')
    for fi in events.flavints:
        assert np.max(events[fi]['true_coszen']) <= 0.5
        assert np.max(events[fi]['true_energy']) <= 70

    # Apply an "inbounds" cut via a OneDimBinning
    true_e_binning = OneDimBinning(name='true_energy',
                                   num_bins=80,
                                   is_log=True,
                                   domain=[10, 60] * ureg.GeV)
    events = events.keepInbounds(true_e_binning)
    for fi in events.flavints:
        assert np.min(events[fi]['true_energy']) >= 10
        assert np.max(events[fi]['true_energy']) <= 60

    # Apply an "inbounds" cut via a MultiDimBinning
    true_e_binning = OneDimBinning(name='true_energy',
                                   num_bins=80,
                                   is_log=True,
                                   domain=[20, 50] * ureg.GeV)
    true_cz_binning = OneDimBinning(name='true_coszen',
                                    num_bins=40,
                                    is_lin=True,
                                    domain=[-0.8, 0])
    mdb = MultiDimBinning([true_e_binning, true_cz_binning])
    events = events.keepInbounds(mdb)
    for fi in events.flavints:
        assert np.min(events[fi]['true_energy']) >= 20
        assert np.max(events[fi]['true_energy']) <= 50
        assert np.min(events[fi]['true_coszen']) >= -0.8
        assert np.max(events[fi]['true_coszen']) <= 0

    # Now try to apply a cut that fails on one flav/int (since the field will
    # be missing) and make sure that the cut did not get applied anywhere in
    # the end (i.e., it is rolled back)
    sub_evts = events['nutaunc']
    sub_evts.pop('true_energy')
    events['nutaunc'] = sub_evts
    try:
        events = events.applyCut('(true_energy >= 30) & (true_energy <= 40)')
    except Exception:
        pass
    else:
        raise Exception('Should not have been able to apply the cut!')
    for fi in events.flavints:
        if fi == NuFlavInt('nutaunc'):
            continue
        assert np.min(events[fi]['true_energy']) < 30

    logging.info(
        '<< PASS : test_Events >> (note:'
        ' "[   ERROR] Events object is in an inconsistent state. Reverting cut'
        ' for all flavInts." message above **is expected**.)')
Beispiel #50
    def setup_function(self):

        self.data.representation = self.calc_mode

        #
        # Init arrays
        #

        # Prepare some array shapes
        gradient_params_shape = (len(self.gradient_param_names), )

        if self.data.is_map:
            # speed up calculation by adding links
            # as nominal flux doesn't depend on the (outgoing) flavour
            self.data.link_containers('nu', [
                'nue_cc', 'numu_cc', 'nutau_cc', 'nue_nc', 'numu_nc',
                'nutau_nc'
            ])

            self.data.link_containers('nubar', [
                'nuebar_cc', 'numubar_cc', 'nutaubar_cc', 'nuebar_nc',
                'numubar_nc', 'nutaubar_nc'
            ])

        # Loop over containers
        for container in self.data:

            # Define shapes for containers

            # TODO maybe include toggles for nutau (only needed if prompt
            # considered) and for nu+nubar (only needed if nu->nubar
            # oscillations included) for better speed/memory performance

            # [ N events, 2 flavors in flux, nu vs nubar ]
            # SDB - reduced flavours to 2 (nue, numu) since nutau flux not
            # stored in MCEq splines
            flux_container_shape = (container.size, 2)
            gradients_shape = tuple(
                list(flux_container_shape) + list(gradient_params_shape))

            container["nu_flux"] = np.full(flux_container_shape,
                                           np.NaN,
                                           dtype=FTYPE)
            container["gradients"] = np.full(gradients_shape,
                                             np.NaN,
                                             dtype=FTYPE)

        # Also create an array container to hold the gradient parameter values
        # Only want this once, e.g. not once per container
        self.gradient_params = np.empty(gradient_params_shape, dtype=FTYPE)

        #
        # Load MCEq splines
        #

        # Have splines for each Barr parameter, plus +/- versions of each
        # Barr parameter corresponding to mesons/antimesons.

        # For a given Barr parameter, the underlying dictionary has the following
        # keys: "dnue", "dnuebar", "dnumu", "dnumubar"

        # Units are changed to m^-2 in creates_splines.., rather than the cm^-2
        # used internally by MCEq

        # Note that doing this all on CPUs, since the splines reside on the CPUs
        # The actual `compute_function` computation can be done on GPUs though

        # Load the MCEq splines
        spline_file = find_resource(self.table_file)
        logging.info("Loading MCEq spline tables from : %s", spline_file)
        # Encoding is to support pickle files created with python v2
        self.spline_tables_dict = pickle.load(BZ2File(spline_file),
                                              encoding="latin1")

        # Ensure that the user is not loading an incompatible spline
        for bp in self.barr_param_names:
            bp_p = bp + '+'  # meson
            bp_m = bp + '-'  # antimeson
            assert bp_p in self.spline_tables_dict.keys(), (
                "Gradient parameter '%s' missing from table" % bp_p)
            assert bp_m in self.spline_tables_dict.keys(), (
                "Gradient parameter '%s' missing from table" % bp_m)

        # Loop over containers
        for container in self.data:

            # Grab containers here once to save time
            # TODO make spline generation script store splines directly in
            # terms of energy, not ln(energy)
            true_log_energy = np.log(container["true_energy"])
            true_abs_coszen = np.abs(container["true_coszen"])
            gradients = container["gradients"]
            nubar = container["nubar"]

            #
            # Flux gradients
            #

            # Evaluate splines to get the flux gradients w.r.t. the Barr parameter values
            # Need to correctly map nu/nubar and flavor to the output arrays

            # Loop over parameters
            for (
                    gradient_param_name,
                    gradient_param_idx,
            ) in self.gradient_param_indices.items():

                # nue(bar)
                self._eval_spline(
                    true_log_energy=true_log_energy,
                    true_abs_coszen=true_abs_coszen,
                    spline=self.spline_tables_dict[gradient_param_name]
                    ["dnue" if nubar > 0 else "dnuebar"],
                    out=gradients[:, 0, gradient_param_idx],
                )

                # numu(bar)
                self._eval_spline(
                    true_log_energy=true_log_energy,
                    true_abs_coszen=true_abs_coszen,
                    spline=self.spline_tables_dict[gradient_param_name]
                    ["dnumu" if nubar > 0 else "dnumubar"],
                    out=gradients[:, 1, gradient_param_idx],
                )

                # nutau(bar)
                # TODO include nutau flux in splines
                # SDB - there is no nutau flux in splines
                ## gradients[:, 2, gradient_param_idx].fill(0.0)

            # Tell the smart arrays we've changed the flux gradient values on the host
            container.mark_changed("gradients")

        # don't forget to un-link everything again
        self.data.unlink_containers()
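
# The `_eval_spline` helper called above is not shown in this snippet. A minimal
# sketch of what it presumably does is given below, assuming the spline objects
# stored in `spline_tables_dict` are callable like scipy's RectBivariateSpline
# (as they are used directly, with `grid=False`, in the later example); the name
# and signature here are illustrative only.

def eval_spline_sketch(true_log_energy, true_abs_coszen, spline, out):
    """Evaluate a 2D flux-gradient spline at per-event (|cos zen|, ln E) points
    and write the result into the pre-allocated output array."""
    out[:] = spline(true_abs_coszen, true_log_energy, grid=False)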
Beispiel #51
    def applyCut(self, keep_criteria):
        """Apply a cut by specifying criteria for keeping events. The cut must
        be successfully applied to all flav/ints in the events object before
        the changes are kept, otherwise the cuts are reverted.

        Parameters
        ----------
        keep_criteria : string
            Any string interpretable as numpy boolean expression.

        Returns
        -------
        remaining_events : Events
            An Events object with the remaining events (deepcopied) and with
            updated cut metadata including `keep_criteria`.

        Examples
        --------
        Keep events with true energies in [1, 80] GeV (note that units are not
        recognized, so have to be handled outside this method)

        >>> remaining = events.applyCut("(true_energy >= 1) & (true_energy <= 80)")

        Do the opposite with "~" inverting the criteria

        >>> remaining = events.applyCut("~((true_energy >= 1) & (true_energy <= 80))")

        Numpy namespace is available for use via `np` prefix

        >>> remaining = events.applyCut("np.log10(true_energy) >= 0")

        """
        # TODO(shivesh): function does not pass tests
        raise NotImplementedError

        if keep_criteria in self.metadata['cuts']:
            return

        assert isinstance(keep_criteria, basestring)

        fig_to_process = []
        if self.contains_neutrinos:
            fig_to_process += deepcopy(self.flavint_groups)
        if self.contains_muons:
            fig_to_process += ['muons']
        if self.contains_noise:
            fig_to_process += ['noise']

        logging.info("Applying cut to %s : %s" %
                     (fig_to_process, keep_criteria))

        fig_processed = []
        remaining_data = {}
        for fig in fig_to_process:
            data_dict = self[fig]
            field_names = data_dict.keys()

            # TODO: handle unicode:
            #  * translate crit to unicode (easiest to hack but could be
            #    problematic elsewhere)
            #  * translate field names to ascii (probably should be done at
            #    the from_hdf stage?)

            # Replace simple field names with full paths into the data that
            # lives in this object
            crit_str = (keep_criteria)
            for field_name in field_names:
                crit_str = crit_str.replace(
                    field_name, 'self["%s"]["%s"]' % (fig, field_name))
            mask = eval(crit_str)
            remaining_data[fig] = {
                k: v[mask]
                for k, v in self[fig].iteritems()
            }
            fig_processed.append(fig)

        remaining_events = Events()
        remaining_events.metadata.update(deepcopy(self.metadata))
        remaining_events.metadata['cuts'].append(keep_criteria)
        for fig in fig_to_process:
            remaining_events[fig] = deepcopy(remaining_data.pop(fig))

        return remaining_events
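
# A standalone sketch of the cut mechanism used by `applyCut` above: simple field
# names in the criteria string are rewritten into lookups on a dict of numpy
# arrays, and the resulting expression is evaluated to a boolean mask. The data
# and names below are made up purely for illustration.

import numpy as np

def apply_cut_sketch(data, keep_criteria):
    crit = keep_criteria
    for field in data:
        crit = crit.replace(field, 'data["%s"]' % field)
    mask = eval(crit)  # numpy namespace available via the `np` prefix
    return {key: vals[mask] for key, vals in data.items()}

events_sketch = {'true_energy': np.array([0.5, 5.0, 50.0, 500.0])}
kept = apply_cut_sketch(events_sketch, '(true_energy >= 1) & (true_energy <= 80)')
assert np.array_equal(kept['true_energy'], [5.0, 50.0])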
Beispiel #52
def test_get_random_state():
    """Unit tests for get_random_state function"""
    # Instantiate random states in all legal ways
    rstates = {
        0: get_random_state(None),
        1: get_random_state('rand'),
        2: get_random_state('random'),
        3: get_random_state(np.random.RandomState(0)),
        4: get_random_state(0),
        5: get_random_state([
            0,
        ]),
        6: get_random_state([0, 0]),
        7: get_random_state([0, 0, 0]),
    }
    rstates[8] = get_random_state(rstates[4].get_state())

    # rs 3-8 should be identical
    ref_id, ref = None, None
    for rs_id, rs in rstates.items():
        if rs_id < 3:
            continue
        if ref is None:
            ref_id = rs_id
            ref = rs.rand(1000)
        else:
            test = rs.rand(1000)
            assert np.array_equal(test, ref), f'rs{rs_id} != rs{ref_id}'

    # Already generated 1k, so generating 2k more gets us 3k; pick off last 1k
    ref = rstates[ref_id].rand(2000)[1000:]
    test = get_random_state(random_state=0, jumpahead=2000).rand(1000)
    assert np.array_equal(test,
                          ref), f'jumpahead=2000: rs != rs{ref_id}[2000:3000]'

    # Test stability of random number generator over time; following were
    # retrieved on 2020-03-19 using numpy 1.18.1 via .. ::
    #
    #   np.array2string(
    #       np.random.RandomState(0).rand(100), precision=20, separator=', '
    #   )
    #

    # pylint: disable=bad-whitespace
    ref = np.array([
        0.5488135039273248,
        0.7151893663724195,
        0.6027633760716439,
        0.5448831829968969,
        0.4236547993389047,
        0.6458941130666561,
        0.4375872112626925,
        0.8917730007820798,
        0.9636627605010293,
        0.3834415188257777,
        0.7917250380826646,
        0.5288949197529045,
        0.5680445610939323,
        0.925596638292661,
        0.07103605819788694,
        0.08712929970154071,
        0.02021839744032572,
        0.832619845547938,
        0.7781567509498505,
        0.8700121482468192,
        0.978618342232764,
        0.7991585642167236,
        0.46147936225293185,
        0.7805291762864555,
        0.11827442586893322,
        0.6399210213275238,
        0.1433532874090464,
        0.9446689170495839,
        0.5218483217500717,
        0.4146619399905236,
        0.26455561210462697,
        0.7742336894342167,
        0.45615033221654855,
        0.5684339488686485,
        0.018789800436355142,
        0.6176354970758771,
        0.6120957227224214,
        0.6169339968747569,
        0.9437480785146242,
        0.6818202991034834,
        0.359507900573786,
        0.43703195379934145,
        0.6976311959272649,
        0.06022547162926983,
        0.6667667154456677,
        0.6706378696181594,
        0.2103825610738409,
        0.1289262976548533,
        0.31542835092418386,
        0.3637107709426226,
        0.5701967704178796,
        0.43860151346232035,
        0.9883738380592262,
        0.10204481074802807,
        0.2088767560948347,
        0.16130951788499626,
        0.6531083254653984,
        0.2532916025397821,
        0.4663107728563063,
        0.24442559200160274,
        0.15896958364551972,
        0.11037514116430513,
        0.6563295894652734,
        0.1381829513486138,
        0.1965823616800535,
        0.3687251706609641,
        0.8209932298479351,
        0.09710127579306127,
        0.8379449074988039,
        0.09609840789396307,
        0.9764594650133958,
        0.4686512016477016,
        0.9767610881903371,
        0.604845519745046,
        0.7392635793983017,
        0.039187792254320675,
        0.2828069625764096,
        0.1201965612131689,
        0.29614019752214493,
        0.11872771895424405,
        0.317983179393976,
        0.41426299451466997,
        0.06414749634878436,
        0.6924721193700198,
        0.5666014542065752,
        0.2653894909394454,
        0.5232480534666997,
        0.09394051075844168,
        0.5759464955561793,
        0.9292961975762141,
        0.31856895245132366,
        0.6674103799636817,
        0.13179786240439217,
        0.7163272041185655,
        0.2894060929472011,
        0.18319136200711683,
        0.5865129348100832,
        0.020107546187493552,
        0.8289400292173631,
        0.004695476192547066,
    ])
    test = np.random.RandomState(0).rand(100)
    assert np.array_equal(test, ref), 'random number generator changed!'

    logging.info('<< PASS : test_get_random_state >>')
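
# A minimal sketch of the behaviour relied on by the test above (an assumption:
# `jumpahead` simply discards that many uniform draws before handing the state
# back, and the real `get_random_state` accepts more input types than shown).

import numpy as np

def get_random_state_sketch(random_state=None, jumpahead=0):
    if random_state is None or random_state in ('rand', 'random'):
        rs = np.random.RandomState()
    elif isinstance(random_state, np.random.RandomState):
        rs = random_state
    else:
        rs = np.random.RandomState(random_state)
    if jumpahead:
        rs.rand(jumpahead)  # discard draws to advance the state
    return rs

reference = np.random.RandomState(0).rand(3000)[2000:]
jumped = get_random_state_sketch(0, jumpahead=2000).rand(1000)
assert np.array_equal(jumped, reference)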
Beispiel #53
def test_kde_stash(verbosity=Levels.WARN):
    """Unit test for the hist stashing feature.

    Hist stashing can greatly speed up fits as long as the only free parameters
    are in stages that work on the output histograms rather than on the individual
    events. In particular, it should be strictly equivalent to either scale all
    weights by a factor and then run the KDE, or to first calculate the KDE and
    then scale all the bin counts by the same factor. This test ensures that the
    order of operations really doesn't matter.

    This should also apply to the errors, independent of whether the bootstrapping
    method or the utils.set_variance stage was used to produce them.
    """

    import pytest
    from numpy.testing import assert_array_equal, assert_allclose

    set_verbosity(verbosity)

    def assert_correct_scaling(pipeline_cfg, fixed_errors=False):
        """Run the pipeline and assert that scaling by a factor of two is correct."""
        dmaker = DistributionMaker([pipeline_cfg])
        out = dmaker.get_outputs(return_sum=True)[0]
        dmaker.pipelines[0].params.weight_scale = 2.0
        out2 = dmaker.get_outputs(return_sum=True)[0]
        if fixed_errors:
            # This is special: we expect that the nominal counts are multiplied, but
            # that the errors stay fixed (applies to set_variance errors)
            assert_array_equal(out.nominal_values * 2.0, out2.nominal_values)
            assert_array_equal(out.std_devs, out2.std_devs)
        else:
            assert out * 2.0 == out2

    ## KDE without errors

    # First aeff, then KDE
    test_cfg = deepcopy(TEST_CONFIGS.pipe_cfg)
    test_cfg[("data", "toy_event_generator")] = deepcopy(
        TEST_CONFIGS.event_generator_cfg
    )
    test_cfg[("aeff", "weight")] = deepcopy(TEST_CONFIGS.aeff_cfg)
    test_cfg[("utils", "kde")] = deepcopy(TEST_CONFIGS.kde_cfg)
    assert_correct_scaling(test_cfg)

    # First KDE, then aeff, with stashing
    test_cfg = deepcopy(TEST_CONFIGS.pipe_cfg)
    test_cfg[("data", "toy_event_generator")] = deepcopy(
        TEST_CONFIGS.event_generator_cfg
    )
    test_cfg[("utils", "kde")] = deepcopy(TEST_CONFIGS.kde_cfg)
    test_cfg[("aeff", "weight")] = deepcopy(TEST_CONFIGS.aeff_cfg)
    # turn on stashing
    test_cfg[("utils", "kde")]["stash_hists"] = True
    # Change aeff calculation to binned mode (i.e. multiply bin counts)
    test_cfg[("aeff", "weight")]["calc_mode"] = TEST_BINNING
    test_cfg[("aeff", "weight")]["apply_mode"] = TEST_BINNING
    assert_correct_scaling(test_cfg)

    ## KDE with bootstrap errors

    # First aeff, then KDE with bootstrap
    test_cfg = deepcopy(TEST_CONFIGS.pipe_cfg)
    test_cfg[("data", "toy_event_generator")] = deepcopy(
        TEST_CONFIGS.event_generator_cfg
    )
    test_cfg[("aeff", "weight")] = deepcopy(TEST_CONFIGS.aeff_cfg)
    test_cfg[("utils", "kde")] = deepcopy(TEST_CONFIGS.kde_cfg)
    # turn OFF stashing
    test_cfg[("utils", "kde")]["stash_hists"] = False
    # turn on bootstrapping
    test_cfg[("utils", "kde")]["bootstrap"] = True
    # return the errors
    test_cfg["pipeline"]["output_key"] = ("weights", "errors")
    assert_correct_scaling(test_cfg)

    # First KDE with stashed hists and bootstrap, then aeff
    test_cfg = deepcopy(TEST_CONFIGS.pipe_cfg)
    test_cfg[("data", "toy_event_generator")] = deepcopy(
        TEST_CONFIGS.event_generator_cfg
    )
    test_cfg[("utils", "kde")] = deepcopy(TEST_CONFIGS.kde_cfg)
    test_cfg[("aeff", "weight")] = deepcopy(TEST_CONFIGS.aeff_cfg)
    # turn on stashing
    test_cfg[("utils", "kde")]["stash_hists"] = True
    # turn on bootstrapping
    test_cfg[("utils", "kde")]["bootstrap"] = True
    # return the errors
    test_cfg["pipeline"]["output_key"] = ("weights", "errors")
    # need to change mode to binned
    test_cfg[("aeff", "weight")]["calc_mode"] = TEST_BINNING
    test_cfg[("aeff", "weight")]["apply_mode"] = TEST_BINNING
    assert_correct_scaling(test_cfg)

    ## KDE with errors calculated using set_variance stage

    # first aeff, then KDE and set_variance
    test_cfg = deepcopy(TEST_CONFIGS.pipe_cfg)
    test_cfg[("data", "toy_event_generator")] = deepcopy(
        TEST_CONFIGS.event_generator_cfg
    )
    test_cfg[("aeff", "weight")] = deepcopy(TEST_CONFIGS.aeff_cfg)
    test_cfg[("utils", "kde")] = deepcopy(TEST_CONFIGS.kde_cfg)
    test_cfg[("utils", "set_variance")] = deepcopy(TEST_CONFIGS.set_variance_cfg)
    # turn on stashing
    test_cfg[("utils", "kde")]["stash_hists"] = False
    # turn OFF bootstrapping
    test_cfg[("utils", "kde")]["bootstrap"] = False
    # return the errors
    test_cfg["pipeline"]["output_key"] = ("weights", "errors")
    # The set_variance stage only calculates errors the first time that the pipeline
    # is evaluated, these errors are stored and re-instated on any sub-sequent
    # evaluations. We expect therefore that only the nominal values scale.
    assert_correct_scaling(test_cfg, fixed_errors=True)

    # first KDE and set_variance, then aeff
    test_cfg = deepcopy(TEST_CONFIGS.pipe_cfg)
    test_cfg[("data", "toy_event_generator")] = deepcopy(
        TEST_CONFIGS.event_generator_cfg
    )
    test_cfg[("utils", "kde")] = deepcopy(TEST_CONFIGS.kde_cfg)
    test_cfg[("aeff", "weight")] = deepcopy(TEST_CONFIGS.aeff_cfg)
    # It is still important that the `set_variance` stage is *last*.
    test_cfg[("utils", "set_variance")] = deepcopy(TEST_CONFIGS.set_variance_cfg)
    # turn on stashing
    test_cfg[("utils", "kde")]["stash_hists"] = True
    # turn OFF bootstrapping
    test_cfg[("utils", "kde")]["bootstrap"] = False
    # return the errors
    test_cfg["pipeline"]["output_key"] = ("weights", "errors")
    # need to change mode to binned
    test_cfg[("aeff", "weight")]["calc_mode"] = TEST_BINNING
    test_cfg[("aeff", "weight")]["apply_mode"] = TEST_BINNING
    # We ensure that the behavior is the same as it has been when we were not stashing
    # the histograms and used set_variance.
    assert_correct_scaling(test_cfg, fixed_errors=True)

    # Using the wrong order (not putting set_variance last)
    test_cfg = deepcopy(TEST_CONFIGS.pipe_cfg)
    test_cfg[("data", "toy_event_generator")] = deepcopy(
        TEST_CONFIGS.event_generator_cfg
    )
    test_cfg[("utils", "kde")] = deepcopy(TEST_CONFIGS.kde_cfg)
    # If set_variance is not the last stage, this breaks. The reason is a slightly
    # silly design of set_variance. It should have been constructed such that the
    # total normalization is always divided out, but it wasn't. The way it is
    # constructed now, it is basically tuned by the scaling factor to work for the
    # given livetime and breaks immediately when that changes.
    test_cfg[("utils", "set_variance")] = deepcopy(TEST_CONFIGS.set_variance_cfg)
    test_cfg[("aeff", "weight")] = deepcopy(TEST_CONFIGS.aeff_cfg)
    # turn on stashing
    test_cfg[("utils", "kde")]["stash_hists"] = True
    # turn OFF bootstrapping
    test_cfg[("utils", "kde")]["bootstrap"] = False
    # return the errors
    test_cfg["pipeline"]["output_key"] = ("weights", "errors")
    # need to change mode to binned
    test_cfg[("aeff", "weight")]["calc_mode"] = TEST_BINNING
    test_cfg[("aeff", "weight")]["apply_mode"] = TEST_BINNING
    # With the wrong order, this will fail.
    # FIXME: If someone changes the behavior of set_variance in the future to be
    # more robust, they are welcome to change this unit test.
    with pytest.raises(AssertionError):
        assert_correct_scaling(test_cfg, fixed_errors=True)

    logging.info("<< PASS : kde_stash >>")
Beispiel #54
def test_BinnedTensorTransform():
    """Unit tests for BinnedTensorTransform class"""
    binning = MultiDimBinning([
        dict(name='energy',
             is_log=True,
             domain=(1, 80) * ureg.GeV,
             num_bins=10),
        dict(name='coszen', is_lin=True, domain=(-1, 0), num_bins=5)
    ])

    nue_map = Map(name='nue',
                  binning=binning,
                  hist=np.random.random(binning.shape))
    nue_map.set_poisson_errors()
    numu_map = Map(name='numu',
                   binning=binning,
                   hist=np.random.random(binning.shape))
    numu_map.set_poisson_errors()
    inputs = MapSet(
        name='inputs',
        maps=[nue_map, numu_map],
    )

    xform0 = BinnedTensorTransform(input_names='nue',
                                   output_name='nue',
                                   input_binning=binning,
                                   output_binning=binning,
                                   xform_array=2 * np.ones(binning.shape))

    xform1 = BinnedTensorTransform(input_names=['numu'],
                                   output_name='numu',
                                   input_binning=binning,
                                   output_binning=binning,
                                   xform_array=3 * np.ones(binning.shape))

    xform2 = BinnedTensorTransform(
        input_names=['nue', 'numu'],
        output_name='nue_numu',
        input_binning=binning,
        output_binning=binning,
        xform_array=np.stack(
            [2 * np.ones(binning.shape), 3 * np.ones(binning.shape)], axis=0))
    assert np.all((xform2 + 2).xform_array - xform2.xform_array == 2)

    testdir = tempfile.mkdtemp()
    try:
        for i, t in enumerate([xform0, xform1, xform2]):
            t_file = os.path.join(testdir, str(i) + '.json')
            t.to_json(t_file)
            t_ = BinnedTensorTransform.from_json(t_file)
            assert t_ == t, 't=\n%s\nt_=\n%s' % (t, t_)
    finally:
        shutil.rmtree(testdir, ignore_errors=True)

    logging.info('<< PASS : test_BinnedTensorTransform >>')

    xforms = TransformSet(name='scaling',
                          transforms=[xform0, xform1, xform2],
                          hash=9)

    assert xforms.hash == 9
    xforms.hash = -20
    assert xforms.hash == -20

    _ = xforms.apply(inputs)

    # TODO: get this working above, then test here!
    #xforms2 = xforms * 2

    testdir = tempfile.mkdtemp()
    try:
        for i, t in enumerate([xforms]):
            t_filename = os.path.join(testdir, str(i) + '.json')
            t.to_json(t_filename)
            t_ = TransformSet.from_json(t_filename)
            assert t_ == t, 't=\n%s\nt_=\n%s' % (t.transforms, t_.transforms)
    finally:
        shutil.rmtree(testdir, ignore_errors=True)

    logging.info('<< PASS : test_TransformSet >>')
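
# A sketch of what the transforms above amount to numerically, assuming the
# element-wise behaviour implied by the constant transform arrays: a single-input
# transform multiplies the input histogram bin-by-bin, while a multi-input
# transform multiplies each input by its slice of the transform array and sums
# over the inputs.

import numpy as np

shape = (10, 5)
nue_hist = np.random.random(shape)
numu_hist = np.random.random(shape)

# xform0 / xform1 equivalents: bin-wise scaling of a single input
nue_out = 2 * np.ones(shape) * nue_hist
numu_out = 3 * np.ones(shape) * numu_hist

# xform2 equivalent: stacked per-input transform, summed over the inputs
xform_array = np.stack([2 * np.ones(shape), 3 * np.ones(shape)], axis=0)
nue_numu_out = (xform_array * np.stack([nue_hist, numu_hist], axis=0)).sum(axis=0)
assert np.allclose(nue_numu_out, 2 * nue_hist + 3 * numu_hist)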
Beispiel #55
                        default="event_rate.json",
                        help='''file to store the output''')
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=None,
                        help='''set verbosity level''')
    args = parser.parse_args()

    #Set verbosity level
    set_verbosity(args.verbose)

    #Check binning
    ebins, czbins = check_binning(args.osc_flux_maps)

    logging.info("Defining aeff_service...")

    if args.mc_mode:
        logging.info("  Using effective area from EVENT DATA...")
        aeff_service = AeffServiceMC(ebins,
                                     czbins,
                                     aeff_weight_file=args.weighted_aeff_file)
    else:
        logging.info("  Using effective area from PARAMETRIZATION...")
        aeff_settings = from_json(find_resource(args.settings_file))
        aeff_service = AeffServicePar(ebins, czbins, **aeff_settings)

    event_rate_maps = get_event_rates(args.osc_flux_maps, aeff_service,
                                      args.livetime, args.nu_nubar_ratio,
                                      args.aeff_scale)
Beispiel #56
def test_find_index():
    """Unit tests for `find_index` function.

    Correctness is defined as producing the same histogram as numpy.histogramdd
    by using the output of `find_index` (ignoring underflow and overflow values).
    Additionally, -1 should be returned if a value is below the range
    (underflow) or is nan, and num_bins should be returned for a value above
    the range (overflow).
    """

    failures = 0
    for basic_bin_edges in [
            # Negative, positive, integer, non-integer, binary-unrepresentable (0.1) edges
        [-1, -0.5, -0.1, 0, 0.1, 0.5, 1, 2, 3, 4],

            # A single infinite bin: [-np.inf, np.inf]
        [],

            # Half-infinite bins (lower or upper edge) & [-inf, .1, +inf]
        [0.1],

            # Single bin with finite edges & +/-inf-edge(s)-added variants
        [-0.1, 0.1],
    ]:
        # Bin edges from above, w/ and w/o +/-inf as left and/or right edges
        for le, re in [(None, None), (-np.inf, None), (None, np.inf),
                       (-np.inf, np.inf)]:
            bin_edges = deepcopy(basic_bin_edges)
            if le is not None:
                bin_edges = [le] + bin_edges
            if re is not None:
                bin_edges = bin_edges + [re]
            if len(bin_edges) < 2:
                continue
            logging.debug('bin_edges being tested: %s', bin_edges)
            bin_edges = np.array(bin_edges, dtype=FTYPE)

            num_bins = len(bin_edges) - 1
            underflow_idx = -1
            overflow_idx = num_bins

            #
            # Construct test values to try out
            #

            non_finite_vals = [-np.inf, +np.inf, np.nan]

            # Values within bins (i.e., not on edges)
            inbin_vals = []
            for idx in range(len(bin_edges) - 1):
                lower_be = bin_edges[idx]
                upper_be = bin_edges[idx + 1]
                if np.isfinite(lower_be):
                    if np.isfinite(upper_be):
                        inbin_val = (lower_be + upper_be) / 2
                    else:
                        inbin_val = lower_be + 10.5
                else:
                    if np.isfinite(upper_be):
                        inbin_val = upper_be - 10.5
                    else:
                        inbin_val = 10.5
                inbin_vals.append(inbin_val)

            # Values above/below bin edges by one unit of floating point
            # accuracy
            eps = np.finfo(FTYPE).eps  # pylint: disable=no-member
            below_edges_vals = [FTYPE((1 - eps) * be) for be in bin_edges]
            above_edges_vals = [FTYPE((1 + eps) * be) for be in bin_edges]

            test_vals = np.concatenate([
                non_finite_vals,
                bin_edges,
                inbin_vals,
                below_edges_vals,
                above_edges_vals,
            ])
            logging.trace('test_vals = %s', test_vals)

            #
            # Run tests
            #
            for val in test_vals:
                val = FTYPE(val)

                np_histvals, _ = np.histogramdd([val],
                                                np.atleast_2d(bin_edges))
                nonzero_indices = np.nonzero(np_histvals)[0]  # select first & only dim
                if np.isnan(val):
                    assert len(nonzero_indices) == 0, str(len(nonzero_indices))
                    expected_idx = underflow_idx
                elif val < bin_edges[0]:
                    assert len(nonzero_indices) == 0, str(len(nonzero_indices))
                    expected_idx = underflow_idx
                elif val > bin_edges[-1]:
                    assert len(nonzero_indices) == 0, str(len(nonzero_indices))
                    expected_idx = overflow_idx
                else:
                    assert len(nonzero_indices) == 1, str(len(nonzero_indices))
                    expected_idx = nonzero_indices[0]

                found_idx = find_index(val, bin_edges)

                if found_idx != expected_idx:
                    failures += 1
                    msg = 'val={}, edges={}: Expected idx={}, found idx={}'.format(
                        val, bin_edges, expected_idx, found_idx)
                    logging.error(msg)

    assert failures == 0, f"{failures} failures, inspect ERROR messages above for info"

    logging.info('<< PASS : test_find_index >>')
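
# A sketch of the contract tested above, re-implemented with np.searchsorted:
# -1 for NaN or underflow, num_bins for overflow, interior edges belonging to
# the bin above them, and the upper-most edge belonging to the last bin as in
# np.histogramdd. The real `find_index` implementation differs; this only
# illustrates the expected behaviour.

import numpy as np

def find_index_sketch(val, bin_edges):
    num_bins = len(bin_edges) - 1
    if np.isnan(val) or val < bin_edges[0]:
        return -1  # underflow (or NaN)
    if val > bin_edges[-1]:
        return num_bins  # overflow
    if val == bin_edges[-1]:
        return num_bins - 1  # upper-most edge is inclusive
    return int(np.searchsorted(bin_edges, val, side='right')) - 1

edges = np.array([-1.0, -0.5, 0.0, 0.5, 1.0])
assert find_index_sketch(-2.0, edges) == -1
assert find_index_sketch(0.25, edges) == 2
assert find_index_sketch(1.0, edges) == 3
assert find_index_sketch(1.5, edges) == 4
assert find_index_sketch(np.nan, edges) == -1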
Beispiel #57
def plot_prior(obj,
               param=None,
               x_xform=None,
               ax1=None,
               ax2=None,
               **plt_kwargs):
    """Plot prior for param from template settings, params, or prior filename
    or dict.

    Parameters
    ----------
    obj : str or dict
        if str, interpret as path from which to load a dict
        if (nested) dict, (innermost) must be dict of prior properties :
            either supply `param` to choose which parameter's prior in `obj`
            to plot, or prior dict, in which case `param` need not be specified
    param
        Param name to plot; necessary if obj is either pipeline settings or
        params dict
    x_xform
        Transform to apply to x-values. E.g., to plot against sin^2 theta, use
        x_xform = lambda x: np.sin(x)**2
    ax1, ax2
        Axes onto which to plot LLH and chi-squared, respectively. If none are
        provided, new figures & axes will be created.
    plt_kwargs
        Keyword arguments to pass on to the plot function

    Returns
    -------
    ax1, ax2
        The axes onto which plots were drawn (ax1 = LLH, ax2 = chi^2)
    """
    import matplotlib as mpl
    mpl.use('pdf')
    import matplotlib.pyplot as plt
    if isinstance(obj, basestring):
        obj = from_file(obj)
    if param is not None and param in obj:
        obj = obj[param]
    if 'prior' in obj:
        obj = obj['prior']

    prior = Prior(**obj)
    logging.info('Plotting Prior: %s', prior)
    x0 = prior.valid_range[0]
    x1 = prior.valid_range[1]
    if prior.kind == 'gaussian':
        x0 = max(x0, prior.max_at - 5 * prior.stddev)
        x1 = min(x1, prior.max_at + 5 * prior.stddev)
    if np.isinf(x0):
        x0 = -1
    if np.isinf(x1):
        x1 = +1
    # if prior.units is None, will result in dimensionless quantity
    x = ureg.Quantity(np.linspace(x0, x1, 5000), prior.units)

    llh = prior.llh(x)
    chi2 = prior.chi2(x)

    if x_xform is not None:
        x = x_xform(x)

    if ax1 is None:
        f = plt.figure()
        ax1 = f.add_subplot(111)
    if ax2 is None:
        f = plt.figure()
        ax2 = f.add_subplot(111)

    ax1.plot(x, llh, **plt_kwargs)
    ax2.plot(x, chi2, **plt_kwargs)

    ax1.set_title(str(prior), fontsize=8, y=1.02)
    ax2.set_title(str(prior), fontsize=8, y=1.02)
    ax1.set_xlabel(param)
    ax2.set_xlabel(param)
    ax1.set_ylabel('LLH')
    ax2.set_ylabel(r'$\Delta\chi^2$')

    return ax1, ax2
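
# A minimal usage sketch for `plot_prior` above; the prior specification below is
# made up purely for illustration (any dict of valid Prior keyword arguments, or
# a settings file containing one, can be passed as `obj`).

if __name__ == '__main__':
    example_prior_spec = {'kind': 'gaussian', 'mean': 0.5, 'stddev': 0.1}
    ax_llh, ax_chi2 = plot_prior(example_prior_spec, color='k')
    ax_llh.figure.savefig('example_prior_llh.pdf')
    ax_chi2.figure.savefig('example_prior_chi2.pdf')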
Beispiel #58
    def setup_function(self):

        self.data.representation = self.calc_mode

        #
        # Init arrays
        #

        # Prepare some array shapes
        gradient_params_shape = (len(self.gradient_param_names), )

        if self.data.is_map:
            # speed up calculation by adding links
            # as nominal flux doesn't depend on the (outgoing) flavour
            self.data.link_containers('nu', [
                'nue_cc', 'numu_cc', 'nutau_cc', 'nue_nc', 'numu_nc',
                'nutau_nc'
            ])

            self.data.link_containers('nubar', [
                'nuebar_cc', 'numubar_cc', 'nutaubar_cc', 'nuebar_nc',
                'numubar_nc', 'nutaubar_nc'
            ])

        # Loop over containers
        for container in self.data:

            #TODO Toggles for including both nu and nubar flux (required for CPT violating oscillations)

            # Flux container shape : [ N events, N flavors in primary flux ]
            num_events = container.size
            num_flux_flavs = 3 if self.include_nutau_flux else 2
            flux_container_shape = (num_events, num_flux_flavs)

            # Gradients container shape
            gradients_shape = tuple(
                list(flux_container_shape) + list(gradient_params_shape))

            # Create arrays that will be populated in the stage
            # Note that the flux arrays will be chosen as nu or nubar depending
            # on the container (i.e. nu and nubar are not stored simultaneously)
            # Would rather use multi-dim arrays here, but are limited by the fact
            # that numba only supports 1D/2D versions of numpy functions
            container["nu_flux_nominal"] = np.full(flux_container_shape,
                                                   np.NaN,
                                                   dtype=FTYPE)
            container["nu_flux"] = np.full(flux_container_shape,
                                           np.NaN,
                                           dtype=FTYPE)
            container["gradients"] = np.full(gradients_shape,
                                             np.NaN,
                                             dtype=FTYPE)

        # Also create an array container to hold the gradient parameter values
        # Only want this once, e.g. not once per container
        self.gradient_params = np.empty(gradient_params_shape, dtype=FTYPE)

        #
        # Load MCEq splines
        #

        # Both the nominal fluxes and the gradients of the flux w.r.t. the
        # Barr parameters have been splined, using MCEq.

        # There are splines for each Barr parameter, plus +/- versions of each
        # Barr parameter corresponding to mesons/antimesons.

        # For a given Barr parameter, the underlying dictionary has the following
        # keywords:
        #     "numu", "numubar", "nue", "nuebar"
        #     derivatives: "dnumu", "dnumubar", "dnue", "dnuebar"
        # Units are changed to m^-2 in creates_splines.., rather than cm^-2 which
        # is the unit of calculation in MCEq

        # Note that this is all done on CPUs, since the splines reside on the CPUs.
        # The actual `compute_function` computation can be done on GPUs though.

        # Load the MCEq splines
        spline_file = find_resource(self.table_file)
        logging.info("Loading MCEq spline tables from : %s", spline_file)
        # Encoding is to support pickle files created with python v2
        self.spline_tables_dict = pickle.load(BZ2File(spline_file),
                                              encoding="latin1")

        # Ensure that the user is not loading an incompatible spline
        for bp in self.barr_param_names:
            bp_p = bp + '+'  # meson
            bp_m = bp + '-'  # antimeson
            assert bp_p in self.spline_tables_dict.keys(), (
                "Gradient parameter '%s' missing from table" % bp_p)
            assert bp_m in self.spline_tables_dict.keys(), (
                "Gradient parameter '%s' missing from table" % bp_m)

        # Loop over containers
        for container in self.data:

            # Grab containers here once to save time
            # TODO make spline generation script store splines directly in
            # terms of energy, not ln(energy)
            true_log_energy = np.log(container["true_energy"])
            true_abs_coszen = np.abs(container["true_coszen"])
            nu_flux_nominal = container["nu_flux_nominal"]
            gradients = container["gradients"]
            nubar = container["nubar"]

            #
            # Nominal flux
            #

            if not self.use_honda_nominal_flux:

                # Evaluate splines to get nominal flux
                # Need to correctly map nu/nubar and flavor to the output arrays

                # Note that nominal flux is stored multiple times (once per Barr parameter)
                # Choose an arbitrary one to get the nominal fluxes
                arb_gradient_param_key = self.gradient_param_names[0]

                # nue(bar)
                nu_flux_nominal[:, 0] = self.spline_tables_dict[
                    arb_gradient_param_key]["nue" if nubar > 0 else "nuebar"](
                        true_abs_coszen,
                        true_log_energy,
                        grid=False,
                    )

                # numu(bar)
                nu_flux_nominal[:, 1] = self.spline_tables_dict[
                    arb_gradient_param_key][
                        "numu" if nubar > 0 else "numubar"](
                            true_abs_coszen,
                            true_log_energy,
                            grid=False,
                        )

                # nutau(bar)
                # Only evaluated if a nutau flux is included; otherwise left unfilled
                # TODO include nutau flux (e.g. prompt) in splines
                if self.include_nutau_flux:
                    nu_flux_nominal[:, 2] = self.spline_tables_dict[
                        arb_gradient_param_key][
                            "nutau" if nubar > 0 else "nutaubar"](
                                true_abs_coszen,
                                true_log_energy,
                                grid=False,
                            )

            # Tell the smart arrays we've changed the nominal flux values on the host
            container.mark_changed("nu_flux_nominal")

            #
            # Flux gradients
            #

            # Evaluate splines to get the flux gradients w.r.t. the Barr parameter values
            # Need to correctly map nu/nubar and flavor to the output arrays

            # Loop over parameters
            for (
                    gradient_param_name,
                    gradient_param_idx,
            ) in self.gradient_param_indices.items():

                # nue(bar)
                gradients[:, 0, gradient_param_idx] = self.spline_tables_dict[
                    gradient_param_name]["dnue" if nubar > 0 else "dnuebar"](
                        true_abs_coszen,
                        true_log_energy,
                        grid=False,
                    )

                # numu(bar)
                gradients[:, 1, gradient_param_idx] = self.spline_tables_dict[
                    gradient_param_name]["dnumu" if nubar > 0 else "dnumubar"](
                        true_abs_coszen,
                        true_log_energy,
                        grid=False,
                    )

                # nutau(bar)
                if self.include_nutau_flux:
                    gradients[:, 2,
                              gradient_param_idx] = self.spline_tables_dict[
                                  gradient_param_name][
                                      "dnutau" if nubar > 0 else "dnutaubar"](
                                          true_abs_coszen,
                                          true_log_energy,
                                          grid=False,
                                      )

            # Tell the smart arrays we've changed the flux gradient values on the host
            container.mark_changed("gradients")

        # don't forget to un-link everything again
        self.data.unlink_containers()
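
# A sketch of how the arrays prepared above are typically combined later in the
# stage (the actual compute step is not shown in this snippet, so the linear
# combination below is an assumption based on the Barr-gradient scheme): the
# modified flux is the nominal flux plus each gradient multiplied by its Barr
# parameter value.

import numpy as np

def apply_barr_gradients_sketch(nu_flux_nominal, gradients, gradient_params):
    # nu_flux_nominal : [N events, N flavours]
    # gradients       : [N events, N flavours, N gradient params]
    # gradient_params : [N gradient params]
    return nu_flux_nominal + np.einsum('efp,p->ef', gradients, gradient_params)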
Beispiel #59
def scan_allsyst(template_settings, steps, hypo_param_selections, outdir,
                 minimizer_settings, metric, debug_mode):
    """Scan (separately) all systematics (i.e., non-fixed params).

    Parameters
    ----------
    template_settings
    steps
    hypo_param_selections
    outdir
    minimizer_settings
    metric
    debug_mode

    Returns
    -------
    results : dict
        Keys are param names, values are the scan results

    """
    outdir = expanduser(expandvars(outdir))
    mkdir(outdir, warn=False)

    hypo_maker = DistributionMaker(template_settings)

    hypo_maker.select_params(hypo_param_selections)
    data_dist = hypo_maker.get_outputs(return_sum=True)

    minimizer_settings = from_file(minimizer_settings)

    analysis = Analysis()

    results = OrderedDict()  # pylint: disable=redefined-outer-name
    for param in hypo_maker.params:
        if param.is_fixed:
            continue

        logging.info('Scanning %s', param.name)
        nominal_value = param.value

        outfile = join(
            outdir,
            '{:s}_{:d}_steps_{:s}_scan.json'.format(param.name, steps, metric))
        if isfile(outfile):
            raise IOError(
                '`outfile` "{}" exists, not overwriting.'.format(outfile))

        results[param.name] = analysis.scan(
            data_dist=data_dist,
            hypo_maker=hypo_maker,
            hypo_param_selections=hypo_param_selections,
            metric=metric,
            param_names=param.name,
            steps=steps,
            only_points=None,
            outer=True,
            profile=False,
            minimizer_settings=minimizer_settings,
            outfile=outfile,
            debug_mode=debug_mode)

        to_file(results[param.name], outfile)
        param.value = nominal_value

        logging.info('Done scanning param "%s"', param.name)

    logging.info('Done.')

    return results
Beispiel #60
def test_Prior():
    """Unit tests for Prior class"""
    uniform = Prior(kind='uniform', llh_offset=1.5)
    gaussian = Prior(kind='gaussian', mean=10, stddev=1)
    x = np.linspace(-10, 10, 100)
    y = x**2
    linterp = Prior(kind='linterp', param_vals=x * ureg.meter, llh_vals=y)
    param_vals = np.linspace(-10, 10, 100)
    llh_vals = x**2
    knots, coeffs, deg = splrep(param_vals, llh_vals)
    spline = Prior(kind='spline',
                   knots=knots * ureg.foot,
                   coeffs=coeffs,
                   deg=deg)
    param_upsamp = np.linspace(-10, 10, 1000) * ureg.foot
    llh_upsamp = splev(param_upsamp, tck=(knots, coeffs, deg), ext=2)
    assert all(spline.llh(param_upsamp) == llh_upsamp)

    # Asking for param value outside of range should fail
    try:
        linterp.llh(-1000 * ureg.mile)
    except ValueError:
        pass
    else:
        assert False

    try:
        linterp.chi2(-1000 * ureg.km)
    except ValueError:
        pass
    else:
        assert False

    try:
        spline.llh(-1000 * ureg.meter)
    except ValueError:
        pass
    else:
        assert False

    try:
        spline.chi2(+1000 * ureg.meter)
    except ValueError:
        pass
    else:
        assert False

    # Asking for param value when units were used should fail
    try:
        spline.llh(10)
    except TypeError:
        pass
    else:
        assert False

    # ... or vice versa
    try:
        gaussian.llh(10 * ureg.meter)
    except (TypeError, pint.DimensionalityError):
        pass
    else:
        assert False

    logging.info('<< PASS : test_Prior >>')
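
# For reference, the Gaussian prior instantiated above follows the standard
# definitions (a sketch, assuming chi2 = ((x - mean)/stddev)**2 and
# llh = -chi2 / 2, i.e. the maximum llh is zero at x = mean):

import numpy as np

mean, stddev = 10.0, 1.0
x_vals = np.linspace(5.0, 15.0, 11)
chi2_vals = ((x_vals - mean) / stddev) ** 2
llh_vals_sketch = -0.5 * chi2_vals
assert np.allclose(chi2_vals, -2.0 * llh_vals_sketch)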