Example #1
def assemble_interpolated_fits(fit_directory, output_file):
    """After all of the fits on the cluster are done, assemble the results to one JSON.
    
    The JSON produced by this function is what `load_interpolated_hypersurfaces`
    expects.
    """
    assert os.path.isdir(fit_directory), "fit directory does not exist"
    metadata = from_json(os.path.join(fit_directory, "metadata.json"))
    
    combined_data = collections.OrderedDict()
    combined_data["interpolation_param_spec"] = metadata["interpolation_param_spec"]
    hs_fits = []
    grid_shape = tuple(metadata["grid_shape"])
    for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)):
        gridpoint_json = os.path.join(fit_directory, f"gridpoint_{job_idx:06d}.json.bz2")
        logging.info(f"Reading {gridpoint_json}")
        gridpoint_data = from_json(gridpoint_json)
        assert job_idx == gridpoint_data["job_idx"]
        assert np.all(grid_idx == gridpoint_data["grid_idx"])
        # TODO: Offer to run incomplete fits locally
        assert gridpoint_data["fit_successful"], f"job no. {job_idx} not finished"
        hs_fits.append(collections.OrderedDict(
            param_values=gridpoint_data["param_values"],
            hs_fit=gridpoint_data["hs_fit"]
        ))
    combined_data["hs_fits"] = hs_fits
    to_json(combined_data, output_file)
Example #2
def assemble_interpolated_fits(fit_directory,
                               output_file,
                               drop_fit_maps=False):
    """After all of the fits on the cluster are done, assemble the results to one JSON.

    The JSON produced by this function is what `load_interpolated_hypersurfaces`
    expects.
    """
    assert os.path.isdir(fit_directory), "fit directory does not exist"
    metadata = from_json(os.path.join(fit_directory, "metadata.json"))

    combined_data = collections.OrderedDict()
    combined_data["interpolation_param_spec"] = metadata[
        "interpolation_param_spec"]

    # Loop over grid points
    hs_fits = []
    grid_shape = tuple(metadata["grid_shape"])
    for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)):

        # Load grid point data
        gridpoint_json = os.path.join(fit_directory,
                                      f"gridpoint_{job_idx:06d}.json.bz2")
        logging.info(f"Reading {gridpoint_json}")
        gridpoint_data = from_json(gridpoint_json)

        # Check the loaded data
        assert job_idx == gridpoint_data["job_idx"]
        assert np.all(grid_idx == gridpoint_data["grid_idx"])
        # TODO: Offer to run incomplete fits locally
        assert gridpoint_data[
            "fit_successful"], f"job no. {job_idx} not finished"

        # Drop fit maps if requested (can significantly reduce file size)
        if drop_fit_maps:
            for key, hs_state in gridpoint_data["hs_fit"].items():
                hs_state["fit_maps_raw"] = None
                hs_state["fit_maps_norm"] = None

        # Add grid point data to output file
        hs_fits.append(
            collections.OrderedDict(
                param_values=gridpoint_data["param_values"],
                hs_fit=gridpoint_data["hs_fit"]))

    # Write the output file
    combined_data["hs_fits"] = hs_fits
    to_file(combined_data, output_file)
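The two assembly variants above differ only in the optional drop_fit_maps flag. A minimal usage sketch follows, assuming `assemble_interpolated_fits` and `load_interpolated_hypersurfaces` (Example #30) can be imported from the same hypersurface module; the import path, directory and file names are assumptions, not taken from these examples.

# Hedged usage sketch: the import path and all paths below are hypothetical;
# only the function signatures match the examples above.
from pisa.utils.hypersurfaces import (  # assumed module path
    assemble_interpolated_fits,
    load_interpolated_hypersurfaces,
)

fit_directory = "/data/hs_fits/run01"                    # hypothetical cluster output dir
output_file = "/data/hs_fits/run01_combined.json.bz2"    # hypothetical combined file

# Merge all per-gridpoint results into the single JSON that
# load_interpolated_hypersurfaces expects; drop the raw fit maps to save space.
assemble_interpolated_fits(fit_directory, output_file, drop_fit_maps=True)
interpolators = load_interpolated_hypersurfaces(output_file)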
Example #3
    def __init__(self,ebins,czbins,aeff_egy_par,aeff_coszen_par,**params):
        '''
        Parameters:
        * aeff_egy_par - effective area vs. Energy 1D parameterizations for each flavor,
        in a text file (.dat)
        * aeff_coszen_par - json file containing 1D coszen parameterization for each flavor 
        '''
        logging.info('Initializing AeffServicePar...')

        self.ebins = ebins
        self.czbins = czbins


        ## Load the info from .dat files into a dict...
        ## Parametric approach treats all NC events the same
        aeff_coszen_par_str = from_json(find_resource(aeff_coszen_par))
        aeff2d_nc = self.get_aeff_flavor('NC',aeff_egy_par,aeff_coszen_par_str)
        aeff2d_nc_bar = self.get_aeff_flavor('NC_bar',aeff_egy_par,aeff_coszen_par_str)

        self.aeff_dict = {}
        logging.info("Creating effective area parametric dict...")
        for flavor in ['nue','nue_bar','numu','numu_bar','nutau','nutau_bar']:
            flavor_dict = {}
            logging.debug("Working on %s effective areas"%flavor)

            aeff2d = self.get_aeff_flavor(flavor,aeff_egy_par,aeff_coszen_par_str)

            flavor_dict['cc'] = aeff2d
            flavor_dict['nc'] = aeff2d_nc_bar if 'bar' in flavor else aeff2d_nc

            self.aeff_dict[flavor] = flavor_dict

        return
Example #4
    def __init__(self,ebins,czbins,settings_file=None):
        '''
        settings - expects the dictionary from a .json file with
                        entries of 'a_eff_files', & 'a_eff_coszen_dep'
        '''
        self.ebins = ebins
        self.czbins = czbins

        ##Load the settings from the file
        settings = from_json(find_resource(settings_file))
        
        ## Load the info from .dat files into a dict...  
        ## Parametric approach treats all NC events the same
        aeff2d_nc = self.get_aeff_flavor('NC',settings)
        aeff2d_nc_bar = self.get_aeff_flavor('NC_bar',settings)
        
        self.aeff_dict = {}
        logging.info("Creating effective area parametric dict...")
        for flavor in ['nue','nue_bar','numu','numu_bar','nutau','nutau_bar']:
            flavor_dict = {}
            logging.debug("Working on %s effective areas"%flavor)

            aeff2d = self.get_aeff_flavor(flavor,settings)

            flavor_dict['cc'] = aeff2d
            flavor_dict['nc'] = aeff2d_nc_bar if 'bar' in flavor else aeff2d_nc
            
            self.aeff_dict[flavor] = flavor_dict
                    
        return
Example #5
    def from_json(cls, resource):
        """Instantiate a new TransformSet object from a JSON file.

        Parameters
        ----------
        resource : str
            A PISA resource specification (see pisa.utils.resources)

        See Also
        --------
        to_json
        pisa.utils.jsons.to_json

        """
        state = jsons.from_json(resource)
        transforms = []
        for module, classname, transform_state in state['transforms']:
            clsmembers = inspect.getmembers(sys.modules[__name__],
                                            inspect.isclass)
            # First try to get a class within this module/namespace
            classes = [c[1] for c in clsmembers if c[0] == classname]
            if len(classes) > 0:
                class_ = classes[0]
            # Otherwise try to import the module recorded in the JSON file
            else:
                module = importlib.import_module(module)
                # and then get the class
                class_ = getattr(module, classname)
            transforms.append(class_(**transform_state))
        state['transforms'] = transforms
        # State is a dict, so instantiate with double-asterisk syntax
        return cls(**state)
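The interesting part of this loader is the two-step class lookup: it first searches the current module's namespace for `classname`, and only falls back to importing the module recorded in the JSON. A small self-contained sketch of that pattern is shown below; the names it resolves are standard-library placeholders, not PISA classes.

# Self-contained sketch of the lookup pattern used in from_json above.
import importlib
import inspect
import sys


def resolve_class(module_name, class_name):
    # First try to find the class in this module's own namespace ...
    local_classes = dict(inspect.getmembers(sys.modules[__name__], inspect.isclass))
    if class_name in local_classes:
        return local_classes[class_name]
    # ... otherwise import the module recorded in the serialized state.
    module = importlib.import_module(module_name)
    return getattr(module, class_name)


# Placeholder demonstration with a standard-library (module, class) pair.
cls = resolve_class("collections", "OrderedDict")
obj = cls(param_a=1.0)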
Example #6
 def get_pid_kernels(self, pid_kernelfile=None, **kwargs):
     logging.info('Opening file: %s'%(pid_kernelfile))
     try:
         self.pid_kernels = from_json(find_resource(pid_kernelfile))
     except IOError as e:
         logging.error("Unable to open kernel file %s"%pid_kernelfile)
         logging.error(e)
         sys.exit(1)
Example #7
 def get_pid_kernels(self, pid_kernelfile=None, **kwargs):
     logging.info('Opening file: %s' % (pid_kernelfile))
     try:
         self.pid_kernels = from_json(find_resource(pid_kernelfile))
     except IOError as e:
         logging.error("Unable to open kernel file %s" % pid_kernelfile)
         logging.error(e)
         sys.exit(1)
Example #8
    def _get_reco_kernels(self, kernelfile=None, **kwargs):

        for reco_scale in ['e_reco_scale', 'cz_reco_scale']:
            if reco_scale in kwargs:
                if kwargs[reco_scale] != 1:
                    raise ValueError('%s = %.2f not valid for RecoServiceKernelFile!'
                                     %(reco_scale, kwargs[reco_scale]))

        if kernelfile not in [self.kernelfile, None]:
            logging.info('Reconstruction from non-default kernel file %s!'%kernelfile)
            return from_json(find_resource(kernelfile))

        if not hasattr(self, 'kernels'):
            logging.info('Using file %s for default reconstruction'%(kernelfile))
            self.kernels = from_json(find_resource(kernelfile))

        return self.kernels
Example #9
def get_incomplete_job_idx(fit_directory):
    """Get job indices of fits that are not flagged as successful."""

    assert os.path.isdir(fit_directory), "fit directory does not exist"
    metadata = from_json(os.path.join(fit_directory, "metadata.json"))
    grid_shape = tuple(metadata["grid_shape"])
    failed_idx = []
    for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)):
        try:
            gridpoint_json = os.path.join(fit_directory,
                                          f"gridpoint_{job_idx:06d}.json.bz2")
            logging.info(f"Reading {gridpoint_json}")
            gridpoint_data = from_json(gridpoint_json)
        except:
            break
        if not gridpoint_data["fit_successful"]:
            failed_idx.append(job_idx)
        job_idx += 1
    return failed_idx
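A sketch of how this could be paired with `run_interpolated_fit` (Example #31) to finish failed grid points locally; the import path and the directory are assumptions.

# Hedged sketch: re-run only the grid points that are not flagged successful.
from pisa.utils.hypersurfaces import (  # assumed module path
    get_incomplete_job_idx,
    run_interpolated_fit,
)

fit_directory = "/data/hs_fits/run01"  # hypothetical fit directory
for job_idx in get_incomplete_job_idx(fit_directory):
    run_interpolated_fit(fit_directory, job_idx, skip_successful=True)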
Example #10
def from_file(fname, fmt=None, **kwargs):
    """Dispatch correct file reader based on `fmt` (if specified) or guess
    based on file name's extension.

    Parameters
    ----------
    fname : string
        File path / name from which to load data.

    fmt : None or string
        If string, for interpretation of the file according to this format. If
        None, file format is deduced by an extension found in `fname`.

    **kwargs
        All other arguments are passed to the function dispatched to read the
        file.

    Returns
    -------
    Object instantiated from the file (string, dictionary, ...). Each format
    is interpreted differently.

    Raises
    ------
    ValueError
        If extension is not recognized

    """
    if fmt is None:
        rootname, ext = os.path.splitext(fname)
        ext = ext.replace('.', '').lower()
    else:
        rootname = fname
        ext = fmt.lower()

    if ext in ZIP_EXTS or ext in XOR_EXTS:
        rootname, inner_ext = os.path.splitext(rootname)
        inner_ext = inner_ext.replace('.', '').lower()
        ext = inner_ext

    fname = resources.find_resource(fname)
    if ext in jsons.JSON_EXTS:
        return jsons.from_json(fname, **kwargs)
    if ext in hdf.HDF5_EXTS:
        return hdf.from_hdf(fname, **kwargs)
    if ext in PKL_EXTS:
        return from_pickle(fname, **kwargs)
    if ext in CFG_EXTS:
        return from_cfg(fname, **kwargs)
    if ext in TXT_EXTS:
        return from_txt(fname, **kwargs)
    errmsg = 'File "%s": unrecognized extension "%s"' % (fname, ext)
    log.logging.error(errmsg)
    raise ValueError(errmsg)
Example #11
    def get_pid_kernels(self, pid_paramfile=None,
                        PID_offset=0., PID_scale=1., **kwargs):

        # load parametrization file
        logging.info('Opening PID parametrization file %s'%pid_paramfile)
        try:
            param_str = from_json(find_resource(pid_paramfile))
        except IOError as e:
            logging.error("Unable to open PID parametrization file %s"
                          %pid_paramfile)
            logging.error(e)
            sys.exit(1)
Example #12
    def get_pid_kernels(self,
                        pid_paramfile=None,
                        PID_offset=0.,
                        PID_scale=1.,
                        **kwargs):

        # load parametrization file
        logging.info('Opening PID parametrization file %s' % pid_paramfile)
        try:
            param_str = from_json(find_resource(pid_paramfile))
        except IOError as e:
            logging.error("Unable to open PID parametrization file %s" %
                          pid_paramfile)
            logging.error(e)
            sys.exit(1)
Example #13
    def setup_function(self):
        scale_file = find_resource(self.scale_file)
        logging.info("Loading scaling factors from : %s", scale_file)

        scaling_dict = from_json(scale_file)
        scale_binning = MultiDimBinning(
            **scaling_dict[self.variable]["binning"])

        scale_factors = np.array(scaling_dict[self.variable]["scales"],
                                 dtype=FTYPE)
        logging.info(f"Binning for ad-hoc systematic: \n {str(scale_binning)}")
        logging.info(
            f"scaling factors of ad-hoc systematic:\n {str(scale_factors)}")
        self.data.representation = scale_binning
        for container in self.data:
            container["adhoc_scale_factors"] = scale_factors
Example #14
File: utils.py Project: lkijmj/pisa
def from_file(fname, fmt=None):
    """Dispatch correct file reader based on fmt (if specified) or guess
    based on file name's extension"""
    if fmt is None:
        base, ext = os.path.splitext(fname)
        ext = ext.replace('.', '').lower()
    else:
        ext = fmt.lower()
    if ext in JSON_EXTS:
        return jsons.from_json(fname)
    elif ext in HDF5_EXTS:
        return hdf.from_hdf(fname)
    elif ext in PKL_EXTS:
        return cPickle.load(open(fname, 'rb'))
    else:
        errmsg = 'Unrecognized file type/extension: ' + ext
        logging.error(errmsg)
        raise TypeError(errmsg)
Example #15
def from_file(fname, fmt=None):
    """Dispatch correct file reader based on fmt (if specified) or guess
    based on file name's extension"""
    if fmt is None:
        base, ext = os.path.splitext(fname)
        ext = ext.replace('.', '').lower()
    else:
        ext = fmt.lower()
    if ext in JSON_EXTS:
        return jsons.from_json(fname)
    elif ext in HDF5_EXTS:
        return hdf.from_hdf(fname)
    elif ext in PKL_EXTS:
        return cPickle.load(open(fname, 'rb'))
    else:
        errmsg = 'Unrecognized file type/extension: ' + ext
        logging.error(errmsg)
        raise TypeError(errmsg)
Example #16
    def from_json(cls, resource):
        """Instantiate a new Map object from a JSON file.

        The format of the JSON is generated by the `Map.to_json` method, which
        converts a Map object to basic types and then numpy arrays are
        converted in a call to `pisa.utils.jsons.to_json`.

        Parameters
        ----------
        resource : str
            A PISA resource specification (see pisa.utils.resources)

        See Also
        --------
        to_json
        pisa.utils.jsons.to_json

        """
        state = jsons.from_json(resource)
        # State is a dict for Map, so instantiate with double-asterisk syntax
        return cls(**state)
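Since the JSON format is exactly what `Map.to_json` writes, loading and re-saving is a straightforward round trip. A hedged sketch; the resource name is hypothetical and the import path is assumed.

# Hedged round-trip sketch; "nue_cc_map.json" is a hypothetical resource.
from pisa.core.map import Map  # assumed import path

m = Map.from_json("nue_cc_map.json")   # classmethod shown above
m.to_json("nue_cc_map_copy.json")      # writes the same state back out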
Example #17
    def __init__(self,
                 detector,
                 geom,
                 proc_ver,
                 pid_spec_ver=1,
                 pid_specs=None):
        geom = str(geom)
        proc_ver = str(proc_ver)
        pid_spec_ver = str(pid_spec_ver)

        if pid_specs is None:
            pid_specs = 'pid/pid_specifications.json'
        if isinstance(pid_specs, str):
            pid_specs = from_json(resources.find_resource(pid_specs))
        elif isinstance(pid_specs, collections.Mapping):
            pass
        else:
            raise TypeError('Unhandled `pid_specs` type: "%s"' %
                            type(pid_specs))
        self.detector = detector
        self.proc_ver = proc_ver
        self.pid_spec_ver = str(pid_spec_ver)
        d = pid_specs
        all_k = []
        for wanted_key in [detector, geom, proc_ver, pid_spec_ver]:
            wanted_key = wanted_key.replace("'", "").lower()
            for orig_dict_key, subdict in d.items():
                dict_key = orig_dict_key.replace("'", "").lower()
                if (dict_key == wanted_key):
                    d = subdict
                    all_k.append(orig_dict_key)
        if len(all_k) != 4:
            raise ValueError('Could not find %s' %
                             str([detector, geom, proc_ver, pid_spec_ver]))
        self.pid_spec = pid_specs[all_k[0]][all_k[1]][all_k[2]][all_k[3]]

        # Enforce rules on PID spec:
        self.validatePIDSpec(self.pid_spec)
Example #18
    def __init__(self, ebins, czbins, reco_param_file=None, **kwargs):
        """
        Parameters needed to instantiate a reconstruction service with
        parametrizations:
        * ebins: Energy bin edges
        * czbins: cos(zenith) bin edges
        * reco_param_file: JSON containing the parametrizations
        """

        # Load parametrization
        logging.info('Opening reconstruction parametrization file %s'
                     %reco_param_file)

        # Needed for self.read_param_string()
        self.ebins = ebins
        self.czbins = czbins

        # Get parametrization
        param_str = from_json(find_resource(reco_param_file))
        self.parametrization = self.read_param_string(param_str)

        # No **kwargs, so stored kernels will always have reco scales 1.0
        RecoServiceBase.__init__(self, ebins, czbins,**kwargs)
Example #19
    def __init__(self, ebins, czbins, reco_param_file=None, **kwargs):
        """
        Parameters needed to instantiate a reconstruction service with
        parametrizations:
        * ebins: Energy bin edges
        * czbins: cos(zenith) bin edges
        * reco_param_file: JSON containing the parametrizations
        """

        # Load parametrization
        logging.info('Opening reconstruction parametrization file %s' %
                     reco_param_file)

        # Needed for self.read_param_string()
        self.ebins = ebins
        self.czbins = czbins

        # Get parametrization
        param_str = from_json(find_resource(reco_param_file))
        self.parametrization = self.read_param_string(param_str)

        # No **kwargs, so stored kernels will always have reco scales 1.0
        RecoServiceBase.__init__(self, ebins, czbins, **kwargs)
Example #20
    def __init__(self, ebins, czbins, aeff_egy_par, aeff_coszen_par, **params):
        '''
        Parameters:
        * aeff_egy_par - effective area vs. Energy 1D parameterizations for each flavor,
        in a text file (.dat)
        * aeff_coszen_par - json file containing 1D coszen parameterization for each flavor 
        '''
        logging.info('Initializing AeffServicePar...')

        self.ebins = ebins
        self.czbins = czbins

        ## Load the info from .dat files into a dict...
        ## Parametric approach treats all NC events the same
        aeff_coszen_par_str = from_json(find_resource(aeff_coszen_par))
        aeff2d_nc = self.get_aeff_flavor('NC', aeff_egy_par,
                                         aeff_coszen_par_str)
        aeff2d_nc_bar = self.get_aeff_flavor('NC_bar', aeff_egy_par,
                                             aeff_coszen_par_str)

        self.aeff_dict = {}
        logging.info("Creating effective area parametric dict...")
        for flavor in [
                'nue', 'nue_bar', 'numu', 'numu_bar', 'nutau', 'nutau_bar'
        ]:
            flavor_dict = {}
            logging.debug("Working on %s effective areas" % flavor)

            aeff2d = self.get_aeff_flavor(flavor, aeff_egy_par,
                                          aeff_coszen_par_str)

            flavor_dict['cc'] = aeff2d
            flavor_dict['nc'] = aeff2d_nc_bar if 'bar' in flavor else aeff2d_nc

            self.aeff_dict[flavor] = flavor_dict

        return
Example #21
                        help="Save all stages.")
    parser.add_argument('-o',
                        '--outfile',
                        dest='outfile',
                        metavar='FILE',
                        type=str,
                        action='store',
                        default="template.json",
                        help='file to store the output')
    args = parser.parse_args()

    set_verbosity(args.verbose)

    with Timer() as t:
        #Load all the settings
        model_settings = from_json(args.template_settings)

        #Select a hierarchy
        logging.info('Selected %s hierarchy' %
                     ('normal' if args.normal else 'inverted'))
        params = select_hierarchy(model_settings['params'],
                                  normal_hierarchy=args.normal)

        #Initialize template maker
        template_maker = TemplateMaker(get_values(params),
                                       **model_settings['binning'])
    tprofile.info("  ==> elapsed time to initialize templates: %s sec" %
                  t.secs)

    #Now get the actual template
    with Timer(verbose=False) as t:
Example #22
    def __init__(self, detector, proc_ver, data_proc_params=None):
        super().__init__()
        if data_proc_params is None:
            data_proc_params = 'events/data_proc_params.json'
        if isinstance(data_proc_params, str):
            ps = jsons.from_json(resources.find_resource(data_proc_params))
        elif isinstance(data_proc_params, dict):
            ps = data_proc_params
        else:
            raise TypeError('Unhandled data_proc_params type passed in arg: ' +
                            str(type(data_proc_params)))
        self.detector = detector
        self.proc_ver = str(proc_ver)
        self.det_key = [k for k in ps.keys()
                        if k.lower() == self.detector.lower()][0]
        for key in ps[self.det_key].keys():
            lk = key.lower()
            lpv = self.proc_ver.lower()
            if lk == lpv or ('v'+lk == lpv) or (lk == 'v'+lpv):
                self.procver_key = key
                # This works for PINGU
            elif ('msu_'+lk == lpv) or (lk == 'msu_'+lpv):
                self.procver_key = key
            elif ('nbi_'+lk == lpv) or (lk == 'nbi_'+lpv):
                self.procver_key = key
                # Generalising for DeepCore and different selections
        ps = ps[self.det_key][self.procver_key]
        self.update(ps)

        self.trans_nu_code = False
        if 'nu_code_to_pdg_map' in self:
            self.trans_nu_code = True
            try:
                self.nu_code_to_pdg_map = {
                    int(code): pdg
                    for code, pdg in self['nu_code_to_pdg_map'].items()
                }
            except:
                self.nu_code_to_pdg_map = self['nu_code_to_pdg_map']

        # NOTE: the keys are strings so the particular string formatting is
        # important for indexing into the dict!

        # Add generic cuts
        self['cuts'].update({
            # Cut for particles only (no anti-particles)
            str(NuFlav(12).bar_code).lower():
                {'fields': ['nu_code'], 'pass_if': 'nu_code > 0'},
            # Cut for anti-particles only (no particles)
            str(NuFlav(-12).bar_code).lower():
                {'fields': ['nu_code'], 'pass_if': 'nu_code < 0'},
            # Cut for charged-current interactions only
            str(IntType('cc')).lower():
                {'fields': ['interaction_type'],
                 'pass_if': 'interaction_type == 1'},
            # Cut for neutral-current interactions only
            str(IntType('nc')).lower():
                {'fields': ['interaction_type'],
                 'pass_if': 'interaction_type == 2'},
            # True-upgoing cut using the zenith field
            'true_upgoing_zen':
                {'fields': ['true_zenith'], 'pass_if': 'true_zenith > pi/2'},
            # True-upgoing cut using the cosine-zenith field
            'true_upgoing_coszen':
                {'fields': ['true_coszen'], 'pass_if': 'true_coszen < 0'},
        })

        # Enforce rules on cuts:
        self.validate_cut_spec(self['cuts'])
Example #23
    assert len(llhfiles) <= len(logfiles), "Data and log directories don't match?"

# Output to save to hdf5 file:
output_data = {"minimizer_settings": {}, "template_settings": {}, "true_NMH": {}, "true_IMH": {}}

logging.warn("Processing {0:d} files".format(len(llhfiles)))

mod = len(llhfiles) // 20
start = time.time()
for i, filename in enumerate(llhfiles):

    if (mod > 0) and (i % mod == 0):
        logging.info("  >> {0:d} files done...".format(i))

    try:
        data = from_json(filename)
    except Exception as inst:
        # print(inst)
        print("Skipping file: ", filename)
        continue

    if not output_data["minimizer_settings"]:
        output_data["minimizer_settings"] = data["minimizer_settings"]

    if not output_data["template_settings"]:
        output_data["template_settings"] = data["template_settings"]

    for key in ["true_NMH", "true_IMH"]:
        appendTrials(output_data[key], data[key])

    if args.log_dir is not None:
Example #24
three_chi2s_theta23 = {}
three_chi2s_theta23['data_NMH'] = {}
three_chi2s_theta23['data_IMH'] = {}

ten_significances = {}
ten_significances['data_NMH'] = []
ten_significances['data_IMH'] = []

ten_chi2s_theta23 = {}
ten_chi2s_theta23['data_NMH'] = {}
ten_chi2s_theta23['data_IMH'] = {}

for infile in sorted(os.listdir(three_true_h_fid_dir)):
    if os.path.isfile(three_true_h_fid_dir+infile):
        indict = from_json(three_true_h_fid_dir+infile)
        theta23_nh = indict['template_settings']['params']['theta23_nh']['value']
        theta23_ih = indict['template_settings']['params']['theta23_ih']['value']
        assert(theta23_nh == theta23_ih)
        theta23vals.append(theta23_nh)
        sin2theta23vals.append(math.pow(math.sin(theta23_nh),2))
        three_chi2s_theta23['data_NMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []}
        three_chi2s_theta23['data_IMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []}
        ten_chi2s_theta23['data_NMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []}
        ten_chi2s_theta23['data_IMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []}

theta23vals = sorted(theta23vals)
sin2theta23vals = sorted(sin2theta23vals)

for theta23 in theta23vals:
    
Example #25
                    '--outfile',
                    type=str,
                    default='llh_data.json',
                    metavar='JSONFILE',
                    help="Output filename.")
parser.add_argument('-v',
                    '--verbose',
                    action='count',
                    default=None,
                    help='set verbosity level')
args = parser.parse_args()

set_verbosity(args.verbose)

#Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings = from_json(args.minimizer_settings)
pseudo_data_settings = from_json(
    args.pseudo_data_settings
) if args.pseudo_data_settings is not None else template_settings

#Workaround for old scipy versions
import scipy
if scipy.__version__ < '0.12.0':
    logging.warn('Detected scipy version %s < 0.12.0' % scipy.__version__)
    if 'maxiter' in minimizer_settings:
        logging.warn('Optimizer settings for \"maxiter\" will be ignored')
        minimizer_settings.pop('maxiter')

# make sure that both pseudo data and template are using the same
# channel. Raise Exception and quit otherwise
Example #26
                        action='store_false',
                         help="select the inverted hierarchy")
    parser.add_argument('-v', '--verbose', action='count', default=None,
                        help='set verbosity level.')
    parser.add_argument('-s', '--save_all', action='store_true', default=False,
                        help="Save all stages.")
    parser.add_argument('-o', '--outfile', dest='outfile', metavar='FILE',
                        type=str, action='store',default="template.json",
                        help='file to store the output')
    args = parser.parse_args()

    set_verbosity(args.verbose)

    with Timer() as t:
        #Load all the settings
        model_settings = from_json(args.template_settings)

        #Select a hierarchy
        logging.info('Selected %s hierarchy'%
                     ('normal' if args.normal else 'inverted'))
        params =  select_hierarchy(model_settings['params'],
                                   normal_hierarchy=args.normal)

        #Initialize template maker
        template_maker = TemplateMaker(get_values(params),
                                       **model_settings['binning'])
    profile.info("  ==> elapsed time to initialize templates: %s sec"%t.secs)

    #Now get the actual template
    with Timer(verbose=False) as t:
        template_maps = template_maker.get_template(get_values(params),
Example #27
def trace(self, message, *args, **kws):
    self.log(logging.TRACE, message, *args, **kws)


logging.Logger.trace = trace
logging.RootLogger.trace = trace
logging.trace = logging.root.trace

#Don't move these up, as "trace" might be used in them
from pisa.utils.jsons import from_json
from pisa.resources.resources import find_resource

#Get the logging configuration
#Will search in local dir, $PISA and finally package resources
logconfig = from_json(find_resource('logging.json'))

#Setup the logging system with this config
logging.config.dictConfig(logconfig)

#Make the loggers public
#In case they haven't been defined, this will just inherit from the root logger
physics = logging.getLogger('physics')
profile = logging.getLogger('profile')


def set_verbosity(verbosity):
    '''Overwrite the verbosity level for the root logger
       Verbosity should be an integer with the levels just below.
    '''
    #Ignore if no verbosity is given
Example #28
first_chi2s_livetime = {}
first_chi2s_livetime["data_NMH"] = {}
first_chi2s_livetime["data_IMH"] = {}

second_significances = {}
second_significances["data_NMH"] = []
second_significances["data_IMH"] = []

second_chi2s_livetime = {}
second_chi2s_livetime["data_NMH"] = {}
second_chi2s_livetime["data_IMH"] = {}

for infile in sorted(os.listdir(first_true_h_fid_dir)):
    if os.path.isfile(first_true_h_fid_dir + infile):
        indict = from_json(first_true_h_fid_dir + infile)
        livetime = indict["template_settings"]["params"]["livetime"]["value"]
        livetimevals.append(livetime)
        first_chi2s_livetime["data_NMH"][livetime] = {"true_h_fiducial": [], "false_h_best": []}
        first_chi2s_livetime["data_IMH"][livetime] = {"true_h_fiducial": [], "false_h_best": []}
        second_chi2s_livetime["data_NMH"][livetime] = {"true_h_fiducial": [], "false_h_best": []}
        second_chi2s_livetime["data_IMH"][livetime] = {"true_h_fiducial": [], "false_h_best": []}

livetimevals = sorted(livetimevals)

for livetime in livetimevals:

    # Get chisquare values for first octant true_h_fiducial distributions
    for trueinfile in sorted(os.listdir(first_true_h_fid_dir)):
        if os.path.isfile(first_true_h_fid_dir + trueinfile):
            indict = from_json(first_true_h_fid_dir + trueinfile)
Example #29
File: log.py Project: mamday/pisa
def trace(self, message, *args, **kws):
    self.log(logging.TRACE, message, *args, **kws)


logging.Logger.trace = trace
logging.RootLogger.trace = trace
logging.trace = logging.root.trace

# Don't move these up, as "trace" might be used in them
from pisa.utils.jsons import from_json
from pisa.resources.resources import find_resource

# Get the logging configuration
# Will search in local dir, $PISA and finally package resources
logconfig = from_json(find_resource("logging.json"))

# Setup the logging system with this config
logging.config.dictConfig(logconfig)

# Make the loggers public
# In case they haven't been defined, this will just inherit from the root logger
physics = logging.getLogger("physics")
profile = logging.getLogger("profile")


def set_verbosity(verbosity):
    """Overwrite the verbosity level for the root logger
       Verbosity should be an integer with the levels just below.
    """
    # Ignore if no verbosity is given
Example #30
def load_interpolated_hypersurfaces(input_file):
    '''
    Load a set of interpolated hypersurfaces from a file.

    Analogously to "load_hypersurfaces", this function returns a
    collection with a HypersurfaceInterpolator object for each Map.

    Parameters
    ----------
    input_file : str
        A JSON input file as produced by fit_hypersurfaces if interpolation params
        were given. It has the form::
            {
                'interpolation_param_spec': {
                    'param1': {"values": [val1_1, val1_2, ...], "scales_log": True/False},
                    'param2': {"values": [val2_1, val2_2, ...], "scales_log": True/False},
                    ...
                    'paramN': {"values": [valN_1, valN_2, ...], "scales_log": True/False}
                },
                'hs_fits': [
                    <list of dicts where keys are map names such as 'nue_cc' and values
                    are hypersurface states>
                ]
            }

    Returns
    -------
    collections.OrderedDict
        dictionary with a :obj:`HypersurfaceInterpolator` for each map
    '''
    assert isinstance(input_file, str)

    if input_file.endswith("json") or input_file.endswith("json.bz2"):
        logging.info(f"Loading interpolated hypersurfaces from file: {input_file}")
        input_data = from_json(input_file)
        assert set(['interpolation_param_spec', 'hs_fits']).issubset(
            set(input_data.keys())), 'missing keys'
        map_names = None
        # input_data['hs_fits'] is a list of dicts, each dict contains "param_values"
        # and "hs_fit"
        logging.info("Reading file complete, generating hypersurfaces...")
        for hs_fit_dict in input_data['hs_fits']:
            # this is still not the actual Hypersurface, but a dict with the (linked)
            # maps and the HS fit for the map...
            hs_state_maps = hs_fit_dict["hs_fit"]
            if map_names is None:
                map_names = list(hs_state_maps.keys())
            else:
                assert set(map_names) == set(hs_state_maps.keys()), "inconsistent maps"
            # When data is recovered from JSON, the object states are not automatically
            # converted to the corresponding objects, so we need to do it manually here.
            for map_name in map_names:
                hs_state_maps[map_name] = Hypersurface.from_state(hs_state_maps[map_name])

        logging.info(f"Read hypersurface maps: {map_names}")
        
        # Now we have a list of dicts where the map names are on the lower level.
        # We need to convert this into a dict of HypersurfaceInterpolator objects.
        output = collections.OrderedDict()
        for m in map_names:
            hs_fits = [{"param_values": fd["param_values"], "hs_fit": fd['hs_fit'][m]} for fd in input_data['hs_fits']]
            output[m] = HypersurfaceInterpolator(input_data['interpolation_param_spec'], hs_fits)
    else:
        raise Exception("unknown file format")
    return output
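A short usage sketch for the loader; the file name is hypothetical and the import path is assumed. The returned dict holds one HypersurfaceInterpolator per map name.

# Hedged usage sketch of load_interpolated_hypersurfaces.
from pisa.utils.hypersurfaces import load_interpolated_hypersurfaces  # assumed path

interpolators = load_interpolated_hypersurfaces("run01_combined.json.bz2")  # hypothetical file
for map_name, interp in interpolators.items():
    # one HypersurfaceInterpolator per map, e.g. 'nue_cc', 'numu_cc', ...
    print(map_name, type(interp).__name__)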
Example #31
def run_interpolated_fit(fit_directory, job_idx, skip_successful=False):
    """Run the hypersurface fit for a grid point.
    
    If `skip_successful` is true, do not run if the `fit_successful` flag is already
    True.
    """
    
    assert os.path.isdir(fit_directory), "fit directory does not exist"
    
    gridpoint_json = os.path.join(fit_directory, f"gridpoint_{job_idx:06d}.json.bz2")
    gridpoint_data = from_json(gridpoint_json)

    if skip_successful and gridpoint_data["fit_successful"]:
        logging.info(f"Fit at job index {job_idx} already successful, skipping...")
        return

    metadata = from_json(os.path.join(fit_directory, "metadata.json"))
    
    interpolation_param_spec = metadata["interpolation_param_spec"]
    
    # this is a pipeline configuration in the form of an OrderedDict
    nominal_dataset = metadata["nominal_dataset"]
    # Why can we still not load PISA objects from JSON that are inside a dict?! Grrr...
    nominal_dataset["pipeline_cfg"] = pipeline_cfg_from_states(
        nominal_dataset["pipeline_cfg"]
    )
    # this is a list of pipeline configurations
    sys_datasets = metadata["sys_datasets"]
    for sys_dataset in sys_datasets:
        sys_dataset["pipeline_cfg"] = pipeline_cfg_from_states(
            sys_dataset["pipeline_cfg"]
        )
    # this is a dict of param_name : value pairs
    param_values = gridpoint_data["param_values"]
    # we do a redundant check to make sure the parameter values at this grid point are
    # correct
    interpolation_param_names = metadata["interpolation_param_names"]
    grid_shape = tuple(metadata["grid_shape"])
    # the grid point index of this job
    grid_idx = list(np.ndindex(grid_shape))[job_idx]
    for i, n in enumerate(interpolation_param_names):
        ms = "Inconsistent parameter values at grid point!"
        assert interpolation_param_spec[n]["values"][grid_idx[i]] == param_values[n], ms
    
    # now we need to adjust the values of the parameter in all pipelines for this point
    logging.info(f"updating pipelines with parameter values: {param_values}")
    for dataset in [nominal_dataset] + sys_datasets:
        for stage_cfg in dataset["pipeline_cfg"].values():
            if "params" not in stage_cfg.keys(): continue
            for param in interpolation_param_names:
                if param in stage_cfg["params"].names:
                    stage_cfg["params"][param].value = param_values[param]
    
    # these are the parameters of the hypersurface, NOT the ones we interpolate them
    # over!
    hypersurface_params = []
    for param_state in metadata["hypersurface_params"]:
        hypersurface_params.append(HypersurfaceParam.from_state(param_state))
    
    # We create Pipeline objects, get their outputs and then forget about the Pipeline
    # object on purpose! The memory requirement to hold all systematic sets at the same
    # time is just too large, especially on the cluster. The way we do it below we
    # only need enough memory for one dataset at a time.
    nominal_dataset["mapset"] = Pipeline(nominal_dataset["pipeline_cfg"]).get_outputs()
    for sys_dataset in sys_datasets:
        sys_dataset["mapset"] = Pipeline(sys_dataset["pipeline_cfg"]).get_outputs()
    
    # Merge maps according to the combine regex, if one was provided
    combine_regex = metadata["combine_regex"]
    if combine_regex is not None:
        nominal_dataset["mapset"] = nominal_dataset["mapset"].combine_re(combine_regex)
        for sys_dataset in sys_datasets:
            sys_dataset["mapset"] = sys_dataset["mapset"].combine_re(combine_regex)

    hypersurface_fit_kw = metadata["hypersurface_fit_kw"]
    hypersurfaces = collections.OrderedDict()
    log = metadata["log"]  # flag determining whether hs fit is run in log-space or not
    for map_name in nominal_dataset["mapset"].names:
        nominal_map = nominal_dataset["mapset"][map_name]
        nominal_param_values = nominal_dataset["sys_params"]

        sys_maps = [sys_dataset["mapset"][map_name] for sys_dataset in sys_datasets]
        sys_param_values = [sys_dataset["sys_params"] for sys_dataset in sys_datasets]

        hypersurface = Hypersurface(
            # Yes, this MUST be a deepcopy! Otherwise weird memory overwrites happen
            # and all the numbers get jumbled across the hypersurfaces of different maps
            params=copy.deepcopy(hypersurface_params),
            initial_intercept=0. if log else 1.,  # Initial value for intercept
            log=log
        )

        hypersurface.fit(
            nominal_map=nominal_map,
            nominal_param_values=nominal_param_values,
            sys_maps=sys_maps,
            sys_param_values=sys_param_values,
            norm=True,
            # Is the space or loading time really a problem?
            # keep_maps=False,  # it would take a lot more space otherwise
            **hypersurface_fit_kw
        )

        logging.debug("\nFitted hypersurface report:\n%s" % hypersurface)
        hypersurfaces[map_name] = hypersurface

    gridpoint_data["hs_fit"] = hypersurfaces
    gridpoint_data["fit_successful"] = True
    
    to_json(gridpoint_data, gridpoint_json)
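On a cluster, each job typically runs this function for a single grid point. A hedged sketch of a per-job driver; the argument handling and the import path are assumptions, not PISA's actual command-line interface.

# Hedged sketch of a per-job driver as it might run on one cluster node.
import sys

from pisa.utils.hypersurfaces import run_interpolated_fit  # assumed module path

if __name__ == "__main__":
    fit_directory = sys.argv[1]   # e.g. /data/hs_fits/run01 (hypothetical)
    job_idx = int(sys.argv[2])    # grid-point index assigned to this job
    run_interpolated_fit(fit_directory, job_idx, skip_successful=True)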
Example #32
                    help='''Settings related to the optimizer used in the LLR analysis.''')
parser.add_argument('-n','--ntrials',type=int, default = 1,
                    help="Number of trials to run")
parser.add_argument('-s','--save-steps',action='store_true',default=False,
                    dest='save_steps',
                    help="Save all steps the optimizer takes.")
parser.add_argument('-o','--outfile',type=str,default='llh_data.json',metavar='JSONFILE',
                    help="Output filename.")
parser.add_argument('-v', '--verbose', action='count', default=None,
                    help='set verbosity level')
args = parser.parse_args()

set_verbosity(args.verbose)

#Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings  = from_json(args.minimizer_settings)

#Workaround for old scipy versions
import scipy
if scipy.__version__ < '0.12.0':
    logging.warn('Detected scipy version %s < 0.12.0'%scipy.__version__)
    if 'maxiter' in minimizer_settings:
      logging.warn('Optimizer settings for \"maxiter\" will be ignored')
      minimizer_settings.pop('maxiter')

#Get the parameters
params = template_settings['params']

#store results from all the trials
trials = []
Example #33
                    help="Plot the fits of DOM efficiency and hole ice for each bin.")
parser.add_argument('--detector',type=str,default='',
                    help="Name of detector to put in plot titles")
parser.add_argument('--selection',type=str,default='',
                    help="Name of selection to put in plot titles")
parser.add_argument('-o','--outdir',type=str,
                    metavar='DIR', required = True, help='''Output directory''')
args = parser.parse_args()

#Read in the settings
detector = args.detector
selection = args.selection
outdir = args.outdir
utils.mkdir(outdir)
utils.mkdir(outdir+'/plots/')
template_settings = from_json(args.template_settings)
czbin_edges = template_settings['binning']['czbins']
ebin_edges = template_settings['binning']['ebins']
channel = template_settings['params']['channel']['value']
x_steps = 0.0001

if args.sim == '4digit':
    MC_name = '1XXX'
elif args.sim == '5digit':
    MC_name = '1XXXX'
elif args.sim == 'dima':
    MC_name = 'Dima'
else:
    MC_name = 'Other'

params = get_values(select_hierarchy(template_settings['params'],normal_hierarchy=True))
Example #34
 def fromFile(cls, filename):
     """
     Load a Fisher matrix from a json file
     """
     
     return cls(**from_json(filename))
Example #35
chi2s_theta23['data_IMH'] = {}
chi2s_livetime = {}
chi2s_livetime['data_NMH'] = {}
chi2s_livetime['data_IMH'] = {}

chi2s = {}
chi2s['data_NMH'] = {}
chi2s['data_NMH']['true_h_fiducial'] = []
chi2s['data_NMH']['false_h_best'] = []
chi2s['data_IMH'] = {}
chi2s['data_IMH']['true_h_fiducial'] = []
chi2s['data_IMH']['false_h_best'] = []

for infile in sorted(os.listdir(true_h_fid_dir)):
    if os.path.isfile(true_h_fid_dir+infile):
        indict = from_json(true_h_fid_dir+infile)
        if theta23analysis == True:
            theta23_nh = indict['template_settings']['params']['theta23_nh']['value']
            theta23_ih = indict['template_settings']['params']['theta23_ih']['value']
            assert(theta23_nh == theta23_ih)
            theta23vals.append(theta23_nh)
            sin2theta23vals.append(math.pow(math.sin(theta23_nh),2))
            chi2s_theta23['data_NMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []}
            chi2s_theta23['data_IMH'][theta23_nh] = {'true_h_fiducial': [], 'false_h_best': []}
            
        if livetimeanalysis == True:
            livetime = indict['template_settings']['params']['livetime']['value']
            livetimevals.append(livetime)
            chi2s_livetime['data_NMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []}
            chi2s_livetime['data_IMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []}
Example #36
                    '--outfile',
                    type=str,
                    default='alt_hypo_study.json',
                    metavar='JSONFILE',
                    help="Output filename.")
parser.add_argument('-v',
                    '--verbose',
                    action='count',
                    default=None,
                    help='set verbosity level')
args = parser.parse_args()

set_verbosity(args.verbose)

#Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings = from_json(args.minimizer_settings)
grid_settings = from_json(args.grid_settings)

if args.gpu_id is not None:
    template_settings['params']['gpu_id'] = {}
    template_settings['params']['gpu_id']['value'] = args.gpu_id
    template_settings['params']['gpu_id']['fixed'] = True

with Timer() as t:
    template_maker = TemplateMaker(get_values(template_settings['params']),
                                   **template_settings['binning'])
profile.info("==> elapsed time to initialize templates: %s sec" % t.secs)

#Get the parameters
params = template_settings['params']
Example #37
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['text.usetex'] = True
import scipy.interpolate

from pisa.utils.jsons import from_json

indict = from_json('/Users/steven/IceCube/PISA/pisa/pisa/resources/aeff/relative-crosssections/genie-weigthed-crosssections.json')

axislabels = {}
axislabels['nue'] = r'$\nu_e$ CC'
axislabels['nue_bar'] = r'$\bar{\nu}_e$ CC'
axislabels['numu'] = r'$\nu_{\mu}$ CC'
axislabels['numu_bar'] = r'$\bar{\nu}_{\mu}$ CC'
axislabels['nutau'] = r'$\nu_{\tau}$ CC'
axislabels['nutau_bar'] = r'$\bar{\nu}_{\tau}$ CC'
axislabels['nuall'] = r'$\nu$ NC'
axislabels['nuallbar'] = r'$\bar{\nu}$ NC'

GENIElabels = {}
GENIElabels['MaRES'] = r'$M_A^{RES}$'
GENIElabels['MaCCQE'] = r'$M_A^{CCQE}$'
GENIElabels['AhtBY'] = r'$A_{HT}$'
GENIElabels['BhtBY'] = r'$B_{BT}$'
GENIElabels['CV1uBY'] = r'$C_{\nu1u}$'
GENIElabels['CV2uBY'] = r'$C_{\nu2u}$'

GENIEcolours = {}
GENIEcolours['MaRES'] = 'b'
GENIEcolours['MaCCQE'] = 'r'
Example #38
                    default=False, dest='save_templates',
                    help="Do not save the templates for the different test points.")

  parser.add_argument('-o','--outdir',type=str,default=os.getcwd(),metavar='DIR',
                    help="Output directory")

  parser.add_argument('-v', '--verbose', action='count', default=None,
                    help='set verbosity level')

  args = parser.parse_args()

  # Set verbosity level
  set_verbosity(args.verbose)

  # Read the template settings
  template_settings = from_json(args.template_settings)

  # This file only contains the number of test points for each parameter (and perhaps eventually a non-linearity criterion)
  grid_settings  = from_json(args.grid_settings)

  # Get the Fisher matrices for the desired hierarchy and fiducial settings
  fisher_matrices = get_fisher_matrices(template_settings,grid_settings,args.inverted_truth,args.normal_truth,
                                    args.dump_all_stages,args.save_templates,args.outdir)

  
  # Fisher matrices are saved in any case
  for data_tag in fisher_matrices:
    fisher_basename = 'fisher_data_%s'%data_tag
    for chan in fisher_matrices[data_tag]:
      if chan == 'comb':
        outfile = os.path.join(args.outdir,fisher_basename+'.json')
Example #39
parser.add_argument('-IH','--IH_osc_dir', type=str, required=True, help="Directory containing oscillation probabilities for different oversampling values for inverted ordering.")
parser.add_argument('-r','--reference',type=str,
                    help="Reference value for oversampling (i.e. highest value used)")
args = parser.parse_args()

NH_path = args.NH_osc_dir
IH_path = args.IH_osc_dir
reference = args.reference

NH_vals = {}
IH_vals = {}
os_vals = []

for f in sorted(os.listdir(NH_path)):
    if os.path.isfile(NH_path+f):
        osc_file = from_json(NH_path+f)
        for os_key in osc_file.keys():
            NH_vals[os_key] = osc_file[os_key]
            if int(os_key) not in os_vals:
                os_vals.append(int(os_key))

for f in sorted(os.listdir(IH_path)):
    if os.path.isfile(IH_path+f):
        osc_file = from_json(IH_path+f)
        for os_key in osc_file.keys():
            IH_vals[os_key] = osc_file[os_key]               

os_vals = sorted(os_vals)
            
times = []
oversamples = []
Example #40
titles["numubar"] = r"$\bar{\nu}_{\mu}$"

titles["nue_cc"] = r"$\nu_e$ CC"
titles["numu_cc"] = r"$\nu_{\mu}$ CC"
titles["nutau_cc"] = r"$\nu_{\tau}$ CC"
titles["nuall_nc"] = r"$\nu$ NC"
titles["nuebar_cc"] = r"$\bar{\nu}_e$ CC"
titles["numubar_cc"] = r"$\bar{\nu}_{\mu}$ CC"
titles["nutaubar_cc"] = r"$\bar{\nu}_{\tau}$ CC"
titles["nuallbar_nc"] = r"$\bar{\nu}$ NC"

titles["trck"] = r"Track-Like"
titles["cscd"] = r"Cascade-Like"

try:
    cake_array1 = from_json(args.cake_file1)["maps"]
except:
    cake_array1 = from_json(args.cake_file1)

try:
    cake_array2 = from_json(args.cake_file2)["maps"]
except:
    cake_array2 = from_json(args.cake_file2)

for cake_dict1, cake_dict2 in zip(cake_array1, cake_array2):

    assert cake_dict1["name"] == cake_dict2["name"]

    cake_map1 = {}
    cake_map1["map"] = cake_dict1["hist"].T
Example #41
        dest="outfile",
        metavar="FILE",
        type=str,
        action="store",
        default="event_rate.json",
        help="""file to store the output""",
    )
    parser.add_argument("-v", "--verbose", action="count", default=None, help="""set verbosity level""")
    args = parser.parse_args()

    # Set verbosity level
    set_verbosity(args.verbose)

    # Check binning
    ebins, czbins = check_binning(args.osc_flux_maps)

    logging.info("Defining aeff_service...")

    if args.mc_mode:
        logging.info("  Using effective area from EVENT DATA...")
        aeff_service = AeffServiceMC(ebins, czbins, aeff_weight_file=args.weighted_aeff_file)
    else:
        logging.info("  Using effective area from PARAMETRIZATION...")
        aeff_settings = from_json(find_resource(args.settings_file))
        aeff_service = AeffServicePar(ebins, czbins, **aeff_settings)

    event_rate_maps = get_event_rates(args.osc_flux_maps, aeff_service, args.livetime, args.aeff_scale)

    logging.info("Saving output to: %s" % args.outfile)
    to_json(event_rate_maps, args.outfile)
Example #42
from pisa.utils.jsons import from_json, to_json
from copy import deepcopy as copy

parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument(
    'papa_file',
    type=str,
    metavar='JSON',
    help='papa settings file, containing the resolution parameterizations.')
parser.add_argument('outfile',
                    type=str,
                    metavar='JSON',
                    help='output file to store resolutions in PISA-format.')
args = parser.parse_args()

papa_settings = from_json(args.papa_file)
parameterizations = papa_settings['fiducial']['reco_parametrization']['value']

pisa_flavs = ['nue', 'numu', 'nutau']
mID = ['', '_bar']
intType = ['cc', 'nc']
recoType = ['coszen', 'energy']

papa_NC = parameterizations['NC']
egy_res = papa_NC['e']
papa_NC.pop('e')
papa_NC['energy'] = egy_res

pisa_reco_settings = {}
for flav in pisa_flavs:
    papa_res = parameterizations[flav]
Example #43
File: PID.py Project: mdunkman/pisa
    #Only show errors while parsing 
    set_verbosity(0)
    parser = ArgumentParser(description='Takes a reco event rate file '
                            'as input and produces a set of reconstructed templates '
                            'of tracks and cascades.',
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('reco_event_maps',metavar='RECOEVENTS',type=from_json,
                        help='''JSON reco event rate file with following parameters:
      {"nue_cc": {'czbins':[...], 'ebins':[...], 'map':[...]}, 
       "numu_cc": {...},
       "nutau_cc": {...},
       "nuall_nc": {...} }''')

    parser.add_argument('--settings',metavar='SETTINGS',type=from_json,
                        default=from_json(find_resource('pid/V15_pid.json')),
                        help='''json file containing parameterizations of the particle ID for each event type.''')

    parser.add_argument('-o', '--outfile', dest='outfile', metavar='FILE', type=str,
                        action='store',default="pid.json",
                        help='''file to store the output''')
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help='''set verbosity level''')
    args = parser.parse_args()

    #Set verbosity level
    set_verbosity(args.verbose)

    #Check binning
    ebins, czbins = check_binning(args.reco_event_maps)
Example #44
                        type=str,
                        default=os.getcwd(),
                        metavar='DIR',
                        help="Output directory")
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=None,
                        help='set verbosity level')
    args = parser.parse_args()

    # Set verbosity level
    set_verbosity(args.verbose)

    # Read the template settings
    template_settings = from_json(args.template_settings)

    # This file only contains the number of test points for each parameter (and
    # perhaps eventually a non-linearity criterion)
    grid_settings = from_json(args.grid_settings)

    # Get the Fisher matrices for the desired hierarchy and fiducial settings
    fisher_matrices = get_fisher_matrices(template_settings=template_settings,
                                          grid_settings=grid_settings,
                                          IMH=args.inverted_truth,
                                          NMH=args.normal_truth,
                                          dump_all_stages=args.dump_all_stages,
                                          save_templates=args.save_templates,
                                          outdir=args.outdir)

    # Fisher matrices are saved in any case
Example #45
    logging.warn("processing " + str(len(args.infiles)) + " files...")
    logging.warn("Saving to file: %s" % args.outfile)

    mod_num = len(args.infiles) // 20

    start_time = datetime.now()

    minimizer_settings = {}
    template_settings = {}
    pseudo_data_settings = {}
    trials = {}
    for i, filename in enumerate(args.infiles):
        if mod_num > 0:
            if i % mod_num == 0: print("  >> %d files done..." % i)
        try:
            data = from_json(filename)
        except:
            print "Skipping file: ", filename
            continue

        if not minimizer_settings:
            minimizer_settings = data['minimizer_settings']

        if not template_settings:
            template_settings = data['template_settings']

        if not pseudo_data_settings:
            try:
                pseudo_data_settings = data['pseudo_data_settings']
            except:
                pass
Example #46
titles['nue_bar'] = r'$\bar{\nu}_e$'
titles['nuebar'] = r'$\bar{\nu}_e$'
titles['numu'] = r'$\nu_{\mu}$'
titles['numu_bar'] = r'$\bar{\nu}_{\mu}$'
titles['numubar'] = r'$\bar{\nu}_{\mu}$'

titles['nue_cc'] = r'$\nu_e$ CC'
titles['numu_cc'] = r'$\nu_{\mu}$ CC'
titles['nutau_cc'] = r'$\nu_{\tau}$ CC'
titles['nuall_nc'] = r'$\nu$ NC'

titles['trck'] = r'Track-Like'
titles['cscd'] = r'Cascade-Like'

try:
    cake_array = from_json(args.cake_file)['maps']
except:
    cake_array = from_json(args.cake_file)
    
pisa_dict = from_json(args.pisa_file)

for cake_dict in cake_array:

    if cake_dict['name'] == 'numubar':
        pisa_map = pisa_dict['numu_bar']
    elif cake_dict['name'] == 'nuebar':
        pisa_map = pisa_dict['nue_bar']
    else:
        pisa_map = pisa_dict[cake_dict['name']]
    cake_map = {}
    cake_map['map'] = cake_dict['hist'].T
Example #47
free_chi2s_livetime = {}
free_chi2s_livetime['data_NMH'] = {}
free_chi2s_livetime['data_IMH'] = {}

prior_significances = {}
prior_significances['data_NMH'] = []
prior_significances['data_IMH'] = []

prior_chi2s_livetime = {}
prior_chi2s_livetime['data_NMH'] = {}
prior_chi2s_livetime['data_IMH'] = {}

for infile in sorted(os.listdir(free_true_h_fid_dir)):
    if os.path.isfile(free_true_h_fid_dir+infile):
        indict = from_json(free_true_h_fid_dir+infile)
        livetime = indict['template_settings']['params']['livetime']['value']
        livetimevals.append(livetime)
        free_chi2s_livetime['data_NMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []}
        free_chi2s_livetime['data_IMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []}
        prior_chi2s_livetime['data_NMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []}
        prior_chi2s_livetime['data_IMH'][livetime] = {'true_h_fiducial': [], 'false_h_best': []}

livetimevals = sorted(livetimevals)

for livetime in livetimevals:

    # Get chisquare values for free octant true_h_fiducial distributions
    for trueinfile in sorted(os.listdir(free_true_h_fid_dir)):
        if os.path.isfile(free_true_h_fid_dir+trueinfile):
            indict = from_json(free_true_h_fid_dir+trueinfile)
Ejemplo n.º 48
0
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter

from pisa.utils.jsons import from_json, to_json
import numpy as np

parser = ArgumentParser(description='''Determines the false_h_best fiducial distribution, under the Gaussian assumption.''',
                        formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('-p','--pisa_file', type=str, required=True,
                    help="File containing PISA V2 event by event data")
args = parser.parse_args()

pisa_dict = from_json(args.pisa_file)['pid']['reduced']

to_json(pisa_dict,'better.json')
    
Ejemplo n.º 49
0
def plot_clsim_table_summary(
        summaries, formats=None, outdir=None, no_legend=False
    ):
    """Plot the table summary produced by `summarize_clsim_table`.

    Plots are made of marginalized 1D distributions, where the mean, median,
    and/or max are used to marginalize out the remaining dimensions (whichever
    of those statistics are present in the summaries).

    Parameters
    ----------
    summaries : string, summary, or iterable thereof
        If string(s) are provided, each is glob-expanded. See
        :func:`glob.glob` for valid syntax.

    formats : None, string, or iterable of strings in {'pdf', 'png'}
        If no formats are provided, the plot will not be saved.

    outdir : None or string
        If `formats` is specified and `outdir` is None, the plots are
        saved to the present working directory.

    no_legend : bool, optional
        Do not display legend on plots (default is to display a legend)

    Returns
    -------
    all_figs : list of three :class:`matplotlib.figure.Figure`

    all_axes : list of three lists of :class:`matplotlib.axes.Axes`

    summaries : list of :class:`collections.OrderedDict`
        List of all summaries loaded

    """
    orig_summaries = deepcopy(summaries)

    if isinstance(summaries, (basestring, Mapping)):
        summaries = [summaries]

    tmp_summaries = []
    for summary in summaries:
        if isinstance(summary, Mapping):
            tmp_summaries.append(summary)
        elif isinstance(summary, basestring):
            tmp_summaries.extend(glob(expand(summary)))
    summaries = tmp_summaries

    for summary_n, summary in enumerate(summaries):
        if isinstance(summary, basestring):
            summary = from_json(summary)
            summaries[summary_n] = summary

    if formats is None:
        formats = []
    elif isinstance(formats, basestring):
        formats = [formats]

    if outdir is not None:
        outdir = expand(outdir)
        mkdir(outdir)

    n_summaries = len(summaries)

    if n_summaries == 0:
        raise ValueError(
            'No summaries found based on argument `summaries`={}'
            .format(orig_summaries)
        )

    for n, fmt in enumerate(formats):
        fmt = fmt.strip().lower()
        assert fmt in ('pdf', 'png'), fmt
        formats[n] = fmt

    all_items = OrderedDict()
    for summary in summaries:
        for key, value in summary.items():
            if key == 'dimensions':
                continue
            if key not in all_items:
                all_items[key] = []
            all_items[key].append(value)

    same_items = OrderedDict()
    different_items = OrderedDict()
    for key, values in all_items.items():
        all_same = True
        ref_value = values[0]
        for value in values[1:]:
            if np.any(value != ref_value):
                all_same = False

        if all_same:
            same_items[key] = values[0]
        else:
            different_items[key] = values

    if n_summaries > 1:
        if same_items:
            print('Same for all:\n{}'.format(same_items.keys()))
        if different_items:
            print('Different for some or all:\n{}'
                  .format(different_items.keys()))

    same_label = formatter(same_items)

    summary_has_detail = False
    if set(['string', 'depth_idx', 'seed']).issubset(all_items.keys()):
        summary_has_detail = True
        strings = sorted(set(all_items['string']))
        depths = sorted(set(all_items['depth_idx']))
        seeds = sorted(set(all_items['seed']))

    plot_kinds = ('mean', 'median', 'max')
    plot_kinds_with_data = set()
    dim_names = summaries[0]['dimensions'].keys()
    n_dims = len(dim_names)

    fig_x = 10 # inches
    fig_header_y = 0.35 # inches
    fig_one_axis_y = 5 # inches
    fig_all_axes_y = n_dims * fig_one_axis_y
    fig_y = fig_header_y + fig_all_axes_y # inches

    all_figs = []
    all_axes = []

    for plot_kind in plot_kinds:
        fig, f_axes = plt.subplots(
            nrows=n_dims, ncols=1, squeeze=False, figsize=(fig_x, fig_y)
        )
        all_figs.append(fig)
        f_axes = list(f_axes.flat)
        for ax in f_axes:
            ax.set_prop_cycle('color', COLOR_CYCLE_ORTHOG)
        all_axes.append(f_axes)

    n_lines = 0
    xlims = [[np.inf, -np.inf]] * n_dims

    summaries_order = []
    if summary_has_detail:
        for string, depth_idx, seed in product(strings, depths, seeds):
            for summary_n, summary in enumerate(summaries):
                if (summary['string'] != string
                        or summary['depth_idx'] != depth_idx
                        or summary['seed'] != seed):
                    continue
                summaries_order.append((summary_n, summary))
    else:
        for summary_n, summary in enumerate(summaries):
            summaries_order.append((summary_n, summary))

    labels_assigned = set()
    for summary_n, summary in summaries_order:
        different_label = formatter({k: v[summary_n] for k, v in different_items.items()})

        if different_label:
            label = different_label
            if label in labels_assigned:
                label = None
            else:
                labels_assigned.add(label)
        else:
            label = None

        for dim_num, dim_name in enumerate(dim_names):
            dim_info = summary['dimensions'][dim_name]
            dim_axes = [f_axes[dim_num] for f_axes in all_axes]
            bin_edges = summary[dim_name + '_bin_edges']
            if dim_name == 'deltaphidir':
                bin_edges /= np.pi
            xlims[dim_num] = [
                min(xlims[dim_num][0], np.min(bin_edges)),
                max(xlims[dim_num][1], np.max(bin_edges))
            ]
            for ax, plot_kind in zip(dim_axes, plot_kinds):
                if plot_kind not in dim_info:
                    continue
                plot_kinds_with_data.add(plot_kind)
                vals = dim_info[plot_kind]
                ax.step(bin_edges, [vals[0]] + list(vals),
                        linewidth=1, clip_on=True,
                        label=label)
                n_lines += 1

    dim_labels = dict(
        r=r'$r$',
        costheta=r'$\cos\theta$',
        t=r'$t$',
        costhetadir=r'$\cos\theta_{\rm dir}$',
        deltaphidir=r'$\Delta\phi_{\rm dir}$'
    )
    units = dict(r='m', t='ns', deltaphidir=r'rad/$\pi$')

    logx_dims = []
    logy_dims = ['r', 'time', 'deltaphidir']

    flabel = ''
    same_flabel = formatter(same_items, fname=True)
    different_flabel = formatter(different_items, key_only=True, fname=True)
    if same_flabel:
        flabel += '__same__' + same_flabel
    if different_flabel:
        flabel += '__differ__' + different_flabel

    for kind_idx, (plot_kind, fig) in enumerate(zip(plot_kinds, all_figs)):
        if plot_kind not in plot_kinds_with_data:
            continue
        for dim_num, (dim_name, ax) in enumerate(zip(dim_names, all_axes[kind_idx])):
            #if dim_num == 0 and different_items:
            if different_items and not no_legend:
                ax.legend(loc='best', frameon=False,
                          prop=dict(size=7, family='monospace'))

            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.yaxis.set_ticks_position('none')
            ax.xaxis.set_ticks_position('none')
            ax.xaxis.tick_bottom()
            ax.yaxis.tick_left()

            ax.set_xlim(xlims[dim_num])

            xlabel = dim_labels[dim_name]
            if dim_name in units:
                xlabel += ' ({})'.format(units[dim_name])
            ax.set_xlabel(xlabel)
            if dim_name in logx_dims:
                ax.set_xscale('log')
            if dim_name in logy_dims:
                ax.set_yscale('log')

        fig.tight_layout(rect=(0, 0, 1, fig_all_axes_y/fig_y))
        suptitle = (
            'Marginalized distributions (taking {} over all other axes)'
            .format(plot_kind)
        )
        if same_label:
            suptitle += '\n' + same_label
        fig.suptitle(suptitle, y=(fig_all_axes_y + fig_header_y*0.8) / fig_y,
                     fontsize=9)

        for fmt in formats:
            outfpath = ('clsim_table_summaries{}__{}.{}'
                        .format(flabel, plot_kind, fmt))
            if outdir:
                outfpath = join(outdir, outfpath)
            fig.savefig(outfpath, dpi=300)
            print('Saved image to "{}"'.format(outfpath))

    return all_figs, all_axes, summaries
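
A minimal usage sketch for the function above, not part of the original example: the glob pattern and output directory are placeholders, and the summary JSON files are assumed to have been produced earlier by `summarize_clsim_table`.

# Hypothetical call; paths and formats are placeholders, not from the source.
figs, axes, loaded = plot_clsim_table_summary(
    summaries='/data/clsim_tables/*_summary.json.bz2',  # glob-expanded
    formats=['png', 'pdf'],            # save each figure in both formats
    outdir='./table_summary_plots',    # created if it does not exist
    no_legend=False,                   # keep legends where items differ
)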
Ejemplo n.º 50
0
def run_interpolated_fit(fit_directory, job_idx, skip_successful=False):
    """Run the hypersurface fit for a grid point.

    If `skip_successful` is True, the fit is skipped when the `fit_successful` flag
    in the grid point file is already set.
    """

    #TODO a lot of this is copied from fit_hypersurfaces in hypersurface.py, would be safer to make more OAOO
    #TODO Copy the param value storage stuff from fit_hypersurfaces across in the meantime

    assert os.path.isdir(fit_directory), "fit directory does not exist"

    gridpoint_json = os.path.join(fit_directory,
                                  f"gridpoint_{job_idx:06d}.json.bz2")
    gridpoint_data = from_json(gridpoint_json)

    if skip_successful and gridpoint_data["fit_successful"]:
        logging.info(
            f"Fit at job index {job_idx} already successful, skipping...")
        return

    metadata = from_json(os.path.join(fit_directory, "metadata.json"))

    interpolation_param_spec = metadata["interpolation_param_spec"]

    # this is a pipeline configuration in the form of an OrderedDict
    nominal_dataset = metadata["nominal_dataset"]
    # Why can we still not load PISA objects from JSON that are inside a dict?! Grrr...
    nominal_dataset["pipeline_cfg"] = pipeline_cfg_from_states(
        nominal_dataset["pipeline_cfg"])
    # this is a list of pipeline configurations
    sys_datasets = metadata["sys_datasets"]
    for sys_dataset in sys_datasets:
        sys_dataset["pipeline_cfg"] = pipeline_cfg_from_states(
            sys_dataset["pipeline_cfg"])
    # this is a dict of param_name : value pairs
    param_values = gridpoint_data["param_values"]
    # we do a redundant check to make sure the parameter values at this grid point are
    # correct
    interpolation_param_names = metadata["interpolation_param_names"]
    grid_shape = tuple(metadata["grid_shape"])
    # the grid point index of this job
    grid_idx = list(np.ndindex(grid_shape))[job_idx]
    for i, n in enumerate(interpolation_param_names):
        ms = "Inconsistent parameter values at grid point!"
        assert interpolation_param_spec[n]["values"][
            grid_idx[i]] == param_values[n], ms

    # now we need to adjust the values of the parameter in all pipelines for this point
    logging.info(f"updating pipelines with parameter values: {param_values}")
    for dataset in [nominal_dataset] + sys_datasets:
        for stage_cfg in dataset["pipeline_cfg"].values():
            if "params" not in stage_cfg.keys(): continue
            for param in interpolation_param_names:
                if param in stage_cfg["params"].names:
                    stage_cfg["params"][param].value = param_values[param]

    # these are the parameters of the hypersurface, NOT the ones we interpolate them
    # over!
    hypersurface_params = []
    for param_state in metadata["hypersurface_params"]:
        hypersurface_params.append(HypersurfaceParam.from_state(param_state))

    def find_hist_stage(pipeline):
        """Locate the index of the hist stage in a pipeline."""
        hist_idx_found = False
        for i, s in enumerate(pipeline.stages):
            if s.__class__.__name__ == "hist":
                hist_idx = i
                hist_idx_found = True
                break
        if not hist_idx_found:
            raise RuntimeError(
                "Could not find histogram stage in pipeline, aborting.")
        return hist_idx

    # We create Pipeline objects, get their outputs and then forget about the Pipeline
    # object on purpose! The memory requirement to hold all systematic sets at the same
    # time is just too large, especially on the cluster. The way we do it below we
    # only need enough memory for one dataset at a time.

    for dataset in [nominal_dataset] + sys_datasets:
        pipeline = Pipeline(dataset["pipeline_cfg"])
        dataset["mapset"] = pipeline.get_outputs()
        # get the un-weighted event counts as well so that we can exclude bins
        # with too little statistics
        # First, find out which stage is the hist stage
        hist_idx = find_hist_stage(pipeline)
        pipeline.stages[hist_idx].unweighted = True
        dataset["mapset_unweighted"] = pipeline.get_outputs()
    del pipeline

    # Merge maps according to the combine regex, if one was provided
    combine_regex = metadata["combine_regex"]
    if combine_regex is not None:
        for dataset in [nominal_dataset] + sys_datasets:
            dataset["mapset"] = dataset["mapset"].combine_re(combine_regex)
            dataset["mapset_unweighted"] = dataset[
                "mapset_unweighted"].combine_re(combine_regex)

    minimum_mc = metadata["minimum_mc"]
    # Remove bins (i.e. set their count to zero) that have too few MC events
    for dataset in sys_datasets + [nominal_dataset]:
        for map_name in dataset["mapset"].names:
            insuff_mc = dataset["mapset_unweighted"][
                map_name].nominal_values < minimum_mc
            # Setting the hist to zero sets both nominal value and std_dev to zero
            dataset["mapset"][map_name].hist[insuff_mc] = 0.

    hypersurface_fit_kw = metadata["hypersurface_fit_kw"]
    hypersurfaces = collections.OrderedDict()
    # flag determining whether the hypersurface fit is run in log-space or not
    log = metadata["log"]
    for map_name in nominal_dataset["mapset"].names:
        nominal_map = nominal_dataset["mapset"][map_name]
        nominal_param_values = nominal_dataset["sys_params"]

        sys_maps = [
            sys_dataset["mapset"][map_name] for sys_dataset in sys_datasets
        ]
        sys_param_values = [
            sys_dataset["sys_params"] for sys_dataset in sys_datasets
        ]

        hypersurface = Hypersurface(
            # Yes, this MUST be a deepcopy! Otherwise weird memory overwrites happen
            # and all the numbers get jumbled across the hypersurfaces of different maps
            params=copy.deepcopy(hypersurface_params),
            initial_intercept=0. if log else 1.,  # Initial value for intercept
            log=log)

        hypersurface.fit(
            nominal_map=nominal_map,
            nominal_param_values=nominal_param_values,
            sys_maps=sys_maps,
            sys_param_values=sys_param_values,
            norm=True,
            # Is the space or loading time really a problem?
            # keep_maps=False,  # it would take a lot more space otherwise
            **hypersurface_fit_kw)

        logging.debug("\nFitted hypersurface report:\n%s" % hypersurface)
        hypersurfaces[map_name] = hypersurface

    gridpoint_data["hs_fit"] = hypersurfaces
    gridpoint_data["fit_successful"] = True

    to_json(gridpoint_data, gridpoint_json)
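
A hedged sketch of how a cluster job might invoke `run_interpolated_fit`, which is not shown in this excerpt; the command-line option names and defaults below are illustrative assumptions, not taken from the original script.

# Hypothetical job entry point; flag names are assumptions for illustration.
if __name__ == "__main__":
    from argparse import ArgumentParser
    parser = ArgumentParser(description="Fit hypersurfaces for one grid point")
    parser.add_argument("--fit-directory", required=True,
                        help="directory holding metadata.json and gridpoint files")
    parser.add_argument("--job-idx", type=int, required=True,
                        help="index of the grid point, e.g. the job array index")
    parser.add_argument("--skip-successful", action="store_true",
                        help="skip grid points that were already fit successfully")
    cli_args = parser.parse_args()
    run_interpolated_fit(cli_args.fit_directory, cli_args.job_idx,
                         skip_successful=cli_args.skip_successful)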
Ejemplo n.º 51
0
# author: Timothy C. Arlen
#         [email protected]
#

from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from pisa.utils.jsons import from_json,to_json
from copy import deepcopy as copy

parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('papa_file',type=str,metavar='JSON',
                    help='papa settings file, containing the resolution parameterizations.')
parser.add_argument('outfile',type=str,metavar='JSON',
                    help='output file to store resolutions in PISA-format.')
args = parser.parse_args()

papa_settings = from_json(args.papa_file)
parameterizations = papa_settings['fiducial']['reco_parametrization']['value']

pisa_flavs = ['nue','numu','nutau']
mID = ['','_bar']
intType = ['cc','nc']
recoType = ['coszen','energy']

papa_NC = parameterizations['NC']
egy_res = papa_NC['e']
papa_NC.pop('e')
papa_NC['energy'] = egy_res

pisa_reco_settings = {}
for flav in pisa_flavs:
    papa_res = parameterizations[flav]
Ejemplo n.º 52
0
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter

from pisa.utils.jsons import from_json, to_json
import numpy as np

parser = ArgumentParser(description='''Determines the false_h_best fiducial distribution, under the Gaussian assumption.''',
                        formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('-c','--cake_file', type=str, required=True,
                    help="File containing CAKE (PISA V3) data")
args = parser.parse_args()

cake_array = from_json(args.cake_file)['maps']

output_array = []

total_trck_dict = {}
total_cscd_dict = {}

for cake_dict in cake_array:
    if 'trck' in cake_dict['name']:
        if len(total_trck_dict.keys()) == 0:
            total_trck_dict = cake_dict
            total_trck_dict['name'] = 'trck'
        else:
            total_trck_dict['hist'] += cake_dict['hist']
    elif 'cscd' in cake_dict['name']:
        if len(total_cscd_dict.keys()) == 0:
            total_cscd_dict = cake_dict
            total_cscd_dict['name'] = 'cscd'
        else:
Ejemplo n.º 53
0
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=None,
                        help='''set verbosity level''')
    args = parser.parse_args()

    #Set verbosity level
    set_verbosity(args.verbose)

    #Check binning
    ebins, czbins = check_binning(args.osc_flux_maps)

    logging.info("Defining aeff_service...")

    if args.mc_mode:
        logging.info("  Using effective area from EVENT DATA...")
        aeff_service = AeffServiceMC(ebins,
                                     czbins,
                                     aeff_weight_file=args.weighted_aeff_file)
    else:
        logging.info("  Using effective area from PARAMETRIZATION...")
        aeff_settings = from_json(find_resource(args.settings_file))
        aeff_service = AeffServicePar(ebins, czbins, **aeff_settings)

    event_rate_maps = get_event_rates(args.osc_flux_maps, aeff_service,
                                      args.livetime, args.aeff_scale)

    logging.info("Saving output to: %s" % args.outfile)
    to_json(event_rate_maps, args.outfile)
Ejemplo n.º 54
0
parser.add_argument('-pd','--pseudo_data_settings',type=str,
                    metavar='JSONFILE',default=None,
                    help='''Settings for pseudo data templates, if desired to be different from template_settings.''')
parser.add_argument('-s','--save-steps',action='store_true',default=False,
                    dest='save_steps',
                    help="Save all steps the optimizer takes.")
parser.add_argument('-o','--outfile',type=str,default='llh_data.json',metavar='JSONFILE',
                    help="Output filename.")
parser.add_argument('-v', '--verbose', action='count', default=None,
                    help='set verbosity level')
args = parser.parse_args()

set_verbosity(args.verbose)

# Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings  = from_json(args.minimizer_settings)
pseudo_data_settings = from_json(args.pseudo_data_settings) if args.pseudo_data_settings is not None else template_settings

# Parse the metric to be used
metric_name = 'chisquare' if args.use_chisquare else 'llh'

# Workaround for old scipy versions
import scipy
if scipy.__version__ < '0.12.0':
    logging.warn('Detected scipy version %s < 0.12.0'%scipy.__version__)
    if 'maxiter' in minimizer_settings:
        logging.warn('Optimizer settings for "maxiter" will be ignored')
        minimizer_settings.pop('maxiter')

Ejemplo n.º 55
0
def summarize_clsim_table(table_fpath,
                          table=None,
                          save_summary=True,
                          outdir=None):
    """
    Parameters
    ----------
    table_fpath : string
        Path to table (or just the table's filename if `outdir` is specified)

    table : mapping, optional
        If the table has already been loaded, it can be passed here to avoid
        re-loading the table.

    save_summary : bool
        Whether to save the table summary to disk.

    outdir : string, optional
        If `save_summary` is True, write the summary to this directory. If
        `outdir` is not specified and `save_summary` is True, the summary will
        be written to the same directory that contains `table_fpath`.

    Returns
    -------
    table
        See `load_clsim_table` for details of the data structure

    summary : OrderedDict

    """
    t_start = time()
    if save_summary:
        from pisa.utils.jsons import from_json, to_json

    table_fpath = expand(table_fpath)
    srcdir, clsim_fname = dirname(table_fpath), basename(table_fpath)
    invalid_fname = False
    try:
        fname_info = interpret_clsim_table_fname(clsim_fname)
    except ValueError:
        invalid_fname = True
        fname_info = {}

    if outdir is None:
        outdir = srcdir
    outdir = expand(outdir)
    mkdir(outdir)

    if invalid_fname:
        metapath = None
    else:
        metaname = (CLSIM_TABLE_METANAME_PROTO[-1].format(
            hash_val=fname_info['hash_val']))
        metapath = join(outdir, metaname)
    if metapath and isfile(metapath):
        meta = from_json(metapath)
    else:
        meta = dict()

    if table is None:
        table = load_clsim_table(table_fpath)

    summary = OrderedDict()
    for key in table.keys():
        if key == 'table':
            continue
        summary[key] = table[key]
    if fname_info:
        for key in ('hash_val', 'string', 'depth_idx', 'seed'):
            summary[key] = fname_info[key]
    # TODO: Add hole ice info when added to tray_kw_to_hash
    if meta:
        summary['n_events'] = meta['tray_kw_to_hash']['NEvents']
        summary['ice_model'] = meta['tray_kw_to_hash']['IceModel']
        summary['tilt'] = not meta['tray_kw_to_hash']['DisableTilt']
        for key, val in meta.items():
            if key.endswith('_binning_kw'):
                summary[key] = val
    elif 'fname_version' in fname_info and fname_info['fname_version'] == 1:
        summary['n_events'] = fname_info['n_events']
        summary['ice_model'] = 'spice_mie'
        summary['tilt'] = False
        summary['r_binning_kw'] = dict(min=0.0, max=400.0, n_bins=200, power=2)
        summary['costheta_binning_kw'] = dict(min=-1, max=1, n_bins=40)
        summary['t_binning_kw'] = dict(min=0.0, max=3000.0, n_bins=300)
        summary['costhetadir_binning_kw'] = dict(min=-1, max=1, n_bins=20)
        summary['deltaphidir_binning_kw'] = dict(min=0.0, max=np.pi, n_bins=20)

    # Save marginal distributions and info to file
    norm = (
        1 / table['n_photons'] /
        (SPEED_OF_LIGHT_M_PER_NS / table['phase_refractive_index'] *
         np.mean(np.diff(table['t_bin_edges'])))
        #* table['angular_acceptance_fract']
        * (len(table['costheta_bin_edges']) - 1))
    summary['norm'] = norm

    dim_names = ('r', 'costheta', 't', 'costhetadir', 'deltaphidir')
    n_dims = len(table['table_shape'])
    assert n_dims == len(dim_names)

    # Apply norm to underflow and overflow so magnitudes can be compared
    # relative to plotted marginal distributions
    for flow, idx in product(('underflow', 'overflow'), iter(range(n_dims))):
        summary[flow][idx] = summary[flow][idx] * norm

    wstderr('Finding marginal distributions...\n')
    wstderr('    masking off zeros in table...')
    t0 = time()
    nonzero_table = np.ma.masked_equal(table['table'], 0)
    wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3)))

    t0_marg = time()
    summary['dimensions'] = OrderedDict()
    for keep_axis, ax_name in zip(tuple(range(n_dims)), dim_names):
        remove_axes = list(range(n_dims))
        remove_axes.pop(keep_axis)
        remove_axes = tuple(remove_axes)
        axis = OrderedDict()

        wstderr('    mean across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['mean'] = norm * np.asarray(
            np.mean(table['table'], axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))

        wstderr('    median across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['median'] = norm * np.asarray(
            np.ma.median(nonzero_table, axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))

        wstderr('    max across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['max'] = norm * np.asarray(
            np.max(table['table'], axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))
        summary['dimensions'][ax_name] = axis
    wstderr('  Total time to find marginal distributions: {} s\n'.format(
        np.round(time() - t0_marg, 3)))

    if save_summary:
        ext = None
        base_fname = clsim_fname
        while ext not in ('', '.fits'):
            base_fname, ext = splitext(base_fname)
            ext = ext.lower()
        outfpath = join(outdir, base_fname + '_summary.json.bz2')
        to_json(summary, outfpath)
        print('saved summary to "{}"'.format(outfpath))

    wstderr('Time to summarize table: {} s\n'.format(
        np.round(time() - t_start, 3)))

    return table, summary
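
A short usage sketch for the function above, not part of the original example; the table path is a placeholder and its filename layout is only assumed to match what `interpret_clsim_table_fname` expects.

# Hypothetical call; the path below is a placeholder.
table, summary = summarize_clsim_table(
    table_fpath='/data/clsim_tables/some_clsim_table.fits',
    save_summary=True,            # writes <table name>_summary.json.bz2
    outdir='/data/clsim_tables',  # defaults to the table's directory if omitted
)
print(summary['dimensions'].keys())  # marginalized stats per dimension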
Ejemplo n.º 56
0
                        help="settings file to use for making templates.")
    hselect = parser.add_mutually_exclusive_group(required=False)
    hselect.add_argument('--normal', dest='normal', default=True,
                        action='store_true', help="select the normal hierarchy")
    hselect.add_argument('--inverted', dest='normal', default = False,
                        action='store_false', help="select the inverted hierarchy")
    parser.add_argument('-v','--verbose',action='count',default=None,
                        help='set verbosity level.')
    args = parser.parse_args()

    set_verbosity(args.verbose)

    profile.info("start initializing")

    #Load all the settings
    model_settings = from_json(args.settings)

    #Select a hierarchy
    logging.info('Selected %s hierarchy'%
            ('normal' if args.normal else 'inverted'))
    params =  select_hierarchy(model_settings['params'],normal_hierarchy=args.normal)

    #Intialize template maker
    template_maker = TemplateMaker(get_values(params),**model_settings['binning'])

    profile.info("stop initializing")

    #Now get the actual template
    profile.info("start template calculation")
    template_maker.get_template(get_values(params))
    profile.info("stop template calculation")