Example #1
def fix_oppo_flux(input_data):
    """Fix this `oppo` flux insanity
    someone added this in the nominal flux calculation that
    oppo flux is nue flux if flavour is nuebar, and vice versa
    here we revert that, incase these oppo keys are there

    """
    for key, val in input_data.items():
        if "neutrino_oppo_nue_flux" not in val:
            continue
        logging.warning(
            'renaming the outdated "oppo" flux keys in "%s", in the future do'
            " not use those anymore",
            key,
        )
        if "bar" in key:
            val["nominal_nue_flux"] = val.pop("neutrino_oppo_nue_flux")
            val["nominal_numu_flux"] = val.pop("neutrino_oppo_numu_flux")
            val["nominal_nuebar_flux"] = val.pop("neutrino_nue_flux")
            val["nominal_numubar_flux"] = val.pop("neutrino_numu_flux")
        else:
            val["nominal_nue_flux"] = val.pop("neutrino_nue_flux")
            val["nominal_numu_flux"] = val.pop("neutrino_numu_flux")
            val["nominal_nuebar_flux"] = val.pop("neutrino_oppo_nue_flux")
            val["nominal_numubar_flux"] = val.pop("neutrino_oppo_numu_flux")
Example #2
    def select_params(self, selections, error_on_missing=True):
        successes = 0
        if selections is not None:
            for pipeline in self:
                try:
                    pipeline.select_params(selections, error_on_missing=True)
                except KeyError:
                    pass
                else:
                    successes += 1

            if error_on_missing and successes == 0:
                raise KeyError(
                    'None of the stages from any pipeline in this distribution'
                    ' maker has all of the selections %s available.'
                    %(selections,)
                )
        else:
            for pipeline in self:
                possible_selections = pipeline.param_selections
                if possible_selections:
                    logging.warning(
                        "Although you didn't make a parameter "
                        "selection, the following were available: %s."
                        " This may cause issues.", possible_selections
                    )
Example #3
 def load_discr_sys(self, sys_list):
     """Load the fit results from the file and make some check
     compatibility"""
     self.fit_results = from_file(self.params['fit_results_file'].value)
     if not set(self.input_names) == set(self.fit_results['map_names']):
         for name in self.input_names:
             if name not in self.fit_results['map_names']:
                 # check if there is something uniquely compatible
                 compatible_names = [
                     mapname in name
                     for mapname in self.fit_results['map_names']
                 ]
                 if sum(compatible_names) == 1:
                     # compatible
                     compatible_name = self.fit_results['map_names'][
                         compatible_names.index(True)]
                     self.fit_results[name] = self.fit_results[
                         compatible_name]
                     logging.warning(
                         'Substituting hyperplane parameterization %s for %s'
                         % (compatible_name, name))
                 else:
                     logging.error('No compatible map for %s found!' % name)
     assert set(sys_list) == set(self.fit_results['sys_list'])
     self.sys_list = self.fit_results['sys_list']
Example #4
    def save(self, fpath, ver=None, **kwargs):
        """Save cross sections (and the energy specification) to a file at
        `fpath`."""
        if ver is None:
            if self._ver is None:
                raise ValueError(
                    'Either a ver must be specified in call to `save` or it '
                    'must have been set prior to the invocation of `save`.'
                )
            ver = self._ver
        else:
            assert ver == self._ver

        try:
            fpath = find_resource(fpath)
        except IOError:
            pass
        fpath = os.path.expandvars(os.path.expanduser(fpath))
        all_xs = {}
        # Get any existing data from file
        if os.path.exists(fpath):
            all_xs = from_file(fpath)
        # Validate existing data by instantiating objects from each
        for v, d in all_xs.items():
            CrossSections(ver=v, energy=d['energy'], xsec=d['xsec'])
        if ver in all_xs:
            logging.warning('Overwriting existing version "' + ver +
                            '" in file ' + fpath)
        all_xs[ver] = {'xsec':self, 'energy':self.energy}
        to_file(all_xs, fpath, **kwargs)
Example #5
    def link_containers(self, key, names):
        """Link containers together. When containers are linked, they are
        treated as a single (virtual) container for binned data

        Parameters
        ----------
        key : str
            name of linked object

        names : list
            name of containers to be linked under the given key

        """
        # intersection of names for linking and available names

        link_names = set(names) & set(self.names)
        if len(link_names) < len(names):
            logging.warning(
                "Skipping containers %s in linking, as those are not present" %
                (set(names) - set(self.names)))

        containers = [self.__getitem__(name) for name in link_names]
        logging.trace('Linking containers %s into %s' % (link_names, key))
        new_container = VirtualContainer(key, containers)
        self.linked_containers.append(new_container)
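A minimal illustration of the name-intersection step above, with made-up container names:

names = ["nue_cc", "numu_cc", "nutau_cc"]   # requested for linking (hypothetical)
available = ["nue_cc", "numu_cc"]           # what self.names might contain
link_names = set(names) & set(available)    # {'nue_cc', 'numu_cc'} get linked
skipped = set(names) - set(available)       # {'nutau_cc'} is reported in the warning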
Example #6
 def plot_2d_array(self,
                   map_set,
                   n_rows=None,
                   n_cols=None,
                   fname=None,
                   **kwargs):
     """plot all maps or transforms in a single plot"""
     if fname is None:
         fname = map_set.name
     # if dimensionality is 3, then still define a split_axis automatically
     new_maps = []
     split_axis = kwargs.pop('split_axis', None)
     for map in map_set:
         if len(map.binning) == 3:
             split_axis_ = split_axis
             if split_axis_ is None:
                 # Find the shortest dimension along which to split
                 num_bins = map.binning.num_bins
                 idx = num_bins.index(min(num_bins))
                 split_axis_ = map.binning.names[idx]
                 logging.warning(
                     'Plotter automatically splitting map %s along %s axis',
                     map.name, split_axis_)
             new_maps.extend(map.split(split_axis_))
         elif len(map.binning) == 2:
             new_maps.append(map)
         else:
             raise Exception('Cannot plot %i dimensional map in 2d' %
                             len(map.binning))
     map_set = MapSet(new_maps)
     self.plot_array(map_set,
                     'plot_2d_map',
                     n_rows=n_rows,
                     n_cols=n_cols,
                     **kwargs)
     self.dump(fname)
Example #7
def compare_numeric(test,
                    ref,
                    label=None,
                    ac_kw=deepcopy(AC_KW),
                    ignore_fails=False):
    """Compare scalars or numpy ndarrays.

    Parameters
    ----------
    test : scalar or numpy.ndarray
    ref : scalar or numpy.ndarray
    label : str or None, optional
    ac_kw : mapping, optional
        Keyword args to pass via **ac_kw to `numpy.isclose` / `numpy.allclose`
    ignore_fails : bool, optional

    Returns
    -------
    rslt : bool

    """
    pfx = f"{label} :: " if label else ""
    with np.printoptions(**PRINTOPTS):
        if np.isscalar(test):
            if np.isclose(test, ref, **ac_kw):
                return True

            msg = f"{pfx}test: {test} != ref: {ref}"
            if ignore_fails:
                logging.warning(msg)
            else:
                logging.error(msg)
            return False

        # Arrays
        if np.allclose(test, ref, **ac_kw):
            return True

        diff = test - ref
        msg = f"{pfx}test:" f"\n{(test)}\n!= ref:\n{(ref)}" f"\ndiff:\n{(diff)}"

        if not np.all(ref == 1):
            nzmask = ref != 0
            zmask = ref == 0
            fdiff = np.empty_like(ref)
            fdiff[nzmask] = diff[nzmask] / ref[nzmask]
            fdiff[zmask] = np.nan
            msg += f"\nfractdiff:\n{(fdiff)}"

        if ignore_fails:
            logging.warning(msg)
        else:
            logging.error(msg)

        return False
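The zero-safe fractional-difference computation above can be sketched in isolation with toy arrays (values are made up):

import numpy as np

test = np.array([1.0, 2.0, 0.5])
ref = np.array([1.0, 0.0, 1.0])
diff = test - ref                      # [ 0. ,  2. , -0.5]
fdiff = np.empty_like(ref)
nzmask = ref != 0
fdiff[nzmask] = diff[nzmask] / ref[nzmask]
fdiff[ref == 0] = np.nan               # undefined where the reference is zero
# fdiff -> [ 0. ,  nan, -0.5]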
Example #8
    def load_noise_events(config, dataset):
        name = config.get('general', 'name')
        weight = config.get('noise', 'weight')
        weight_units = config.get('noise', 'weight_units')
        sys_list = split(config.get('noise', 'sys_list'))
        base_prefix = config.get('noise', 'baseprefix')
        keep_keys = split(config.get('noise', 'keep_keys'))
        aliases = config.items('noise%saliases' % SEP)
        if base_prefix == 'None':
            base_prefix = ''

        if dataset == 'nominal':
            paths = []
            for sys in sys_list:
                ev_sys = 'noise%s%s' % (SEP, sys)
                nominal = config.get(ev_sys, 'nominal')
                ev_sys_nom = ev_sys + SEP + nominal
                paths.append(config.get(ev_sys_nom, 'file_path'))
            if len(set(paths)) > 1:
                raise AssertionError(
                    'Choice of nominal file is ambiguous. Nominal '
                    'choice of systematic parameters must coincide '
                    'with one and only one file. Options found are: '
                    '{0}'.format(paths))
            file_path = paths[0]
        else:
            file_path = config.get(dataset, 'file_path')
        logging.info('Extracting noise dataset "{0}" from sample '
                     '"{1}"'.format(dataset, name))

        noise = from_file(file_path)
        sample.strip_keys(keep_keys, noise)

        if weight == 'None' or weight == '1':
            noise['sample_weight'] = np.ones(noise['weights'].shape)
        elif weight == '0':
            noise['sample_weight'] = np.zeros(noise['weights'].shape)
        else:
            noise['sample_weight'] = noise[weight] * ureg(weight_units)
        noise['pisa_weight'] = deepcopy(noise['sample_weight'])

        for alias, expr in aliases:
            if alias in noise:
                logging.warning(
                    'Overwriting Data key {0} with aliased expression '
                    '{1}'.format(alias, expr))
            noise[alias] = eval(re.sub(r'\<(.*?)\>', r"noise['\1']", expr))

        noise_dict = {'noise': noise}
        return Data(noise_dict,
                    metadata={
                        'name': name,
                        'noise_sample': dataset
                    })
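The alias mechanism rewrites <key> placeholders into dictionary lookups before eval; a small sketch with a hypothetical expression:

import re

expr = "<weights> * <livetime>"   # made-up aliased expression from a config
rewritten = re.sub(r'\<(.*?)\>', r"noise['\1']", expr)
# rewritten -> "noise['weights'] * noise['livetime']", which is then eval'd
# against the noise dictionary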
Example #9
    def find_best_fit(self, check_octant=True, pprint=True, skip=False):
        """ find best fit points (max likelihood) for the free parameters and
            return likelihood + found parameter values.
        """
        # Reset free parameters to nominal values
        logging.info('resetting params')
        self.template_maker.params.reset_free()
        if not check_octant:
            logging.warning('Skipping octant check in fit!')

        best_fit_vals, metric_val, all_metrics, dict_flags = self.run_minimizer(
            pprint=pprint, skip=skip)
        best_fit = {}
        best_fit[self.metric] = metric_val
        best_fit['warnflag'] = dict_flags['warnflag']
        best_fit['avg_tmp_time'] = dict_flags['avg_tmp_time']
        best_fit['n_minimizer_calls'] = dict_flags['n_minimizer_calls']
        best_fit['funcalls'] = dict_flags['funcalls']
        best_fit['all_metrics'] = all_metrics
        if not self.blind:
            for pname in self.template_maker.params.free.names:
                best_fit[pname] = self.template_maker.params[pname].value

        # decide whether a fit in the second octant is necessary
        if 'theta23' in self.template_maker.params.free.names and not skip:
            if check_octant:
                logging.info('checking other octant of theta23')
                self.template_maker.params.reset_free()
                # changing to other octant
                theta23 = self.template_maker.params['theta23']
                inflection_point = 45 * ureg.degree
                theta23.value = 2 * inflection_point.to(
                    theta23.value.units) - theta23.value
                self.template_maker.update_params(theta23)
                best_fit_vals, metric_val, all_metrics, dict_flags = self.run_minimizer(
                    pprint=pprint)

                # compare results a and b, take one with lower llh
                if metric_val < best_fit[self.metric]:
                    # accept these values
                    logging.info('Accepting other octant fit')
                    best_fit[self.metric] = metric_val
                    best_fit['warnflag'] = dict_flags['warnflag']
                    best_fit['all_metrics'] = all_metrics
                    if not self.blind:
                        for pname in self.template_maker.params.free.names:
                            best_fit[pname] = self.template_maker.params[
                                pname].value

                else:
                    logging.info('Accepting initial octant fit')

        return best_fit
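The octant check above mirrors theta23 about the 45-degree inflection point; a sketch with pint quantities (the 42-degree starting value is arbitrary):

from pisa import ureg

theta23 = 42.0 * ureg.degree
inflection_point = 45 * ureg.degree
mirrored = 2 * inflection_point.to(theta23.units) - theta23
# mirrored -> 48.0 degree, the same distance on the other side of 45 degrees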
Example #10
 def get_binned_data(self, key, out_binning=None):
     """Get data array from binned data:
     if the key is a binning dimensions, then unroll the binning
     otherwise return the corresponding flattened array
     """
     if out_binning is not None:
         # check if key is binning dimension
         if key in out_binning.names:
             return self.unroll_binning(key, out_binning)
     binning, data = self.binned_data[key]
     if out_binning is not None:
         if binning != out_binning:
             logging.warning('Automatically re-binning data %s' % key)
             sample = [SmartArray(self.unroll_binning(name, binning)) for name in binning.names]
             new_sample = [SmartArray(self.unroll_binning(name, out_binning)) for name in out_binning.names]
             return resample(data, sample, binning, new_sample, out_binning)
     return data
Example #11
def fix_oppo_flux(input_data):
    """Fix this `oppo` flux insanity
    someone added this in the nominal flux calculation that
    oppo flux is nue flux if flavour is nuebar, and vice versa
    here we revert that, incase these oppo keys are there
    """
    for key, val in input_data.items():
        if "neutrino_oppo_nue_flux" not in val:
            continue
        logging.warning(
            'renaming the outdated "oppo" flux keys in "%s", in the future do'
            " not use those anymore",
            key,
        )
        if "bar" in key:
            for new, old in OPPO_FLUX_LEGACY_FIX_MAPPING_NUBAR.items():
                val[new] = val.pop(old)
        else:
            for new, old in OPPO_FLUX_LEGACY_FIX_MAPPING_NU.items():
                val[new] = val.pop(old)
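The two mapping constants referenced above are not defined in this snippet; inferred from the explicit renamings in Example #1, they presumably look like this:

OPPO_FLUX_LEGACY_FIX_MAPPING_NUBAR = {
    "nominal_nue_flux": "neutrino_oppo_nue_flux",
    "nominal_numu_flux": "neutrino_oppo_numu_flux",
    "nominal_nuebar_flux": "neutrino_nue_flux",
    "nominal_numubar_flux": "neutrino_numu_flux",
}
OPPO_FLUX_LEGACY_FIX_MAPPING_NU = {
    "nominal_nue_flux": "neutrino_nue_flux",
    "nominal_numu_flux": "neutrino_numu_flux",
    "nominal_nuebar_flux": "neutrino_oppo_nue_flux",
    "nominal_numubar_flux": "neutrino_oppo_numu_flux",
}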
Example #12
    def get_outputs(self):
        """
        Get the outputs of the PISA stage
        Depending on `self.output_mode`, this may be a binned object, or the event container itself
        """

        if self.output_mode == 'binned' and len(self.output_apply_keys) == 1:
            self.outputs = self.data.get_mapset(self.output_apply_keys[0])
        elif (len(self.output_apply_keys) == 2
              and 'errors' in self.output_apply_keys):
            other_key = [
                key for key in self.output_apply_keys if not key == 'errors'
            ][0]
            self.outputs = self.data.get_mapset(other_key, error='errors')
        elif self.output_mode == "events":
            self.outputs = self.data
        else:
            self.outputs = None
            logging.warning('Cannot create CAKE style output mapset')

        return self.outputs
Example #13
    def load_from_nu_file(events_file, all_flavints, weight, weight_units,
                          keep_keys, aliases):
        flav_fidg = FlavIntDataGroup(flavint_groups=all_flavints)

        events = from_file(events_file)
        sample.strip_keys(keep_keys, events)

        nu_mask = events['ptype'] > 0
        nubar_mask = events['ptype'] < 0
        cc_mask = events['interaction'] == 1
        nc_mask = events['interaction'] == 2

        if weight == 'None' or weight == '1':
            events['sample_weight'] = \
                np.ones(events['ptype'].shape) * ureg.dimensionless
        elif weight == '0':
            events['sample_weight'] = \
                np.zeros(events['ptype'].shape) * ureg.dimensionless
        else:
            events['sample_weight'] = events[weight] * \
                ureg(weight_units)
        events['pisa_weight'] = deepcopy(events['sample_weight'])

        for alias, expr in aliases:
            if alias in events:
                logging.warning(
                    'Overwriting Data key {0} with aliased expression '
                    '{1}'.format(alias, expr))
            events[alias] = eval(re.sub(r'\<(.*?)\>', r"events['\1']", expr))

        for flavint in all_flavints:
            i_mask = cc_mask if flavint.cc else nc_mask
            t_mask = nu_mask if flavint.particle else nubar_mask

            flav_fidg[flavint] = {
                var: events[var][i_mask & t_mask]
                for var in events.keys()
            }
        return flav_fidg
Example #14
    def __init__(self, pipelines, label=None, set_livetime_from_data=True):

        self.label = label
        self._source_code_hash = None
        self.metadata = OrderedDict()

        self._pipelines = []
        if isinstance(pipelines,
                      (str, PISAConfigParser, OrderedDict, Pipeline)):
            pipelines = [pipelines]

        for pipeline in pipelines:
            if not isinstance(pipeline, Pipeline):
                pipeline = Pipeline(pipeline)
            self._pipelines.append(pipeline)

        data_run_livetime = None
        if set_livetime_from_data:
            #
            # Get livetime metadata if defined in any stage in any pipeline
            #
            for pipeline_idx, pipeline in enumerate(self):
                for stage_idx, stage in enumerate(pipeline):
                    if not (hasattr(stage, "metadata")
                            and isinstance(stage.metadata, Mapping)
                            and "livetime" in stage.metadata):
                        continue

                    if data_run_livetime is None:
                        data_run_livetime = stage.metadata["livetime"]

                    if stage.metadata["livetime"] != data_run_livetime:
                        raise ValueError(
                            "Pipeline index {}, stage index {} has data"
                            " livetime = {}, in disagreement with"
                            " previously-found livetime = {}".format(
                                pipeline_idx,
                                stage_idx,
                                stage.metadata["livetime"],
                                data_run_livetime,
                            ))

            # Save the last livetime found inside the pipeline's metadata
            # TODO: implement metadata in the pipeline class instead
            self.metadata['livetime'] = data_run_livetime
            #
            # Set param `params.livetime` for any pipelines that have it
            #
            if data_run_livetime is not None:

                data_run_livetime *= ureg.sec

                for pipeline_idx, pipeline in enumerate(self):

                    if "livetime" not in pipeline.params.names:
                        continue

                    pipeline.params.livetime.is_fixed = True

                    if pipeline.params.livetime != data_run_livetime:

                        logging.warning(
                            "Pipeline index %d has params.livetime = %s, in"
                            " disagreement with data livetime = %s defined by"
                            " data. The pipeline's livetime param will be"
                            " reset to the latter value and set to be fixed"
                            " (if it is not alredy).",
                            pipeline_idx,
                            pipeline.params.livetime.value,
                            data_run_livetime,
                        )
                        pipeline.params.livetime = data_run_livetime

        #for pipeline in self:
        #    pipeline.select_params(self.param_selections,
        #                           error_on_missing=False)

        # Make sure that all the pipelines have the same detector name (or None)
        self._detector_name = 'no_name'
        for p in self._pipelines:
            name = p._detector_name
            if name != self._detector_name and self._detector_name != 'no_name':
                raise NameError(
                    'Different detector names in distribution_maker pipelines')

            self._detector_name = name
Example #15
def main(return_outputs=False):
    """Run unit tests if `pipeline.py` is called as a script."""
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    # Even if user specifies an integer on command line, it comes in as a
    # string. Try to convert to int (e.g. if `'1'` is passed to indicate the
    # second stage), and -- if successful -- use this as `args.only_stage`.
    # Otherwise, the string value passed will be used (e.g. `'osc'` could be
    # passed).
    try:
        only_stage_int = int(args.only_stage)
    except (ValueError, TypeError):
        pass
    else:
        args.only_stage = only_stage_int

    if args.outdir:
        mkdir(args.outdir)
    else:
        if args.pdf or args.png:
            raise ValueError("No --outdir provided, so cannot save images.")

    # Most basic parsing of the pipeline config (parsing only to this level
    # allows for simple strings to be specified as args for updating)
    bcp = PISAConfigParser()
    bcp.read(args.pipeline)

    # Update the config with any args specified on command line
    if args.arg is not None:
        for arg_list in args.arg:
            if len(arg_list) < 2:
                raise ValueError(
                    'Args must be formatted as: "section arg=val". Got "%s"'
                    " instead." % " ".join(arg_list))
            section = arg_list[0]
            remainder = " ".join(arg_list[1:])
            eq_split = remainder.split("=")
            newarg = eq_split[0].strip()
            value = ("=".join(eq_split[1:])).strip()
            logging.debug('Setting config section "%s" arg "%s" = "%s"',
                          section, newarg, value)
            try:
                bcp.set(section, newarg, value)
            except NoSectionError:
                logging.error(
                    'Invalid section "%s" specified. Must be one of %s',
                    section,
                    bcp.sections(),
                )
                raise

    # Instantiate the pipeline
    pipeline = Pipeline(bcp)  # pylint: disable=redefined-outer-name

    if args.select is not None:
        pipeline.select_params(args.select, error_on_missing=True)

    if args.only_stage is None:
        stop_idx = args.stop_after_stage
        try:
            stop_idx = int(stop_idx)
        except (TypeError, ValueError):
            pass
        if isinstance(stop_idx, str):
            stop_idx = pipeline.index(stop_idx)
        outputs = pipeline.get_outputs(idx=stop_idx)  # pylint: disable=redefined-outer-name
        if stop_idx is not None:
            stop_idx += 1
        indices = slice(0, stop_idx)
    else:
        assert args.stop_after_stage is None
        idx = pipeline.index(args.only_stage)
        stage = pipeline[idx]
        indices = slice(idx, idx + 1)

        # Create dummy inputs if necessary
        inputs = None
        if hasattr(stage, "input_binning"):
            logging.warning(
                "Stage requires input, so building dummy"
                " inputs of random numbers, with random state set to the input"
                " index according to alphabetical ordering of input names and"
                " filled in alphabetical ordering of dimension names.")
            input_maps = []
            tmp = deepcopy(stage.input_binning)
            alphabetical_binning = tmp.reorder_dimensions(sorted(tmp.names))
            for input_num, input_name in enumerate(sorted(stage.input_names)):
                # Create a new map with all 3's; name according to the input
                hist = np.full(shape=alphabetical_binning.shape,
                               fill_value=3.0)
                input_map = Map(name=input_name,
                                binning=alphabetical_binning,
                                hist=hist)

                # Apply Poisson fluctuations to randomize the values in the map
                input_map.fluctuate(method="poisson", random_state=input_num)

                # Reorder dimensions according to user's original binning spec
                input_map.reorder_dimensions(stage.input_binning)
                input_maps.append(input_map)
            inputs = MapSet(maps=input_maps, name="ones", hash=1)

        outputs = stage.run(inputs=inputs)

    for stage in pipeline[indices]:
        if not args.outdir:
            break
        stg_svc = stage.stage_name + "__" + stage.service_name
        fbase = os.path.join(args.outdir, stg_svc)
        if args.intermediate or stage == pipeline[indices][-1]:
            stage.outputs.to_json(fbase + "__output.json.bz2")

        # also only plot if args intermediate or last stage
        if args.intermediate or stage == pipeline[indices][-1]:
            formats = OrderedDict(png=args.png, pdf=args.pdf)
            if isinstance(stage.outputs, Data):
                # TODO(shivesh): plots made here will use the most recent
                # "pisa_weight" column and so all stages will have identical plots
                # (one workaround is to turn on "memcache_deepcopy")
                # TODO(shivesh): intermediate stages have no output binning
                if stage.output_binning is None:
                    logging.debug("Skipping plot of intermediate stage %s",
                                  stage)
                    continue
                outputs = stage.outputs.histogram_set(
                    binning=stage.output_binning,
                    nu_weights_col="pisa_weight",
                    mu_weights_col="pisa_weight",
                    noise_weights_col="pisa_weight",
                    mapset_name=stg_svc,
                    errors=True,
                )

            try:
                for fmt, enabled in formats.items():
                    if not enabled:
                        continue
                    my_plotter = Plotter(
                        stamp="Event rate",
                        outdir=args.outdir,
                        fmt=fmt,
                        log=False,
                        annotate=args.annotate,
                    )
                    my_plotter.ratio = True
                    my_plotter.plot_2d_array(outputs,
                                             fname=stg_svc + "__output",
                                             cmap="RdBu")
            except ValueError as exc:
                logging.error(
                    "Failed to save plot to format %s. See exception"
                    " message below",
                    fmt,
                )
                traceback.format_exc()
                logging.exception(exc)
                logging.warning("I can't go on, I'll go on.")

    if return_outputs:
        return pipeline, outputs
Example #16
    def _init_stages(self):
        """Stage factory: Instantiate stages specified by self.config.

        Conventions required for this to work:
            * Stage and service names must be lower-case
            * Service implementations must be found at Python path
              `pisa.stages.<stage_name>.<service_name>`
            * `service` cannot be an instantiation argument for a service

        """
        stages = []
        for stage_num, item in enumerate(self.config.items()):
            try:
                name, settings = item

                if isinstance(name, str):
                    if name == 'pipeline':
                        continue

                stage_name, service_name = name

                # old cfgs compatibility
                if service_name.startswith('pi_'):
                    logging.warning(
                        f"Old stage name `{service_name}` is automatically renamed to `{service_name.replace('pi_', '')}`. "
                        + "Please change your config in the future!")
                service_name = service_name.replace('pi_', '')

                logging.debug("instantiating stage %s / service %s",
                              stage_name, service_name)

                # Import service's module
                logging.trace(
                    f"Importing service module: {stage_name}.{service_name}")
                try:
                    module_path = f"pisa.stages.{stage_name}.{service_name}"
                    module = import_module(module_path)
                except:
                    logging.debug(
                        f"Module {stage_name}.{service_name} not found in PISA, trying "
                        "to import from external definition.")
                    module_path = f"{stage_name}.{service_name}"
                    module = import_module(module_path)

                # Get service class from module
                service_cls = getattr(module, service_name)

                # Instantiate service
                logging.trace(
                    "initializing stage.service %s.%s with settings %s" %
                    (stage_name, service_name, settings))
                try:
                    service = service_cls(**settings, profile=self._profile)
                except Exception:
                    logging.error(
                        "Failed to instantiate stage.service %s.%s with settings %s",
                        stage_name,
                        service_name,
                        settings.keys(),
                    )
                    raise

                if not isinstance(service, Stage):
                    raise TypeError(
                        'Trying to create service "%s" for stage #%d (%s),'
                        " but object %s instantiated from class %s is not a"
                        " PISA Stage type but instead is of type %s." % (
                            service_name,
                            stage_num,
                            stage_name,
                            service,
                            service_cls,
                            type(service),
                        ))

                stages.append(service)

            except:
                logging.error(
                    "Failed to initialize stage #%d (stage=%s, service=%s).",
                    stage_num,
                    stage_name,
                    service_name,
                )
                raise

        # set parameters with an identical name to the same object
        # otherwise we get inconsistent behaviour when setting repeated params
        # See Issues #566 and #648
        all_params = self.params
        self.update_params(all_params, existing_must_match=True, extend=False)

        param_selections = set()
        for service in stages:
            param_selections.update(service.param_selections)
        param_selections = sorted(param_selections)

        for stage in stages:
            stage.select_params(param_selections, error_on_missing=False)

        self._stages = stages

        self.setup()
Example #17
def test_example_pipelines(ignore_gpu=False,
                           ignore_root=False,
                           ignore_missing_data=False):
    """Run example pipelines.

    Parameters
    ----------
    ignore_gpu : bool
        Do not count errors initializing a GPU as failures

    ignore_root : bool
        Do not count errors importing ROOT as failures

    ignore_missing_data : bool
        Do not count errors due to missing data files as failures

    """
    # Set up the lists of strings needed to search the error messages for
    # things to ignore e.g. cuda stuff and ROOT stuff
    root_err_strings = ['ROOT', 'Roo', 'root', 'roo']
    cuda_err_strings = ['cuda']
    missing_data_string = ('Could not find resource "(.*)" in'
                           ' filesystem OR in PISA package.')

    example_directory = find_resource('settings/pipeline')
    settings_files = glob.glob(example_directory + '/*example*.cfg')

    num_configs = len(settings_files)
    failure_count = 0
    skip_count = 0

    for settings_file in settings_files:
        allow_error = False
        msg = ''
        try:
            logging.info('Instantiating pipeline from file "%s" ...',
                         settings_file)
            pipeline = Pipeline(settings_file)
            logging.info('    retrieving outputs...')
            _ = pipeline.get_outputs()

        except ImportError as err:
            exc = sys.exc_info()
            if any(errstr in str(err) for errstr in root_err_strings) and \
              ignore_root:
                skip_count += 1
                allow_error = True
                msg = ('    Skipping pipeline, %s, as it has ROOT dependencies'
                       ' (ROOT cannot be imported)' % settings_file)
            elif any(errstr in str(err) for errstr in cuda_err_strings) and \
              ignore_gpu:
                skip_count += 1
                allow_error = True
                msg = ('    Skipping pipeline, %s, as it has cuda dependencies'
                       ' (pycuda cannot be imported)' % settings_file)
            else:
                failure_count += 1

        except IOError as err:
            exc = sys.exc_info()
            match = re.match(missing_data_string, str(err), re.M | re.I)
            if match is not None and ignore_missing_data:
                skip_count += 1
                allow_error = True
                msg = ('    Skipping pipeline, %s, as it has data that cannot'
                       ' be found in the local PISA environment' %
                       settings_file)
            else:
                failure_count += 1

        except:  # pylint: disable=bare-except
            exc = sys.exc_info()
            failure_count += 1

        else:
            exc = None

        finally:
            if exc is not None:
                if allow_error:
                    logging.warning(msg)
                else:
                    logging.error(
                        '    FAILURE! %s failed to run. Please review the'
                        ' error message below and fix the problem. Continuing'
                        ' with any other configs now...', settings_file)
                    for line in format_exception(*exc):
                        for sub_line in line.splitlines():
                            logging.error(' ' * 4 + sub_line)
            else:
                logging.info('    Seems fine!')

    if skip_count > 0:
        logging.warning('%d of %d example pipeline config files were skipped',
                        skip_count, num_configs)

    if failure_count > 0:
        msg = ('<< FAIL : test_example_pipelines : (%d of %d EXAMPLE PIPELINE'
               ' CONFIG FILES FAILED) >>' % (failure_count, num_configs))
        logging.error(msg)
        raise Exception(msg)

    logging.info('<< PASS : test_example_pipelines >>')
Example #18
def parse_pipeline_config(config):
    """Parse pipeline config.

    Parameters
    ----------
    config : string or ConfigParser

    Returns
    -------
    stage_dicts : OrderedDict
        Keys are (stage_name, service_name) tuples and values are OrderedDicts
        with keys the argnames and values the arguments' values. Some known arg
        values are parsed out fully into Python objects, while the rest remain
        as strings that must be used or parsed elsewhere.

    """
    # Note: imports placed here to avoid circular imports
    from pisa.core.binning import MultiDimBinning, OneDimBinning
    from pisa.core.param import ParamSelector

    if isinstance(config, str):
        config = from_file(config)
    elif isinstance(config, PISAConfigParser):
        pass
    else:
        raise TypeError(
            '`config` must either be a string or PISAConfigParser. Got %s '
            'instead.' % type(config))

    if not config.has_section('binning'):
        raise NoSectionError(
            "Could not find 'binning'. Only found sections: %s" %
            config.sections())

    # Create binning objects
    binning_dict = {}
    for name, value in config['binning'].items():
        if name.endswith('.order'):
            order = split(config.get('binning', name))
            binning, _ = split(name, sep='.')
            bins = []
            for bin_name in order:
                try:
                    def_raw = config.get('binning', binning + '.' + bin_name)
                except:
                    dims_defined = [
                        split(dim, sep='.')[1]
                        for dim in config['binning'].keys()
                        if dim.startswith(binning +
                                          '.') and not dim.endswith('.order')
                    ]
                    logging.error(
                        "Failed to find definition of '%s' dimension of '%s'"
                        " binning entry. Only found definition(s) of: %s",
                        bin_name, binning, dims_defined)
                    del dims_defined
                    raise
                try:
                    kwargs = eval(def_raw)  # pylint: disable=eval-used
                except:
                    logging.error(
                        "Failed to evaluate definition of '%s' dimension of"
                        " '%s' binning entry:\n'%s'", bin_name, binning,
                        def_raw)
                    raise
                try:
                    bins.append(OneDimBinning(bin_name, **kwargs))
                except:
                    logging.error(
                        "Failed to instantiate new `OneDimBinning` from '%s'"
                        " dimension of '%s' binning entry with definition:\n"
                        "'%s'\n", bin_name, binning, kwargs)
                    raise
            binning_dict[binning] = MultiDimBinning(bins)

    # Pipeline section
    section = 'pipeline'

    # Get and parse the order of the stages (and which services implement them)
    order = [split(x, STAGE_SEP) for x in split(config.get(section, 'order'))]

    param_selections = []
    if config.has_option(section, 'param_selections'):
        param_selections = split(config.get(section, 'param_selections'))

    detector_name = None
    if config.has_option(section, 'detector_name'):
        detector_name = config.get(section, 'detector_name')

    # Parse [stage.<stage_name>] sections and store to stage_dicts
    stage_dicts = OrderedDict()
    for stage, service in order:
        old_section_header = 'stage%s%s' % (STAGE_SEP, stage)
        new_section_header = '%s%s%s' % (stage, STAGE_SEP, service)
        if config.has_section(old_section_header):
            logging.warning(
                '"%s" is an old-style section header, in the future use "%s"' %
                (old_section_header, new_section_header))
            section = old_section_header
        elif config.has_section(new_section_header):
            section = new_section_header
        else:
            raise IOError(
                'missing section in cfg for stage "%s" service "%s"' %
                (stage, service))

        # Instantiate dict to store args to pass to this stage
        service_kwargs = OrderedDict()

        param_selector = ParamSelector(selections=param_selections)
        service_kwargs['params'] = param_selector

        n_params = 0
        for fullname in config.options(section):
            try:
                value = config.get(section, fullname)
            except:
                logging.error(
                    'Unable to obtain value of option "%s" in section "%s".' %
                    (fullname, section))
                raise
            # See if this matches a param specification
            param_match = PARAM_RE.match(fullname)
            if param_match is not None:
                n_params += 1

                param_match_dict = param_match.groupdict()
                param_subfields = param_match_dict['subfields'].split('.')

                # Figure out what the dotted fields represent...
                infodict = interpret_param_subfields(subfields=param_subfields)

                # If field is an attr, skip since these are located manually
                if infodict['attr'] is not None:
                    continue

                # Check if this param already exists in a previous stage; if
                # so, make sure there are no specs for this param, but just a
                # link to previous the param object that is already
                # instantiated.
                for kw in stage_dicts.values():
                    # Stage did not get a `params` argument from config
                    if 'params' not in kw:
                        continue

                    # Retrieve the param from the ParamSelector
                    try:
                        param = kw['params'].get(name=infodict['pname'],
                                                 selector=infodict['selector'])
                    except KeyError:
                        continue

                    # Make sure there are no other specs (in this section) for
                    # the param defined in the previous section
                    for a in PARAM_ATTRS:
                        if config.has_option(section, '%s.%s' % (fullname, a)):
                            raise ValueError("Parameter spec. '%s' of '%s' "
                                             "found in section '%s', but "
                                             "parameter exists in previous "
                                             "stage!" % (a, fullname, section))

                    break

                # Param *not* found in a previous stage (i.e., no explicit
                # `break` encountered in `for` loop above); therefore must
                # instantiate it.
                else:
                    param = parse_param(config=config,
                                        section=section,
                                        selector=infodict['selector'],
                                        fullname=fullname,
                                        pname=infodict['pname'],
                                        value=value)

                param_selector.update(param, selector=infodict['selector'])

            # If it's not a param spec but contains 'binning', assume it's a
            # binning spec for CAKE stages
            elif 'binning' in fullname:
                service_kwargs[fullname] = binning_dict[value]

            # otherwise it's a PI stage argument
            elif '_specs' in fullname:
                value = parse_string_literal(value)
                # is it None?
                if value is None:
                    service_kwargs[fullname] = value
                # is it events?
                elif value in ['evnts', 'events']:
                    service_kwargs[fullname] = 'events'
                # so it has to be a binning
                else:
                    service_kwargs[fullname] = binning_dict[value]

            # it's a list of in-/output names
            elif fullname.endswith('_names'):
                value = split(value)
                service_kwargs[fullname] = value
            # Otherwise it's some other stage instantiation argument; identify
            # this by its full name and try to interpret and instantiate a
            # Python object using the string
            else:
                try:
                    value = parse_quantity(value)
                    value = value.nominal_value * value.units
                except ValueError:
                    value = parse_string_literal(value)
                service_kwargs[fullname] = value

        # If no params actually specified in config, remove 'params' from the
        # service's keyword args
        if n_params == 0:
            service_kwargs.pop('params')

        # Store the service's kwargs to the stage_dicts
        stage_dicts[(stage, service)] = service_kwargs

    stage_dicts['detector_name'] = detector_name
    return stage_dicts
Example #19
def main():
    args = parse_args()
    init_args_d = vars(args)

    # NOTE: Extraneous args that won't get passed when instantiating the
    # HypoTesting object are removed via the dictionary's `pop()` method.

    set_verbosity(init_args_d.pop('v'))

    detector = init_args_d.pop('detector')
    selection = init_args_d.pop('selection')
    atype = init_args_d.pop('atype')
    return_total = not init_args_d.pop('return_bits')

    # Normalize and convert `*_pipeline` filenames; store to `*_maker`
    # (which is argument naming convention that HypoTesting init accepts).
    for maker in ['h0', 'h1']:
        filenames = init_args_d.pop(maker + '_pipeline')
        if filenames is not None:
            filenames = sorted(
                [normcheckpath(fname) for fname in filenames]
            )
        init_args_d[maker + '_maker'] = filenames

        ps_name = maker + '_param_selections'
        ps_str = init_args_d[ps_name]
        if ps_str is None:
            ps_list = None
        else:
            ps_list = [x.strip().lower() for x in ps_str.split(',')]
        init_args_d[ps_name] = ps_list

    # Add dummy values for the arguments we don't care about when making these plots
    init_args_d['minimizer_settings'] = {}
    init_args_d['data_is_data'] = None
    init_args_d['fluctuate_data'] = None
    init_args_d['fluctuate_fid'] = None
    init_args_d['metric'] = 'chi2'

    if init_args_d['h1_maker'] is None:
        init_args_d['h1_maker'] = init_args_d['h0_maker']

    init_args_d['h0_maker'] = DistributionMaker(init_args_d['h0_maker'])
    init_args_d['h1_maker'] = DistributionMaker(init_args_d['h1_maker'])
    init_args_d['h1_maker'].select_params(init_args_d['h1_param_selections'])

    # Instantiate the analysis object
    hypo_testing = HypoTesting(**init_args_d)

    h0_maker = hypo_testing.h0_maker
    h0_maker.select_params(init_args_d['h0_param_selections'])
    for h0_pipeline in h0_maker.pipelines:
        # Need a special case where PID is a separate stage
        if 'pid' in h0_pipeline.stage_names:
            if return_total:
                raise ValueError(
                    "PID is a separate stage but you have requested"
                    " return_total in the arguments to this script."
                )
            return_h0_sum = False
        else:
            return_h0_sum = return_total
    h0_maps = h0_maker.get_outputs(return_sum=return_h0_sum)

    # Assume just a singular pipeline used here.
    # Not sure how else to deal with PID as a separate stage.
    if not return_h0_sum:
        h0_maps = h0_maps[0]

    h1_maker = hypo_testing.h1_maker
    h1_maker.select_params(init_args_d['h1_param_selections'])
    for h1_pipeline in h1_maker.pipelines:
        # Need a special case where PID is a separate stage
        if 'pid' in h1_pipeline.stage_names:
            if return_total:
                raise ValueError(
                    "PID is a separate stage but you have requested"
                    " return_total in the arguments to this script."
                )
            return_h1_sum = False
        else:
            return_h1_sum = return_total
    h1_maps = h1_maker.get_outputs(return_sum=return_h1_sum)

    # Assume just a singular pipeline used here.
    # Not sure how else to deal with PID as a separate stage.
    if not return_h1_sum:
        h1_maps = h1_maps[0]

    if not sorted(h0_maps.names) == sorted(h1_maps.names):
        raise ValueError(
            "The output names of your h0 and h1 pipelines "
            "do not agree - %s and %s."%(
                sorted(h0_maps.names), sorted(h1_maps.names)
            )
        )

    det_sel = []
    if detector.strip() != '':
        det_sel.append(detector.strip())
    if selection.strip() != '':
        det_sel.append(selection.strip())
    det_sel_label = ' '.join(det_sel)

    det_sel_plot_label = det_sel_label
    if det_sel_plot_label != '':
        det_sel_plot_label += ', '

    det_sel_file_label = det_sel_label
    if det_sel_file_label != '':
        det_sel_file_label += '_'
    det_sel_file_label = det_sel_file_label.replace(' ', '_')

    # Need a special case where PID is a separate stage
    if fnmatch(''.join(h0_maps.names), '*_tr*ck*'):

        h0_trck_map = h0_maps.combine_wildcard('*_tr*ck')
        h1_trck_map = h1_maps.combine_wildcard('*_tr*ck')
        h0_cscd_map = h0_maps.combine_wildcard('*_c*sc*d*')
        h1_cscd_map = h1_maps.combine_wildcard('*_c*sc*d*')

        plot_asymmetry(
            h0_map=h0_trck_map,
            h1_map=h1_trck_map,
            h0_name='%s' % args.h0_name,
            h1_name='%s' % args.h1_name,
            fulltitle='%sevents identified as track' % det_sel_plot_label,
            savename='%strck' % det_sel_file_label,
            outdir=args.logdir,
            atype=atype
        )

        plot_asymmetry(
            h0_map=h0_cscd_map,
            h1_map=h1_cscd_map,
            h0_name='%s' % args.h0_name,
            h1_name='%s' % args.h1_name,
            fulltitle=('%sevents identified as cascade'
                       % det_sel_plot_label),
            savename='%scscd' % det_sel_file_label,
            outdir=args.logdir,
            atype=atype
        )

    # Otherwise, PID is assumed to be a binning dimension
    elif 'pid' in h0_maps[h0_maps.names[0]].binning.names:

        for map_name in h0_maps.names:
            h0_map = h0_maps[map_name]
            h0_map.set_errors(error_hist=None)

            h1_map = h1_maps[map_name]
            h1_map.set_errors(error_hist=None)

            pid_names = h0_map.binning['pid'].bin_names
            if pid_names != h1_map.binning['pid'].bin_names:
                raise ValueError(
                    "h0 and h1 maps must have same PID bin names"
                    " in order to make the asymmetry plots"
                )
            if pid_names is None:
                logging.warning(
                    "There are no names given for the PID bins, thus "
                    "they will just be numbered in both the the plot "
                    "save names and titles."
                )
                pid_names = [
                    x for x in range(0, h0_map.binning['pid'].num_bins)
                ]

            for pid_name in pid_names:
                    
                h0_to_plot = h0_map.split(
                    dim='pid',
                    bin=pid_name
                )
                
                h1_to_plot = h1_map.split(
                    dim='pid',
                    bin=pid_name
                )

                if isinstance(pid_name, int):
                    pid_name = 'PID Bin %i' % (pid_name)

                plot_asymmetry(
                    h0_map=h0_to_plot,
                    h1_map=h1_to_plot,
                    h0_name='%s' % args.h0_name,
                    h1_name='%s' % args.h1_name,
                    fulltitle=('%sevents identified as %s'
                               % (det_sel_plot_label, pid_name)),
                    savename=('%s_%s%s' % (map_name,
                                           det_sel_file_label,
                                           pid_name)),
                    outdir=args.logdir,
                    atype=atype
                )

    else:

        for map_name in h0_maps.names:
            
            h0_map = h0_maps[map_name]
            h0_map.set_errors(error_hist=None)
            
            h1_map = h1_maps[map_name]
            h1_map.set_errors(error_hist=None)

            plot_asymmetry(
                h0_map=h0_map,
                h1_map=h1_map,
                h0_name='%s' % args.h0_name,
                h1_name='%s' % args.h1_name,
                fulltitle=('%sevents'%(det_sel_plot_label)),
                savename=('%s_%s' % (map_name, det_sel_file_label)),
                outdir=args.logdir,
                atype=atype
            )
Example #20
def test_kde_histogramdd():
    """Unit tests for kde_histogramdd"""
    from argparse import ArgumentParser
    from shutil import rmtree
    from tempfile import mkdtemp
    from pisa import ureg
    from pisa.core.map import Map, MapSet
    from pisa.utils.log import logging, set_verbosity
    from pisa.utils.plotter import Plotter

    parser = ArgumentParser()
    parser.add_argument("-v",
                        action="count",
                        default=None,
                        help="set verbosity level")
    args = parser.parse_args()
    set_verbosity(args.v)

    temp_dir = mkdtemp()

    try:
        my_plotter = Plotter(
            stamp="",
            outdir=temp_dir,
            fmt="pdf",
            log=False,
            annotate=False,
            symmetric=False,
            ratio=True,
        )

        b1 = OneDimBinning(name="coszen",
                           num_bins=20,
                           is_lin=True,
                           domain=[-1, 1],
                           tex=r"\cos(\theta)")
        b2 = OneDimBinning(name="energy",
                           num_bins=10,
                           is_log=True,
                           domain=[1, 80] * ureg.GeV,
                           tex=r"E")
        b3 = OneDimBinning(name="pid",
                           num_bins=2,
                           bin_edges=[0, 1, 2],
                           tex=r"pid")
        binning = b1 * b2 * b3

        # now let's generate some toy data

        N = 100000
        cz = np.random.normal(1, 1.2, N)
        # cut away coszen outside -1, 1
        cz = cz[(cz >= -1) & (cz <= 1)]
        e = np.random.normal(30, 20, len(cz))
        pid = np.random.uniform(0, 2, len(cz))
        data = np.array([cz, e, pid]).T

        # make numpy histogram for validation
        bins = [unp.nominal_values(b.bin_edges) for b in binning]
        raw_hist, _ = np.histogramdd(data, bins=bins)

        # get KDE'ed histo
        hist = kde_histogramdd(
            data,
            binning,
            bw_method="silverman",
            coszen_name="coszen",
            oversample=10,
            use_cuda=True,
            stack_pid=True,
        )

        # put into mapsets and plot
        m1 = Map(name="KDE", hist=hist, binning=binning)
        m2 = Map(name="raw", hist=raw_hist, binning=binning)
        with np.errstate(divide="ignore", invalid="ignore"):
            m3 = m2 / m1
        m3.name = "hist/KDE"
        m3.tex = m3.name
        m4 = m1 - m2
        m4.name = "KDE - hist"
        m4.tex = m4.name
        ms = MapSet([m1, m2, m3, m4])
        my_plotter.plot_2d_array(ms, fname="test_kde", cmap="summer")
    except:
        rmtree(temp_dir)
        raise
    else:
        logging.warning("Inspect and manually clean up output(s) saved to %s" %
                        temp_dir)
Example #21
File: hdf.py Project: thehrh/pisa-1
    def store_recursively(fhandle, node, path=None, attrs=None,
                          node_hashes=None):
        """Function for iteratively doing the work"""
        path = [] if path is None else path
        full_path = '/' + '/'.join(path)
        node_hashes = OrderedDict() if node_hashes is None else node_hashes

        if attrs is None:
            sorted_attr_keys = []
        else:
            if isinstance(attrs, OrderedDict):
                sorted_attr_keys = attrs.keys()
            else:
                sorted_attr_keys = sorted(attrs.keys())

        if isinstance(node, Mapping):
            logging.trace('  creating Group "%s"', full_path)
            try:
                dset = fhandle.create_group(full_path)
                for key in sorted_attr_keys:
                    dset.attrs[key] = attrs[key]
            except ValueError:
                pass

            for key in sorted(node.keys()):
                if isinstance(key, str):
                    key_str = key
                else:
                    key_str = str(key)
                    logging.warning(
                        'Making string from key "%s", %s for use as'
                        ' name in HDF5 file', key_str, type(key)
                    )
                val = node[key]
                new_path = path + [key_str]
                store_recursively(fhandle=fhandle, node=val, path=new_path,
                                  node_hashes=node_hashes)
        else:
            # Check for existing node
            node_hash = hash_obj(node)
            if node_hash in node_hashes:
                logging.trace('  creating hardlink for Dataset: "%s" -> "%s"',
                              full_path, node_hashes[node_hash])
                # Hardlink the matching existing dataset
                fhandle[full_path] = fhandle[node_hashes[node_hash]]
                return

            # For now, convert None to np.nan since h5py appears to not handle
            # None
            if node is None:
                node = np.nan
                logging.warning(
                    '  encountered `None` at node "%s"; converting to'
                    ' np.nan', full_path
                )

            # "Scalar datasets don't support chunk/filter options". Shuffling
            # is a good idea otherwise since subsequent compression will
            # generally benefit; shuffling requires chunking. Compression is
            # not done here since it is slow, but can be done by
            # post-processing the generated file(s).
            if np.isscalar(node):
                shuffle = False
                chunks = None
            else:
                shuffle = True
                chunks = True
                # Store the node_hash for linking to later if this is more
                # than a scalar datatype. `None` values are assumed to have
                # already been converted to np.nan above.
                node_hashes[node_hash] = full_path

            # -- Handle special types -- #

            # See h5py docs at
            #
            #   https://docs.h5py.org/en/stable/strings.html#how-to-store-text-strings
            #
            # where using `bytes` objects (i.e., in numpy, np.string_) is
            # deemed the most compatible way to encode objects, but apparently
            # we don't have pytables compatibility right now.
            #
            # For boolean support, see
            #
            #   https://docs.h5py.org/en/stable/faq.html#faq

            # TODO: make written hdf5 files compatible with pytables
            # see docs at https://www.pytables.org/usersguide/datatypes.html

            if isinstance(node, string_types):
                node = np.string_(node)
            elif isinstance(node, bool):  # includes np.bool
                node = np.bool_(node)  # same as np.bool8
            elif isinstance(node, np.ndarray):
                if issubclass(node.dtype.type, string_types):
                    node = node.astype(np.string_)
                elif node.dtype.type in (bool, np.bool_):
                    node = node.astype(np.bool_)

            logging.trace('  creating dataset at path "%s", hash %s',
                          full_path, node_hash)
            try:
                dset = fhandle.create_dataset(
                    name=full_path, data=node, chunks=chunks, compression=None,
                    shuffle=shuffle, fletcher32=False
                )
            except TypeError:
                try:
                    shuffle = False
                    chunks = None
                    dset = fhandle.create_dataset(
                        name=full_path, data=node, chunks=chunks,
                        compression=None, shuffle=shuffle, fletcher32=False
                    )
                except Exception:
                    logging.error('  full_path: "%s"', full_path)
                    logging.error('  chunks   : %s', str(chunks))
                    logging.error('  shuffle  : %s', str(shuffle))
                    logging.error('  node     : "%s"', str(node))
                    raise

            for key in sorted_attr_keys:
                dset.attrs[key] = attrs[key]
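The core trick in store_recursively is de-duplication: leaf nodes are hashed, and when the same data appears at more than one path an HDF5 hard link is created instead of a second copy. Below is a minimal sketch of that trick in plain h5py; hash_arr is a stand-in for PISA's hash_obj and the helper names and output filename are illustrative only.
import hashlib
import h5py
import numpy as np

def hash_arr(arr):
    # illustrative replacement for PISA's hash_obj: hash bytes plus dtype
    arr = np.ascontiguousarray(arr)
    return hashlib.sha256(arr.tobytes() + str(arr.dtype).encode()).hexdigest()

def store(fhandle, node, path="", seen=None):
    seen = {} if seen is None else seen
    if isinstance(node, dict):
        if path:
            fhandle.require_group(path)
        for key, val in sorted(node.items()):
            store(fhandle, val, path + "/" + str(key), seen)
        return
    arr = np.asarray(node)
    h = hash_arr(arr)
    if h in seen:
        fhandle[path] = fhandle[seen[h]]   # hard link to the existing dataset
    else:
        fhandle.create_dataset(path, data=arr)
        seen[h] = path

with h5py.File("dedup_example.hdf5", "w") as f:
    big = np.arange(1000)
    store(f, {"a": {"x": big, "y": np.ones(3)}, "b": {"x_again": big}})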
Example #22
0
def run_unit_tests(path=PISA_PATH,
                   allow_missing=OPTIONAL_MODULES,
                   verbosity=Levels.WARN):
    """Run all tests found at `path` (or recursively below if `path` is a
    directory).

    Each module is imported and each test function is run initially with
    `set_verbosity(verbosity)`, but if an exception is caught, the module is
    re-imported or the test function is re-run with
    `set_verbosity(Levels.TRACE)`, and the traceback from the (original)
    exception is displayed.

    Parameters
    ----------
    path : str
        Path to file or directory

    allow_missing : None or sequence of str
        Names of modules that are allowed to fail to import; such failures
        are ignored rather than raised

    verbosity : int in pisa.utils.log.Levels

    Raises
    ------
    Exception
        If any import or test fails for a reason other than a missing module
        listed in `allow_missing`

    """
    set_verbosity(verbosity)
    logging.info("%sPlatform information:", PFX)
    logging.info("%s  HOSTNAME = %s", PFX, socket.gethostname())
    logging.info("%s  FQDN = %s", PFX, socket.getfqdn())
    logging.info("%s  OS = %s %s", PFX, platform.system(), platform.release())
    for key, val in cpuinfo.get_cpu_info().items():
        logging.info("%s  %s = %s", PFX, key, val)
    logging.info(PFX)
    logging.info("%sModule versions:", PFX)
    for module_name in REQUIRED_MODULES + OPTIONAL_MODULES:
        try:
            module = import_module(module_name)
        except ImportError:
            if module_name in REQUIRED_MODULES:
                raise
            ver = "optional module not installed or not import-able"
        else:
            if hasattr(module, "__version__"):
                ver = module.__version__
            else:
                ver = "?"
        logging.info("%s  %s : %s", PFX, module_name, ver)
    logging.info(PFX)

    path = expand(path, absolute=True, resolve_symlinks=True)
    if allow_missing is None:
        allow_missing = []
    elif isinstance(allow_missing, str):
        allow_missing = [allow_missing]

    tests = find_unit_tests(path)

    module_pypaths_succeeded = []
    module_pypaths_failed = []
    module_pypaths_failed_ignored = []
    test_pypaths_succeeded = []
    test_pypaths_failed = []
    test_pypaths_failed_ignored = []

    for rel_file_path, test_func_names in tests.items():
        pypath = ["pisa"] + rel_file_path[:-3].split("/")
        parent_pypath = ".".join(pypath[:-1])
        module_name = pypath[-1].replace(".", "_")
        module_pypath = f"{parent_pypath}.{module_name}"

        try:
            set_verbosity(verbosity)
            logging.info(PFX + f"importing {module_pypath}")

            set_verbosity(Levels.WARN)
            module = import_module(module_pypath, package=parent_pypath)

        except Exception as err:
            if (isinstance(err, ImportError) and hasattr(err, "name")
                    and err.name in allow_missing  # pylint: disable=no-member
                ):
                err_name = err.name  # pylint: disable=no-member
                module_pypaths_failed_ignored.append(module_pypath)
                logging.warning(
                    f"{PFX}module {err_name} failed to import wile importing"
                    f" {module_pypath}, but ok to ignore")
                continue

            module_pypaths_failed.append(module_pypath)

            set_verbosity(verbosity)
            msg = f"<< FAILURE IMPORTING : {module_pypath} >>"
            logging.error(PFX + "=" * len(msg))
            logging.error(PFX + msg)
            logging.error(PFX + "=" * len(msg))

            # Reproduce the failure with full output
            set_verbosity(Levels.TRACE)
            try:
                import_module(module_pypath, package=parent_pypath)
            except Exception:
                pass

            set_verbosity(Levels.TRACE)
            logging.exception(err)

            set_verbosity(verbosity)
            logging.error(PFX + "#" * len(msg))

            continue

        else:
            module_pypaths_succeeded.append(module_pypath)

        for test_func_name in test_func_names:
            test_pypath = f"{module_pypath}.{test_func_name}"
            try:
                set_verbosity(verbosity)
                logging.debug(PFX + f"getattr({module}, {test_func_name})")

                set_verbosity(Levels.WARN)
                test_func = getattr(module, test_func_name)

                # Run the test function
                set_verbosity(verbosity)
                logging.info(PFX + f"{test_pypath}()")

                set_verbosity(Levels.WARN)
                test_func()

            except Exception as err:
                if (isinstance(err, ImportError) and hasattr(err, "name")
                        and err.name in allow_missing  # pylint: disable=no-member
                    ):
                    err_name = err.name  # pylint: disable=no-member
                    test_pypaths_failed_ignored.append(test_pypath)
                    logging.warning(
                        PFX +
                        f"{test_pypath} failed because module {err_name} failed to"
                        + f" load, but ok to ignore")

                    continue

                test_pypaths_failed.append(test_pypath)
                set_verbosity(verbosity)
                msg = f"<< FAILURE RUNNING : {test_pypath} >>"
                logging.error(PFX + "=" * len(msg))
                logging.error(PFX + msg)
                logging.error(PFX + "=" * len(msg))

                # Reproduce the error with full output

                set_verbosity(Levels.TRACE)
                try:
                    test_func = getattr(module, test_func_name)
                    with np.printoptions(
                            precision=np.finfo(pisa.FTYPE).precision + 2,
                            floatmode="fixed",
                            sign=" ",
                            linewidth=200,
                    ):
                        test_func()
                except Exception:
                    pass

                set_verbosity(Levels.TRACE)
                logging.exception(err)

                set_verbosity(verbosity)
                logging.error(PFX + "#" * len(msg))

            else:
                test_pypaths_succeeded.append(test_pypath)

            finally:
                # remove references to the test function, e.g. to remove refs
                # to pycuda / numba.cuda contexts so these can be closed
                try:
                    del test_func
                except NameError:
                    pass

        # NOTE: Until we get all GPU code into Numba, need to unload pycuda
        # and/or numba.cuda contexts before a module requiring the other one is
        # to be imported.
        # NOTE: the following causes a traceback to be emitted at the very end
        # of the script, regardless of the exception catching here.
        if (pisa.TARGET == "cuda" and pycuda is not None
                and hasattr(pycuda, "autoinit")
                and hasattr(pycuda.autoinit, "context")):
            try:
                pycuda.autoinit.context.detach()
            except Exception:
                pass

        # Attempt to unload the imported module
        # TODO: pipeline, etc. fail as isinstance(service, (Stage, PiStage)) is False
        #if module_pypath in sys.modules and module_pypath != "pisa":
        #    del sys.modules[module_pypath]
        #del module

        # TODO: crashes program; subsequent calls in same shell crash(!?!?)
        # if pisa.TARGET == 'cuda' and nbcuda is not None:
        #    try:
        #        nbcuda.close()
        #    except Exception:
        #        pass

    # Summarize results

    n_import_successes = len(module_pypaths_succeeded)
    n_import_failures = len(module_pypaths_failed)
    n_import_failures_ignored = len(module_pypaths_failed_ignored)
    n_test_successes = len(test_pypaths_succeeded)
    n_test_failures = len(test_pypaths_failed)
    n_test_failures_ignored = len(test_pypaths_failed_ignored)

    set_verbosity(verbosity)
    logging.info(
        PFX + f"<< IMPORT TESTS : {n_import_successes} imported,"
        f" {n_import_failures} failed,"
        f" {n_import_failures_ignored} failed to import but ok to ignore >>")
    logging.info(PFX + f"<< UNIT TESTS : {n_test_successes} succeeded,"
                 f" {n_test_failures} failed,"
                 f" {n_test_failures_ignored} failed but ok to ignore >>")

    # Exit with error if any failures (import or unit test)

    if module_pypaths_failed or test_pypaths_failed:
        msgs = []
        if module_pypaths_failed:
            msgs.append(
                f"{n_import_failures} module(s) failed to import:\n  " +
                ", ".join(module_pypaths_failed))
        if test_pypaths_failed:
            msgs.append(f"{n_test_failures} unit test(s) failed:\n  " +
                        ", ".join(test_pypaths_failed))

        # Note the extra newlines before the exception to make it stand out;
        # and newlines after the exception are due to the pycuda error message
        # that is emitted when we call pycuda.autoinit.context.detach()
        sys.stdout.flush()
        sys.stderr.write("\n\n\n")
        raise Exception("\n".join(msgs) + "\n\n\n")
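The discover/import/run pattern above can be boiled down to a few lines without PISA's helpers: import a module by dotted path, collect callables whose names start with "test_", run them, and report failures. The function name and the example module path below are illustrative assumptions, not PISA's API.
import traceback
from importlib import import_module

def run_module_tests(module_pypath):
    """Run all test_* functions in a module; return the names that failed."""
    failures = []
    module = import_module(module_pypath)
    test_names = [n for n in dir(module)
                  if n.startswith("test_") and callable(getattr(module, n))]
    for name in test_names:
        try:
            getattr(module, name)()
        except Exception:
            failures.append(f"{module_pypath}.{name}")
            traceback.print_exc()
    return failures

# e.g. failures = run_module_tests("pisa.utils.hash")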
Example #23
0
    def run_minimizer(self, pprint=True, skip=False):
        # Get initial values
        x0 = self.template_maker.params.free._rescaled_values

        # L-BFGS-B steps outside the given bounds by one epsilon when
        # evaluating gradients, so shrink the bounds accordingly
        try:
            epsilon = self.minimizer_settings['options']['value']['eps']
        except KeyError:
            epsilon = self.minimizer_settings['options']['value']['epsilon']
        bounds = [(0 + epsilon, 1 - epsilon)] * len(x0)
        logging.info('running the %s optimizer' %
                     self.minimizer_settings['method']['value'])

        # Using scipy.opt.minimize allows a whole host of minimisers to be used
        # This set by the method value in your minimiser settings file
        self.n_minimizer_calls = 0
        if skip:
            best_fit_vals = x0
            metric_val = self._minimizer_callable(x0, False)
            dict_flags = {
                'warnflag': 0,
                'task': 'skip',
                'funcalls': 0,
                'nit': 0,
                'avg_tmp_time': 0,
                'n_minimizer_calls': 0
            }
        else:
            start_t = time.time()
            minim_result = opt.minimize(
                fun=self._minimizer_callable,
                x0=x0,
                args=(pprint, ),
                bounds=bounds,
                method=self.minimizer_settings['method']['value'],
                options=self.minimizer_settings['options']['value'])

            # get additional metrics
            end_t = time.time()
            if pprint:
                # clear the line
                print('')
            avg_tmp_time = (end_t - start_t) * 1000. / self.n_minimizer_calls
            print('\naverage template generation time during minimizer run:'
                  ' %.4f ms' % avg_tmp_time)
            best_fit_vals = minim_result.x
            metric_val = minim_result.fun
            template = self.template_maker.get_outputs()
            template = [t.combine_wildcard('*') for t in template]
            template[0].name = 'total'
            dict_flags = {}
            mod_chi2_val = (
                self.pseudodata.metric_total(expected_values=template,
                                             metric='mod_chi2') +
                self.template_maker.params.priors_penalty(metric='mod_chi2'))
            dict_flags['agreement_mod_chi2'] = mod_chi2_val
            dict_flags['warnflag'] = minim_result.status
            dict_flags['task'] = minim_result.message
            if 'jac' in minim_result:
                dict_flags['grad'] = minim_result.jac
            dict_flags['funcalls'] = minim_result.nfev
            dict_flags['nit'] = minim_result.nit
            dict_flags['avg_tmp_time'] = avg_tmp_time
            dict_flags['n_minimizer_calls'] = self.n_minimizer_calls
            if dict_flags['warnflag'] > 0:
                logging.warning(str(dict_flags))

        all_metrics = {}
        template = self.template_maker.get_outputs()
        template = [t.combine_wildcard('*') for t in template]
        template[0].name = 'total'
        #for metric in ['llh', 'conv_llh', 'barlow_llh','chi2', 'mod_chi2']:
        for metric in ['llh', 'chi2']:
            all_metrics[metric] = self.pseudodata.metric_total(
                expected_values=template,
                metric=metric) + self.template_maker.params.priors_penalty(
                    metric=metric)

        return best_fit_vals, metric_val, all_metrics, dict_flags
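The bounds/epsilon pattern above can be illustrated with scipy.optimize alone: the free parameters are rescaled to [0, 1], and the bounds are pulled in by one eps because L-BFGS-B evaluates gradients a small step outside the stated bounds. The objective below is a placeholder, not a PISA likelihood; names are illustrative.
import numpy as np
import scipy.optimize as opt

def objective(x_rescaled):
    # placeholder metric: distance from an arbitrary "truth" in [0, 1]^n
    truth = np.array([0.3, 0.7])
    return float(np.sum((x_rescaled - truth) ** 2))

x0 = np.full(2, 0.5)          # free params rescaled to the unit interval
eps = 1e-7
bounds = [(0.0 + eps, 1.0 - eps)] * len(x0)

result = opt.minimize(fun=objective, x0=x0, bounds=bounds,
                      method="L-BFGS-B", options={"eps": eps})
best_fit_vals, metric_val = result.x, result.fun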
Example #24
0
def plot_map_comparisons(ref_map, new_map, ref_abv, new_abv, outdir, subdir,
                         name, texname, stagename, servicename,
                         shorttitles=False, ftype='png'):
    """Plot comparisons between two identically-binned PISA 3 style maps"""
    path = [outdir]

    if subdir is None:
        subdir = stagename.lower()
    path.append(subdir)

    if outdir is not None:
        mkdir(os.path.join(*path), warn=False)

    if stagename is not None:
        fname = ['%s_%s_comparisons' %(ref_abv.lower(), new_abv.lower()),
                 'stage_'+stagename]
    else:
        fname = ['%s_%s_comparisons' %(ref_abv.lower(), new_abv.lower())]
    if servicename is not None:
        fname.append('service_'+servicename)
    if name is not None:
        fname.append(name.lower())
    fname = '__'.join(fname) + '.' + ftype

    path.append(fname)

    basetitle = []
    if stagename is not None:
        basetitle.append('%s' % stagename)
    if texname is not None:
        basetitle.append(r'$%s$' % texname)
    basetitle = ' '.join(basetitle)

    validate_map_objs(new_map, ref_map)
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio_map = new_map/ref_map
    diff_map = new_map - ref_map
    with np.errstate(divide='ignore', invalid='ignore'):
        diff_ratio_map = diff_map/ref_map

    max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist))

    # Handle cases where ratio returns infinite
    # This isn't necessarily a failure, since all it means is that the
    # reference was zero. If the new value is sufficiently close to zero then
    # it's still fine.
    if max_diff_ratio == float('inf'):
        logging.warning(
            'Infinite value found in ratio tests. Difference tests '
            'now also being calculated'
        )
        # First find all the finite elements
        finite_map = np.isfinite(diff_ratio_map.hist)
        # Then find the nanmax of this, will be our new test value
        max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist[finite_map]))
        # Also find all the infinite elements
        infinite_map = np.logical_not(finite_map)
        # This will be a second test value
        max_diff = np.nanmax(np.abs(diff_map.hist[infinite_map]))
    else:
        # Without any infinite elements we can ignore this second test
        max_diff = 0.0

    if outdir is not None:
        gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32)
        fig, axes = plt.subplots(nrows=1, ncols=5, gridspec_kw=gridspec_kw,
                                 sharex=False, sharey=False, figsize=(20, 5))
        if shorttitles:
            ref_map.plot(
                fig=fig,
                ax=axes[0],
                title=basetitle+' '+ref_abv+' (A)',
                cmap=plt.cm.afmhot
            )
            new_map.plot(
                fig=fig,
                ax=axes[1],
                title=basetitle+' '+new_abv+' (B)',
                cmap=plt.cm.afmhot
            )
            ratio_map.plot(
                fig=fig,
                ax=axes[2],
                title='A/B',
                cmap=plt.cm.afmhot
            )
            diff_map.plot(
                fig=fig,
                ax=axes[3],
                title='A-B',
                symm=True,
                cmap=plt.cm.seismic
            )
            diff_ratio_map.plot(
                fig=fig,
                ax=axes[4],
                title='(A-B)/A',
                symm=True,
                cmap=plt.cm.seismic
            )
        else:
            ref_map.plot(
                fig=fig,
                ax=axes[0],
                title=basetitle+' '+ref_abv,
                cmap=plt.cm.afmhot
            )
            new_map.plot(
                fig=fig,
                ax=axes[1],
                title=basetitle+' '+new_abv,
                cmap=plt.cm.afmhot
            )
            ratio_map.plot(
                fig=fig,
                ax=axes[2],
                title=basetitle+' %s/%s' %(new_abv, ref_abv),
                cmap=plt.cm.afmhot
            )
            diff_map.plot(
                fig=fig,
                ax=axes[3],
                title=basetitle+' %s-%s' %(new_abv, ref_abv),
                symm=True,
                cmap=plt.cm.seismic
            )
            diff_ratio_map.plot(
                fig=fig,
                ax=axes[4],
                title=basetitle+' (%s-%s)/%s' %(new_abv, ref_abv, ref_abv),
                symm=True,
                cmap=plt.cm.seismic
            )
        logging.debug('>>>> Plot for inspection saved at %s'
                      %os.path.join(*path))
        fig.savefig(os.path.join(*path))
        plt.close(fig.number)

    return max_diff_ratio, max_diff
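The infinite-ratio fallback above reduces to a few lines of plain numpy: where the reference histogram is zero the ratio is infinite, so restrict the ratio test to finite entries and quote the maximum absolute difference on the remaining (infinite-ratio) entries as a second figure of merit. The arrays below are toy values for illustration.
import numpy as np

ref = np.array([[4.0, 0.0], [2.0, 5.0]])
new = np.array([[4.2, 1e-9], [2.1, 5.0]])

with np.errstate(divide="ignore", invalid="ignore"):
    diff_ratio = (new - ref) / ref

finite = np.isfinite(diff_ratio)
max_diff_ratio = np.nanmax(np.abs(diff_ratio[finite]))
# second test value: absolute difference where the ratio blew up
max_diff = np.nanmax(np.abs((new - ref)[~finite])) if (~finite).any() else 0.0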
Example #25
0
    def get_outputs(self, output_mode=None, force_standard_output=True):
        """Get the outputs of the PISA stage
        Depending on `self.output_mode`, this may be a binned object, or the event container itself

        add option to force an output mode

        force_standard_output: in binned mode, force the return of a single mapset

        """

        # Figure out if the user has specified an output mode
        if output_mode is None:
            output_mode = self.output_mode
        else:
            assert output_mode == 'binned' or output_mode == 'events', 'ERROR: user-specified output mode is unrecognized'

        # Handle the binned case
        if output_mode == 'binned':

            if force_standard_output:

                # If we want the error on the map counts to be specified by something
                # other than something called "error" use the key specified in map_output_key
                # (see pi_resample for an example)
                if self.map_output_key:
                    self.outputs = self.data.get_mapset(
                        self.map_output_key,
                        error=self.map_output_error_key,
                    )

                # Very specific case where the output has two keys and one of them is error (compatibility)
                elif (len(self.output_apply_keys) == 2
                      and 'errors' in self.output_apply_keys):
                    other_key = [key for key in self.output_apply_keys
                                 if key != 'errors'][0]
                    self.outputs = self.data.get_mapset(other_key,
                                                        error='errors')

                # return the first key in output_apply_key as the map output. add errors to the
                # map only if "errors" is part of the list of output keys
                else:
                    if 'errors' in self.output_apply_keys:
                        self.outputs = self.data.get_mapset(
                            self.output_apply_keys[0], error='errors')
                    else:
                        self.outputs = self.data.get_mapset(
                            self.output_apply_keys[0])

            # More generally: produce one map per output key desired, in a dict
            else:
                self.outputs = OrderedDict()
                for key in self.output_apply_keys:
                    self.outputs[key] = self.data.get_mapset(key)

        # Handle Events mode
        elif output_mode == "events":
            self.outputs = self.data

        # Throw warning that output mode failed
        else:
            self.outputs = None
            logging.warning(
                'pi_stage.py: Cannot create CAKE style output mapset')

        return self.outputs
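For orientation, a hypothetical call pattern based only on the signature above (here `stage` stands for an already-configured PISA stage with output_mode='binned'):
# Hypothetical usage, not taken from PISA's documentation
mapset = stage.get_outputs()                              # single MapSet
per_key = stage.get_outputs(force_standard_output=False)  # dict of MapSets
events = stage.get_outputs(output_mode='events')          # event container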
Example #26
0
def plot_cmp(new, ref, new_label, ref_label, plot_label, file_label, outdir,
             ftype='png'):
    """Plot comparisons between two (identically-binned) maps or map sets.

    Parameters
    ----------
    new : Map or MapSet
    ref : Map or MapSet
    new_label : str
    ref_label : str
    plot_label : str
    file_label : str
    outdir : str
    ftype : str

    """
    path = [outdir]

    if isinstance(ref, Map):
        assert isinstance(new, Map)
        ref_maps = [ref]
        new_maps = [new]
    else:
        # otherwise assume identically-ordered iterables of Maps (e.g. MapSets)
        ref_maps = ref
        new_maps = new

    if outdir is not None:
        mkdir(os.path.join(*path), warn=False)

    for ref, new in zip(ref_maps, new_maps):
        assert ref.binning == new.binning
        fname = get_valid_filename(
            '__'.join([
                get_valid_filename(file_label),
                '%s_vs_%s' %(get_valid_filename(new_label.lower()),
                             get_valid_filename(ref_label.lower()))
            ]) + '.' + ftype
        )
        path.append(fname)

        ratio = new / ref
        diff = new - ref
        fract_diff = diff / ref

        finite_ratio = ratio.hist[np.isfinite(ratio.hist)]
        ratio_mean = np.mean(finite_ratio)
        ratio_median = np.median(finite_ratio)

        finite_diff = diff.hist[np.isfinite(diff.hist)]
        diff_mean = np.mean(finite_diff)
        diff_median = np.median(finite_diff)

        finite_fract_diff = fract_diff.hist[np.isfinite(fract_diff.hist)]
        fract_diff_mean = np.mean(finite_fract_diff)
        fract_diff_median = np.median(finite_fract_diff)

        max_diff_ratio = np.nanmax(fract_diff.hist)

        # Handle cases where ratio returns infinite
        # This isn't necessarily a failure, since all it means is that the
        # reference was zero. If the new value is sufficiently close to zero
        # then it's still fine.
        if max_diff_ratio == np.inf:
            logging.warning(
                'Infinite value found in ratio tests. Difference tests'
                ' now also being calculated'
            )
            # First find all the finite elements
            finite_mask = np.isfinite(fract_diff.hist)
            # Then find the nanmax of this, will be our new test value
            max_diff_ratio = np.nanmax(fract_diff.hist[finite_mask])
            # Also find all the infinite elements; compute a second test value
            max_diff = np.nanmax(diff.hist[~finite_mask])
        else:
            # Without any infinite elements we can ignore this second test
            max_diff = 0.0

        if outdir is not None:
            if new.binning.num_dims == 2:
                n_dims = 2
                n_third_dim_bins = 1
            elif new.binning.num_dims == 3:
                n_dims = 3
                odd_dim_idx = new.binning.shape.index(np.min(new.binning.shape))
                logging.debug('odd_dim_idx: %s', odd_dim_idx)
                n_third_dim_bins = new.binning.shape[odd_dim_idx]

            gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32)
            fig, axes = plt.subplots(nrows=n_third_dim_bins, ncols=5,
                                     gridspec_kw=gridspec_kw,
                                     squeeze=False, sharex=False, sharey=False,
                                     figsize=(20, 5))

            refslice = ref
            newslice = new
            bin_names = None
            if n_dims == 3:
                if odd_dim_idx != 0:
                    refslice = np.moveaxis(ref, source=odd_dim_idx,
                                           destination=0)
                    newslice = np.moveaxis(new, source=odd_dim_idx,
                                           destination=0)
                bin_names = new.binning.dims[odd_dim_idx].bin_names

            for odd_bin_idx in range(n_third_dim_bins):
                if n_dims == 2:
                    thisbin_ref = refslice
                    thisbin_new = newslice
                    tmp_ref_label = ref_label
                    tmp_new_label = new_label

                elif n_dims == 3:
                    thisbin_ref = refslice[odd_bin_idx, ...].squeeze()
                    thisbin_new = newslice[odd_bin_idx, ...].squeeze()

                    if bin_names is not None:
                        suffix = bin_names[odd_bin_idx]
                    else:
                        suffix = format(odd_bin_idx, 'd')
                    tmp_new_label = new_label + ' ' + suffix
                    tmp_ref_label = ref_label + ' ' + suffix

                    ratio = thisbin_new / thisbin_ref
                    diff = thisbin_new - thisbin_ref
                    fract_diff = diff / thisbin_ref

                refmax = np.nanmax(thisbin_ref.hist)
                newmax = np.nanmax(thisbin_new.hist)
                vmax = refmax if refmax > newmax else newmax

                baseplot2(map=thisbin_new,
                          title=tmp_new_label,
                          vmax=vmax,
                          evtrate=True,
                          ax=axes[odd_bin_idx][0])

                baseplot2(map=thisbin_ref,
                          title=tmp_ref_label,
                          vmax=vmax,
                          evtrate=True,
                          ax=axes[odd_bin_idx][1])

                ax, _, _ = baseplot2(map=ratio,
                                     title='%s/%s' %(tmp_new_label,
                                                     tmp_ref_label),
                                     ax=axes[odd_bin_idx][2])
                ax.text(0.95, 0.95, "Mean: %.6f"%ratio_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes, color=(0, 0.8, 0.8))
                ax.text(0.95, 0.91, "Median: %.6f"%ratio_median,
                        horizontalalignment='right',
                        transform=ax.transAxes, color=(0, 0.8, 0.8))

                ax, _, _ = baseplot2(map=diff,
                                     title='%s-%s' %(tmp_new_label,
                                                     tmp_ref_label),
                                     symm=True, ax=axes[odd_bin_idx][3])
                ax.text(0.95, 0.95, "Mean: %.6f"%diff_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes)
                ax.text(0.95, 0.91, "Median: %.6f"%diff_median,
                        horizontalalignment='right',
                        transform=ax.transAxes)

                ax, _, _ = baseplot2(map=fract_diff,
                                     title='(%s-%s)/%s' %(tmp_new_label,
                                                          tmp_ref_label,
                                                          tmp_ref_label),
                                     symm=True,
                                     ax=axes[odd_bin_idx][4])
                ax.text(0.95, 0.95, "Mean: %.6f"%fract_diff_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes)
                ax.text(0.95, 0.91, "Median: %.6f"%fract_diff_median,
                        horizontalalignment='right',
                        transform=ax.transAxes)

            logging.debug('>>>> Plot for inspection saved at %s'
                          %os.path.join(*path))
            fig.savefig(os.path.join(*path))
            plt.close(fig.number)

        return max_diff_ratio, max_diff
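The 3-D handling above (move the smallest "odd" dimension to the front, then compare 2-D slices one at a time) can be sketched with bare numpy arrays, without PISA Map objects; the array shapes and values below are illustrative.
import numpy as np

rng = np.random.default_rng(1)
ref = rng.random((20, 10, 2)) + 0.1              # strictly positive reference
new = ref * 1.05

odd_dim_idx = int(np.argmin(ref.shape))          # here: the PID-like axis
ref_slices = np.moveaxis(ref, odd_dim_idx, 0)
new_slices = np.moveaxis(new, odd_dim_idx, 0)

for i, (r, n) in enumerate(zip(ref_slices, new_slices)):
    with np.errstate(divide="ignore", invalid="ignore"):
        fract_diff = (n - r) / r
    finite = np.isfinite(fract_diff)
    print(f"slice {i}: max |(new-ref)/ref| =",
          np.nanmax(np.abs(fract_diff[finite])))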
Example #27
0
    def __init__(
        self,
        use_transforms,
        params=None,
        expected_params=None,
        input_names=None,
        output_names=None,
        error_method=None,
        disk_cache=None,
        memcache_deepcopy=True,
        transforms_cache_depth=10,
        outputs_cache_depth=0,
        input_binning=None,
        output_binning=None,
        debug_mode=None,
    ):
        # Allow for string inputs, but have to populate into lists for
        # consistent interfacing to one or multiple of these things

        logging.warning('This is a cake-style PISA stage, which is DEPRECATED!')

        self.use_transforms = use_transforms
        """Whether or not stage uses transforms"""

        self._events_hash = None

        self.input_binning = input_binning
        self.output_binning = output_binning
        self.validate_binning()

        # init base class!
        super(Stage, self).__init__(
            params=params,
            expected_params=expected_params,
            input_names=input_names,
            output_names=output_names,
            debug_mode=debug_mode,
            error_method=error_method,
        )

        # Storage of latest transforms and outputs; default to empty
        # TransformSet and None, respectively.
        self.transforms = TransformSet([])
        """A stage that takes to-be-transformed inputs and has had these
        transforms computed stores them here. Before computation, `transforms`
        is an empty TransformSet; a stage that does not make use of these (such
        as a no-input stage) has an empty TransformSet."""

        self.memcache_deepcopy = memcache_deepcopy

        self.transforms_cache_depth = int(transforms_cache_depth)

        self.transforms_cache = None
        """Memory cache object for storing transforms"""

        self.nominal_transforms_cache = None
        """Memory cache object for storing nominal transforms"""

        self.full_hash = True
        """Whether to do full hashing if true, otherwise do fast hashing"""

        self.transforms_cache = MemoryCache(
            max_depth=self.transforms_cache_depth,
            is_lru=True,
            deepcopy=self.memcache_deepcopy,
        )
        self.nominal_transforms_cache = MemoryCache(
            max_depth=self.transforms_cache_depth,
            is_lru=True,
            deepcopy=self.memcache_deepcopy,
        )

        self.outputs_cache_depth = int(outputs_cache_depth)

        self.outputs_cache = None
        """Memory cache object for storing outputs (excludes sideband
        objects)."""

        if self.outputs_cache_depth > 0:
            self.outputs_cache = MemoryCache(
                max_depth=self.outputs_cache_depth,
                is_lru=True,
                deepcopy=self.memcache_deepcopy,
            )

        self.disk_cache = disk_cache
        """Disk cache object"""

        self.disk_cache_path = None
        """Path to disk cache file for this stage/service (or None)."""

        # Include each attribute here for hashing if it is defined and its
        # value is not None
        default_attrs_to_hash = [
            "input_names",
            "output_names",
            "input_binning",
            "output_binning",
        ]
        self._attrs_to_hash = set([])
        for attr in default_attrs_to_hash:
            if not hasattr(self, attr):
                continue
            val = getattr(self, attr)
            if val is None:
                continue
            try:
                self.include_attrs_for_hashes(attr)
            except ValueError:
                pass

        self.events = None
        self.nominal_transforms = None

        # Define useful flags and values for debugging behavior after running

        self.nominal_transforms_loaded_from_cache = None
        """Records which cache nominal transforms were loaded from, or None."""

        self.nominal_transforms_computed = False
        """Records whether nominal transforms were (re)computed."""

        self.transforms_loaded_from_cache = None
        """Records which cache transforms were loaded from, or None."""

        self.transforms_computed = False
        """Records whether transforms were (re)computed."""

        self.nominal_outputs_computed = False
        """Records whether nominal outputs were (re)computed."""

        self.outputs_loaded_from_cache = None
        """Records which cache outputs were loaded from, or None."""

        self.outputs_computed = False
        """Records whether outputs were (re)computed."""

        self.nominal_transforms_hash = None
        self.transforms_hash = None
        self.nominal_outputs_hash = None
        self.outputs_hash = None
        self.instantiate_disk_cache()
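The transforms and outputs caches above are depth-limited LRU caches keyed by hash, optionally deep-copying values so cached transforms cannot be mutated by callers. Below is a minimal sketch of that role; it is an illustration with assumed names, not PISA's pisa.utils.cache.MemoryCache.
from collections import OrderedDict
from copy import deepcopy

class SimpleLRUCache:
    """Depth-limited LRU cache with optional deep copies on get/set."""

    def __init__(self, max_depth=10, do_deepcopy=True):
        self.max_depth = max_depth
        self.do_deepcopy = do_deepcopy
        self._store = OrderedDict()

    def __setitem__(self, key, value):
        if key in self._store:
            self._store.pop(key)
        elif len(self._store) >= self.max_depth:
            self._store.popitem(last=False)        # evict least recently used
        self._store[key] = deepcopy(value) if self.do_deepcopy else value

    def __getitem__(self, key):
        value = self._store.pop(key)               # KeyError if absent
        self._store[key] = value                   # mark as most recently used
        return deepcopy(value) if self.do_deepcopy else value

    def __contains__(self, key):
        return key in self._store

cache = SimpleLRUCache(max_depth=2)
cache["transforms_hash_1"] = {"kernel": [1, 2, 3]}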
Example #28
0
def inj_param_scan(return_outputs=False):
    """Load the HypoTesting class and use it to do an Asimov test across the
    space of one of the injected parameters.

    The user will define the parameter and pass a numpy-interpretable string to
    set the range of values. For example, one could scan over the space of
    theta23 by using a string such as `"numpy.linspace(0.35, 0.65, 31)"` which
    will then be evaluated to figure out a space of theta23 to inject and run
    Asimov tests.
    """
    # NOTE: import here to avoid circular refs
    from pisa.scripts.analysis import parse_args
    init_args_d = parse_args(description=inj_param_scan.__doc__,
                             command=inj_param_scan)

    # Normalize and convert `*_pipeline` filenames; store to `*_maker`
    # (which is argument naming convention that HypoTesting init accepts).
    # For this test, pipeline is required so we don't need the try arguments
    # or the checks on it being None
    filenames = init_args_d.pop('pipeline')
    filenames = sorted([normcheckpath(fname) for fname in filenames])
    init_args_d['h0_maker'] = filenames
    # However, we do need them for the selections, since they can be different
    for maker in ['h0', 'h1', 'data']:
        ps_name = maker + '_param_selections'
        ps_str = init_args_d[ps_name]
        if ps_str is None:
            ps_list = None
        else:
            ps_list = [x.strip().lower() for x in ps_str.split(',')]
        init_args_d[ps_name] = ps_list

    init_args_d['data_maker'] = init_args_d['h0_maker']
    init_args_d['h1_maker'] = init_args_d['h0_maker']
    init_args_d['h0_maker'] = DistributionMaker(init_args_d['h0_maker'])
    init_args_d['h1_maker'] = DistributionMaker(init_args_d['h1_maker'])
    init_args_d['h1_maker'].select_params(init_args_d['h1_param_selections'])
    init_args_d['data_maker'] = DistributionMaker(init_args_d['data_maker'])
    if init_args_d['data_param_selections'] is None:
        init_args_d['data_param_selections'] = \
            init_args_d['h0_param_selections']
        init_args_d['data_name'] = init_args_d['h0_name']
    init_args_d['data_maker'].select_params(
        init_args_d['data_param_selections'])

    # Remove final parameters that don't want to be passed to HypoTesting
    param_name = init_args_d.pop('param_name')
    inj_vals = eval(init_args_d.pop('inj_vals'))
    inj_units = init_args_d.pop('inj_units')
    force_prior = init_args_d.pop('use_inj_prior')

    # Instantiate the analysis object
    hypo_testing = HypoTesting(**init_args_d)

    logging.info('Scanning over %s between %.4f and %.4f with %i vals',
                 param_name, min(inj_vals), max(inj_vals), len(inj_vals))
    # Modify parameters if necessary
    if param_name == 'sin2theta23':
        requested_vals = inj_vals
        inj_vals = np.arcsin(np.sqrt(inj_vals))
        logging.info(
            'Converting to theta23 values. Equivalent range is %.4f to %.4f'
            ' radians, or %.4f to %.4f degrees', min(inj_vals), max(inj_vals),
            min(inj_vals) * 180 / np.pi,
            max(inj_vals) * 180 / np.pi)
        test_name = 'theta23'
        inj_units = 'radians'

    elif param_name == 'deltam31':
        raise ValueError('Need to implement a test where it ensures the sign '
                         'of the requested values matches those in truth and '
                         'the hypo makers (else it makes no sense). For now, '
                         'please select deltam3l instead.')

    elif param_name == 'deltam3l':
        # Ensure all values are the same sign, else it doesn't make any sense
        if not np.all(np.sign(inj_vals) == np.sign(inj_vals[0])):
            raise ValueError("Not all requested values to inject are the same "
                             "sign. This doesn't make any sense given that you"
                             " have requested to inject different values of "
                             "deltam3l.")
        logging.info('Parameter requested was deltam3l - will convert assuming'
                     ' that this is always the largest of the two splittings '
                     'i.e. deltam3l = deltam31 for deltam3l > 0 and deltam3l '
                     '= deltam32 for deltam3l < 0.')
        inj_sign = np.sign(inj_vals)[0]
        requested_vals = inj_vals
        test_name = 'deltam31'
        deltam21_val = hypo_testing.data_maker.params['deltam21'].value.to(
            inj_units).magnitude
        if inj_sign == 1:
            no_inj_vals = requested_vals
            io_inj_vals = (requested_vals - deltam21_val) * -1.0
        else:
            io_inj_vals = requested_vals
            no_inj_vals = (requested_vals * -1.0) + deltam21_val
        inj_vals = []
        for no_inj_val, io_inj_val in zip(no_inj_vals, io_inj_vals):
            o_vals = {}
            o_vals['nh'] = no_inj_val
            o_vals['ih'] = io_inj_val
            inj_vals.append(o_vals)

    else:
        test_name = param_name
        requested_vals = inj_vals

    unit_inj_vals = []
    for inj_val in inj_vals:
        if isinstance(inj_val, dict):
            o_vals = {}
            for ivkey in inj_val.keys():
                o_vals[ivkey] = inj_val[ivkey] * ureg(inj_units)
            unit_inj_vals.append(o_vals)
        else:
            unit_inj_vals.append(inj_val * ureg(inj_units))
    inj_vals = unit_inj_vals

    # Extend the ranges of the distribution makers so that they reflect the
    # range of the scan. This is a pain if there are different values depending
    # on the ordering. Need to extend the ranges of both values in the
    # hypothesis maker since the hypotheses may minimise over the ordering,
    # and could then go out of range.

    # Also, some parameters CANNOT go negative or else things won't work.
    # To account for this, check whether the parameter's lower bound was
    # positive and, if so, enforce that it remains positive.
    if isinstance(inj_vals[0], dict):
        # Calculate ranges for both parameters
        norangediff = max(no_inj_vals) - min(no_inj_vals)
        norangediff = norangediff * ureg(inj_units)
        norangetuple = (min(no_inj_vals) * ureg(inj_units) - 0.5 * norangediff,
                        max(no_inj_vals) * ureg(inj_units) + 0.5 * norangediff)
        iorangediff = max(io_inj_vals) - min(io_inj_vals)
        iorangediff = iorangediff * ureg(inj_units)
        iorangetuple = (min(io_inj_vals) * ureg(inj_units) - 0.5 * iorangediff,
                        max(io_inj_vals) * ureg(inj_units) + 0.5 * iorangediff)
        # Do it for both hierarchies
        for hierarchy, rangetuple in zip(['nh', 'ih'],
                                         [norangetuple, iorangetuple]):
            hypo_testing.set_param_ranges(selection=hierarchy,
                                          test_name=test_name,
                                          rangetuple=rangetuple,
                                          inj_units=inj_units)
        # Select the proper params again
        hypo_testing.h0_maker.select_params(init_args_d['h0_param_selections'])
        hypo_testing.h1_maker.select_params(init_args_d['h1_param_selections'])
    # Otherwise it's way simpler...
    else:
        rangediff = max(inj_vals) - min(inj_vals)
        rangetuple = (min(inj_vals) - 0.5 * rangediff,
                      max(inj_vals) + 0.5 * rangediff)
        hypo_testing.set_param_ranges(selection=None,
                                      test_name=test_name,
                                      rangetuple=rangetuple,
                                      inj_units=inj_units)

    if hypo_testing.data_maker.params[test_name].prior is not None:
        if hypo_testing.data_maker.params[test_name].prior.kind != 'uniform':
            if force_prior:
                logging.warning(
                    'Parameter to be scanned, %s, has a %s prior that you have'
                    ' requested to be left on. This will likely make the'
                    ' results wrong.', test_name,
                    hypo_testing.data_maker.params[test_name].prior.kind)
            else:
                logging.info(
                    'Parameter to be scanned, %s, has a %s prior. This will be'
                    ' changed to a uniform prior (i.e. no prior) for this'
                    ' test.', test_name,
                    hypo_testing.data_maker.params[test_name].prior.kind)
                uniformprior = Prior(kind='uniform')
                hypo_testing.h0_maker.params[test_name].prior = uniformprior
                hypo_testing.h1_maker.params[test_name].prior = uniformprior
    else:
        if force_prior:
            raise ValueError('Parameter to be scanned, %s, does not have a'
                             ' prior but you have requested to force one to be'
                             ' left on. Something is potentially wrong.' %
                             test_name)
        else:
            logging.info(
                'Parameter to be scanned, %s, does not have a prior.'
                ' So nothing needs to be done.', test_name)

    # Everything is set up. Now do the scan.
    outputs = hypo_testing.asimov_inj_param_scan(  # pylint: disable=redefined-outer-name
        param_name=param_name,
        test_name=test_name,
        inj_vals=inj_vals,
        requested_vals=requested_vals,
        h0_name=init_args_d['h0_name'],
        h1_name=init_args_d['h1_name'],
        data_name=init_args_d['data_name'])

    if return_outputs:
        return outputs
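The two injected-value conversions performed above reduce to simple numpy arithmetic: sin²θ23 values are converted to θ23 via arcsin(sqrt(·)), and deltam3l values are converted to deltam31 separately for each ordering. The sketch below mirrors the arithmetic in the code; the deltam21 value and scan ranges are assumptions for illustration.
import numpy as np

# sin^2(theta23) -> theta23 in radians
sin2_vals = np.linspace(0.35, 0.65, 7)
theta23_vals = np.arcsin(np.sqrt(sin2_vals))

# deltam3l -> deltam31 for each ordering (values in eV^2; deltam21 assumed)
deltam21 = 7.5e-5
requested = np.linspace(2.3e-3, 2.7e-3, 5)   # positive => normal ordering
no_vals = requested                          # deltam31 for NO
io_vals = -(requested - deltam21)            # deltam31 for IO, as in the code
inj_vals = [{"nh": no, "ih": io} for no, io in zip(no_vals, io_vals)]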
Example #29
0
def test_nsi_parameterization():
    """Unit test for Hvac-like NSI parameterization."""
    rand = np.random.RandomState(0)
    alpha1, alpha2, deltansi = rand.rand(3) * 2. * np.pi
    phi12, phi13, phi23 = rand.rand(3) * 2*np.pi - np.pi
    eps_max_abs = 10.0
    eps_scale, eps_prime = rand.rand(2) * 2 * eps_max_abs - eps_max_abs
    nsi_params = VacuumLikeNSIParams()
    nsi_params.eps_scale = eps_scale
    nsi_params.eps_prime = eps_prime
    nsi_params.phi12 = phi12
    nsi_params.phi13 = phi13
    nsi_params.phi23 = phi23
    nsi_params.alpha1 = alpha1
    nsi_params.alpha2 = alpha2
    nsi_params.deltansi = deltansi

    logging.trace('Checking agreement between numerical & analytical NSI matrix...')

    eps_mat_numerical = nsi_params.eps_matrix
    eps_mat_analytical = nsi_params.eps_matrix_analytical

    try:
        close = np.isclose(eps_mat_numerical, eps_mat_analytical, **ALLCLOSE_KW)
        if not np.all(close):
            logging.debug(
                "Numerical NSI matrix:\n%s",
                np.array2string(eps_mat_numerical, **ARY2STR_KW)
            )
            logging.debug(
                "Analytical expansion (by hand):\n%s",
                np.array2string(eps_mat_analytical, **ARY2STR_KW)
            )
            raise ValueError(
                'Evaluating analytical expressions for NSI matrix elements'
                ' does not give agreement with numerical calculation!'
                ' Elementwise agreement:\n%s'
                % close
            )
    except ValueError as err:
        logging.warning(
            "%s\nThis is expected."
            " Going ahead with numerical calculation for now.", err
        )

    logging.trace('Now checking agreement with sympy calculation...')

    eps_mat_sympy = nsi_sympy_mat_mult(
        eps_scale_val=eps_scale,
        eps_prime_val=eps_prime,
        phi12_val=phi12,
        phi13_val=phi13,
        phi23_val=phi23,
        alpha1_val=alpha1,
        alpha2_val=alpha2,
        deltansi_val=deltansi
    )

    logging.trace('ALLCLOSE_KW = {}'.format(ALLCLOSE_KW))
    close = np.isclose(eps_mat_numerical, eps_mat_sympy, **ALLCLOSE_KW)
    if not np.all(close):
        logging.error(
            'Numerical NSI matrix:\n%s',
            np.array2string(eps_mat_numerical, **ARY2STR_KW)
        )
        logging.error(
            'Sympy NSI matrix:\n%s', np.array2string(eps_mat_sympy, **ARY2STR_KW)
        )
        raise ValueError(
            'Sympy and numerical calculations disagree! Elementwise agreement:\n'
            '%s' % close
        )
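The comparison-and-report pattern used in this test (shared tolerance kwargs, an elementwise np.isclose check, and an error message that embeds the agreement mask) is easy to reuse in isolation. The tolerance values below are illustrative, not PISA's ALLCLOSE_KW.
import numpy as np

ALLCLOSE_KW = dict(rtol=1e-12, atol=1e-15)   # assumed tolerances

a = np.array([[1.0, 2.0], [3.0, 4.0]])
b = a + np.array([[0.0, 0.0], [0.0, 1e-16]])

close = np.isclose(a, b, **ALLCLOSE_KW)
if not np.all(close):
    raise ValueError(
        "Matrices disagree! Elementwise agreement:\n%s" % close
    )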
Example #30
0
    def plot_1d_ratio(self, maps, plot_axis, **kwargs):
        """make a ratio plot for a 1d projection"""
        r_vmin = kwargs.pop('r_vmin', None)
        r_vmax = kwargs.pop('r_vmax', None)
        axis = plt.gca()
        map0 = maps[0]
        plt_binning = map0.binning[plot_axis]
        hist = self.project_1d(map0, plot_axis)
        hist0 = unp.nominal_values(hist)
        # TODO: should this be used somewhere?
        err0 = unp.std_devs(hist)

        axis.set_xlim(
            inf2finite(plt_binning.bin_edges.m)[0],
            inf2finite(plt_binning.bin_edges.m)[-1])
        maximum = 1.0
        minimum = 1.0
        self.reset_colors()
        for map in maps:
            self.next_color()
            hist = self.project_1d(map, plot_axis)
            hist1 = unp.nominal_values(hist)
            err1 = unp.std_devs(hist)
            ratio = np.zeros_like(hist0)
            ratio_error = np.zeros_like(hist0)
            for i, hist0i in enumerate(hist0):
                if hist1[i] == 0 and hist0i == 0:
                    ratio[i] = 1.
                    ratio_error[i] = 1.
                elif hist1[i] != 0 and hist0i == 0:
                    logging.warning('dividing non-zero by zero for ratio')
                    ratio[i] = 0.
                    ratio_error[i] = 1.
                else:
                    ratio[i] = hist1[i] / hist0i
                    ratio_error[i] = err1[i] / hist0i
                    minimum = min(minimum, ratio[i])
                    maximum = max(maximum, ratio[i])

            if map.tex == 'data':
                axis.errorbar(plt_binning.weighted_centers.m,
                              ratio,
                              yerr=ratio_error,
                              fmt='o',
                              markersize='4',
                              label=tex_dollars(text2tex('data')),
                              color='k',
                              ecolor='k',
                              mec='k')
            else:
                _ = axis.hist(inf2finite(plt_binning.weighted_centers.m),
                              weights=ratio,
                              bins=inf2finite(plt_binning.bin_edges.m),
                              histtype='step',
                              lw=1.5,
                              label=tex_dollars(text2tex(map.tex)),
                              color=self.color)

                axis.bar(plt_binning.bin_edges.m[:-1],
                         2 * ratio_error,
                         bottom=ratio - ratio_error,
                         width=plt_binning.bin_widths.m,
                         alpha=0.25,
                         linewidth=0,
                         color=self.color)

        if self.grid:
            plt.grid(True, which="both", ls='-', alpha=0.2)
        self.fig.subplots_adjust(hspace=0)
        axis.set_ylabel(tex_dollars(text2tex('ratio')))
        axis.set_xlabel(tex_dollars(plt_binning.label))
        # Calculate nice scale:
        if r_vmin is not None and r_vmax is not None:
            axis.set_ylim(1 - r_vmin, 1 + r_vmax)
        else:
            off = max(maximum - 1, 1 - minimum)
            axis.set_ylim(1 - 1.2 * off, 1 + 1.2 * off)
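The per-bin ratio loop above (0/0 bins map to a ratio of 1, nonzero/0 bins map to 0 with unit error, ordinary bins divide through) can also be written in vectorized numpy. The projections and errors below are toy values for illustration.
import numpy as np

hist0 = np.array([0.0, 0.0, 4.0, 10.0])   # reference projection
hist1 = np.array([0.0, 2.0, 5.0, 10.0])   # comparison projection
err1 = np.sqrt(hist1)

both_zero = (hist0 == 0) & (hist1 == 0)
ref_zero = (hist0 == 0) & (hist1 != 0)
ok = hist0 != 0

ratio = np.empty_like(hist0)
ratio_error = np.empty_like(hist0)
ratio[both_zero], ratio_error[both_zero] = 1.0, 1.0
ratio[ref_zero], ratio_error[ref_zero] = 0.0, 1.0
ratio[ok] = hist1[ok] / hist0[ok]
ratio_error[ok] = err1[ok] / hist0[ok]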