コード例 #1
0
ファイル: result.py プロジェクト: mayou36/analysis-tools
    def to_yaml_file(self, name):
        """Dump this fit result to a YAML file on disk.

        The output location is resolved through ``get_fit_result_path``.

        Arguments:
            name (str): Name of the fit result.

        Return:
            str: Output file name.

        Raise:
            NotInitializedError: If the fit result has not been initialized.

        """
        with _paths.work_on_file(name,
                                 path_func=_paths.get_fit_result_path) as output_file:
            # Serialize inside the managed context so the path helper can
            # clean up if serialization fails.
            write_config(self.to_yaml(), output_file)
        return output_file
コード例 #2
0
    def write_to_disk(self, name, link_from=None):
        """Persist this efficiency object to disk.

        Arguments:
            name (str): Name of the efficiency object.
            link_from (str, optional): Storage to link from. Defaults to
                no link.

        Return:
            str: Path of the output file.

        Raise:
            NotImplementedError: If the efficiency has no model name, i.e.
                it is a generic Efficiency that cannot be persisted.

        """
        # Only concrete efficiency models (with a MODEL_NAME) can be saved.
        if not self.MODEL_NAME:
            raise NotImplementedError("Cannot save generic Efficiency")
        with work_on_file(name, get_efficiency_path, link_from) as target_file:
            payload = {'model': self.MODEL_NAME,
                       'variables': self.get_variables(),
                       'parameters': self._config}
            write_config(payload, target_file)
        return target_file
コード例 #3
0
def run(config_files, link_from, verbose):
    """Run the script.

    Run a generate/fit sequence as many times as requested.

    Arguments:
        config_files (list[str]): Path to the configuration files.
        link_from (str): Path to link the results from.
        verbose (bool): Give verbose output?

    Raise:
        OSError: If the configuration file or some other input does not exist.
        AttributeError: If the input data are incompatible with a previous fit.
        KeyError: If some configuration data are missing.
        ValueError: If there is any problem in configuring the PDF factories.
        RuntimeError: If there is a problem during the fitting.

    """
    # Load and validate the configuration, translating configuration
    # problems into the exception types documented above.
    try:
        config = _config.load_config(
            *config_files, validate=['syst/ntoys', 'name', 'randomizer'])
    except OSError:
        raise OSError(
            "Cannot load configuration files: {}".format(config_files))
    except _config.ConfigError as error:
        # Log a specific hint for each missing mandatory key before raising
        if 'syst/ntoys' in error.missing_keys:
            logger.error("Number of toys not specified")
        if 'name' in error.missing_keys:
            logger.error("No name was specified in the config file!")
        if 'randomizer' in error.missing_keys:
            logger.error(
                "No randomizer configuration specified in config file!")
        raise KeyError("ConfigError raised -> {}".format(error.missing_keys))
    except KeyError as error:
        logger.error("YAML parsing error -> %s", error)
        raise
    model_name = config['syst'].get('model',
                                    'model')  # TODO: 'model' returns name?
    try:
        model_config = config[model_name]
    except KeyError as error:
        logger.error("Missing model configuration -> %s", str(error))
        raise KeyError("Missing model configuration")
    # Load the fit model twice: one copy stays nominal, the other one is
    # handed to the randomizer and gets its values randomized between toys.
    try:
        fit_model = configure_model(copy.deepcopy(model_config))
        randomizer_model = configure_model(copy.deepcopy(model_config))
    except KeyError:
        logger.exception('Error loading model')
        raise ValueError('Error loading model')
    # Some info
    ntoys = config['syst'].get('ntoys-per-job', config['syst']['ntoys'])
    logger.info("Doing %s generate/fit sequences", ntoys)
    logger.info("Systematics job name: %s", config['name'])
    if link_from:
        # Command-line option overrides the config file
        config['link-from'] = link_from
    if 'link-from' in config:
        logger.info("Linking toy data from %s", config['link-from'])
    else:
        logger.debug("No linking specified")
    # Now load the acceptance
    try:
        acceptance = get_acceptance(config['acceptance']) \
            if 'acceptance' in config \
            else None
    except _config.ConfigError as error:
        raise KeyError("Error loading acceptance -> {}".format(error))
    # Fit strategy
    fit_strategy = config['syst'].get('strategy', 'simple')
    # Load randomizer configuration
    randomizer = get_randomizer(config['randomizer'])(
        model=randomizer_model,
        config=config['randomizer'],
        acceptance=acceptance)
    # Set seed
    job_id = get_job_id()
    # Start looping
    fit_results = {}
    logger.info("Starting sampling-fit loop (print frequency is 20)")
    initial_mem = memory_usage()
    initial_time = default_timer()
    do_extended = config['syst'].get('extended', False)
    do_minos = config['syst'].get('minos', False)
    for fit_num in range(ntoys):
        # Logging
        if (fit_num + 1) % 20 == 0:
            logger.info("  Fitting event %s/%s", fit_num + 1, ntoys)
        # Generate a dataset with a fresh OS-provided seed for each toy,
        # seeding both numpy and ROOT so the toy is reproducible.
        seed = get_urandom_int(4)
        np.random.seed(seed=seed)
        ROOT.RooRandom.randomGenerator().SetSeed(seed)
        try:
            # Get a randomized dataset and fit it with the nominal fit
            dataset = randomizer.get_dataset(randomize=True)
            gen_values = randomizer.get_current_values()
            fit_result_nominal = fit(fit_model,
                                     model_name,
                                     fit_strategy,
                                     dataset,
                                     verbose,
                                     Extended=do_extended,
                                     Minos=do_minos)
            # Fit the randomized dataset with the randomized values as nominal
            fit_result_rand = fit(randomizer_model,
                                  model_name,
                                  fit_strategy,
                                  dataset,
                                  verbose,
                                  Extended=do_extended,
                                  Minos=do_minos)
            # Needed to avoid generating unphysical values
            randomizer.reset_values()
        except ValueError as error:
            # Fix: carry the cause in the message instead of raising a bare
            # RuntimeError() that hid all diagnostic information.
            raise RuntimeError(
                "Error in generate/fit sequence -> {}".format(error))
        except Exception as error:
            # Fix: log the full traceback before converting to RuntimeError
            # (the logging call was previously commented out).
            logger.exception("Unexpected error in generate/fit sequence")
            raise RuntimeError(str(error))
        result = {'fitnum': fit_num, 'seed': seed}
        # Save the results of the randomized fit
        result_roofit_rand = FitResult.from_roofit(fit_result_rand)
        # Materialize the parameter names so they do not depend on the
        # lifetime of the underlying fit-parameter mapping.
        result['param_names'] = list(
            result_roofit_rand.get_fit_parameters().keys())
        result['rand'] = result_roofit_rand.to_plain_dict()
        result['rand_cov'] = result_roofit_rand.get_covariance_matrix()
        _root.destruct_object(fit_result_rand)
        # Save the results of the nominal fit
        result_roofit_nominal = FitResult.from_roofit(fit_result_nominal)
        result['nominal'] = result_roofit_nominal.to_plain_dict()
        result['nominal_cov'] = result_roofit_nominal.get_covariance_matrix()
        result['gen'] = gen_values
        _root.destruct_object(result_roofit_nominal)
        _root.destruct_object(dataset)
        fit_results[fit_num] = result
        logger.debug("Cleaning up")
    logger.info("Fitting loop over")
    logger.info("--> Memory leakage: %.2f MB/sample-fit",
                (memory_usage() - initial_mem) / ntoys)
    logger.info("--> Spent %.0f ms/sample-fit",
                (default_timer() - initial_time) * 1000.0 / ntoys)
    logger.info("Saving to disk")
    data_res = []
    cov_matrices = {}
    # Flatten per-toy results into rows and collect covariance matrices
    for fit_num, fit_res_i in fit_results.items():
        fit_res = {
            'fitnum': fit_res_i['fitnum'],
            'seed': fit_res_i['seed'],
            'model_name': model_name,
            'fit_strategy': fit_strategy
        }
        param_names = fit_res_i['param_names']
        cov_folder_rand = os.path.join(str(job_id), str(fit_res['fitnum']),
                                       'rand')
        cov_matrices[cov_folder_rand] = pd.DataFrame(fit_res_i['rand_cov'],
                                                     index=param_names,
                                                     columns=param_names)
        cov_folder_nominal = os.path.join(str(job_id), str(fit_res['fitnum']),
                                          'nominal')
        cov_matrices[cov_folder_nominal] = pd.DataFrame(
            fit_res_i['nominal_cov'], index=param_names, columns=param_names)
        # Suffix the randomized/nominal/generated values so they can share
        # one flat row per toy.
        for res_name, res_value in fit_res_i['rand'].items():
            fit_res['{}_rand'.format(res_name)] = res_value
        for res_name, res_value in fit_res_i['nominal'].items():
            fit_res['{}_nominal'.format(res_name)] = res_value
        for res_name, res_value in fit_res_i['gen'].items():
            fit_res['{}_gen'.format(res_name)] = res_value
        data_res.append(fit_res)
    data_frame = pd.DataFrame(data_res)
    # Attach the job id to every row
    jobid_column = pd.concat([pd.DataFrame({'jobid': [job_id]})] *
                             data_frame.shape[0]).reset_index(drop=True)
    fit_result_frame = pd.concat([data_frame, jobid_column], axis=1)
    try:
        # pylint: disable=E1101
        with _paths.work_on_file(config['name'],
                                 path_func=_paths.get_toy_fit_path,
                                 link_from=config.get('link-from',
                                                      None)) as toy_fit_file:
            with modify_hdf(toy_fit_file) as hdf_file:
                # First fit results
                hdf_file.append('fit_results', fit_result_frame)
                # Save covariance matrix under 'covariance/jobid/fitnum'
                for cov_folder, cov_matrix in cov_matrices.items():
                    cov_path = os.path.join('covariance', cov_folder)
                    hdf_file.append(cov_path, cov_matrix)
                # Generator info
                hdf_file.append(
                    'input_values',
                    pd.DataFrame.from_dict(randomizer.get_input_values(),
                                           orient='index'))

            logger.info("Written output to %s", toy_fit_file)
            if 'link-from' in config:
                logger.info("Linked to %s", config['link-from'])
    except OSError as excp:
        logger.error(str(excp))
        raise
    except ValueError as error:
        logger.exception("Exception on dataset saving")
        raise RuntimeError(str(error))
コード例 #4
0
def run(config_files, link_from):
    """Run the script.

    Generate a toy dataset according to the configured ``gen-model`` and
    store it, together with the generator parameter values, in an HDF file.

    Arguments:
        config_files (list[str]): Path to the configuration files.
        link_from (str): Path to link the results from.

    Raise:
        KeyError: If some configuration data are missing.
        OSError: If there either the configuration file does not exist or if
            there is a problem preparing the output path.
        ValueError: If there is any problem in configuring the PDF factories.
        RuntimeError: If there is a problem during the generation.

    """
    # Configure: load and validate the config, translating failures into the
    # exception types documented above.
    try:
        config = load_config(*config_files,
                             validate=['gen/nevents', 'name', 'gen-model'])
    except OSError:
        raise OSError(
            "Cannot load configuration files: {}".format(config_files))
    except ConfigError as error:
        # Log a specific hint for each missing mandatory key before raising
        if 'gen/nevents' in error.missing_keys:
            logger.error("Number of events not specified")
        if 'name' in error.missing_keys:
            logger.error("No name was specified in the config file!")
        if 'gen-model' in error.missing_keys:
            logger.error(
                "No generation model were specified in the config file!")
        raise KeyError("ConfigError raised -> {}".format(error.missing_keys))
    except KeyError as error:
        logger.error("YAML parsing error -> %s", error)
        raise
    # Ignore renaming
    logger.info("Generating %s events", config['gen']['nevents'])
    logger.info("Generation job name: %s", config['name'])
    if link_from:
        # Command-line option overrides any value in the config file
        config['link-from'] = link_from
    if 'link-from' in config:
        logger.info("Linking toy data from %s", config['link-from'])
    else:
        logger.debug("No linking specified")
    # Set seed: seed both numpy and ROOT so the generation is reproducible
    # from the seed recorded in toy_info below.
    job_id = get_job_id()
    seed = get_urandom_int(4)
    np.random.seed(seed=seed)
    ROOT.RooRandom.randomGenerator().SetSeed(seed)
    # Generate
    try:
        physics = configure_model(config['gen-model'])
    except KeyError as error:
        logger.error("Cannot find physics factory")
        raise ValueError('{}'.format(error))
    except ValueError:
        logger.error("Problem dealing with shared parameters")
        raise
    if isinstance(physics, (SumPhysicsFactory, SimultaneousPhysicsFactory)):
        logger.warning("Generating a RooAddPdf or a RooSimultaneous: "
                       "yields will be generated at a fixed value")
    try:
        # 'nevents-per-job' (if present) takes precedence over 'nevents'
        dataset = generate(
            physics, config['gen'].get('nevents-per-job',
                                       config['gen']['nevents']))
    except ValueError as error:
        logger.exception("Exception on generation")
        raise RuntimeError(str(error))
    # Get toy information: record the generator parameter values actually used
    toy_info = {
        var.GetName(): [var.getVal()]
        for var in physics.get_gen_parameters()
    }
    # 'nevents' may be given as a dict; store the summed total in that case
    n_evts = sum(config['gen']['nevents'].values()) \
        if isinstance(config['gen']['nevents'], dict) \
        else config['gen']['nevents']
    toy_info.update({'seed': [seed], 'jobid': [job_id], 'nevents': n_evts})
    try:
        # Save the dataset (tagged with the job id) and the bookkeeping info
        with work_on_file(config['name'],
                          path_func=get_toy_path,
                          link_from=config.get('link-from')) as toy_file:
            with modify_hdf(toy_file) as hdf_file:
                hdf_file.append('data', dataset.assign(jobid=job_id))
                hdf_file.append('toy_info', pd.DataFrame(toy_info))
        # Say something
        logger.info("Written output to %s", toy_file)
        if 'link-from' in config:
            logger.info("Linked to %s", config['link-from'])
    except OSError as excp:
        logger.error(str(excp))
        raise
    except ValueError as error:
        logger.exception("Exception on dataset saving")
        raise RuntimeError(str(error))
コード例 #5
0
ファイル: fit_toys.py プロジェクト: mayou36/analysis-tools
def run(config_files, link_from, verbose):
    """Run the script.

    Run a sample/fit sequence as many times as requested.

    Arguments:
        config_files (list[str]): Path to the configuration files.
        link_from (str): Path to link the results from.
        verbose (bool): Give verbose output?

    Raise:
        OSError: If there either the configuration file does not exist some
            of the input toys cannot be found.
        AttributeError: If the input data are incompatible with a previous fit.
        KeyError: If some configuration data are missing.
        ValueError: If there is any problem in configuring the PDF factories.
        RuntimeError: If there is a problem during the fitting.

    """
    # Load and validate the configuration, translating configuration
    # problems into the exception types documented above.
    try:
        config = _config.load_config(*config_files,
                                     validate=['fit/nfits', 'name', 'data'])
    except OSError:
        raise OSError(
            "Cannot load configuration files: {}".format(config_files))
    except ConfigError as error:
        # Log a specific hint for each missing mandatory key before raising
        if 'fit/nfits' in error.missing_keys:
            logger.error("Number of fits not specified")
        if 'name' in error.missing_keys:
            logger.error("No name was specified in the config file!")
        if 'data' in error.missing_keys:
            logger.error("No input data specified in the config file!")
        raise KeyError("ConfigError raised -> {}".format(error.missing_keys))
    except KeyError as error:
        logger.error("YAML parsing error -> %s", error)
        # Fix: re-raise the error. Previously execution fell through with
        # `config` unbound, causing a NameError a few lines below.
        raise
    try:
        models = {
            model_name: config[model_name]
            for model_name in config['fit'].get('models', ['model'])
        }
    except KeyError as error:
        logger.error("Missing model configuration -> %s", str(error))
        raise KeyError("Missing model configuration")
    if not models:
        logger.error(
            "Empty list specified in the config file under 'fit/models'!")
        raise KeyError()
    fit_strategies = config['fit'].get('strategies', ['simple'])
    if not fit_strategies:
        logger.error("Empty fit strategies were specified in the config file!")
        raise KeyError()
    # Some info
    nfits = config['fit'].get('nfits-per-job', config['fit']['nfits'])
    logger.info("Doing %s sample/fit sequences", nfits)
    logger.info("Fit job name: %s", config['name'])
    if link_from:
        # Command-line option overrides the config file
        config['link-from'] = link_from
    if 'link-from' in config:
        logger.info("Linking toy data from %s", config['link-from'])
    else:
        logger.debug("No linking specified")
    # Analyze data requirements
    logger.info("Loading input data")
    data = {}
    gen_values = {}
    # Fix: iterate the data source configurations (values), not the source
    # ids; `'category' in key_string` was a substring test on the id, not a
    # check that the source config specifies a category.
    if len(set('category' in data_source
               for data_source in config['data'].values())) > 1:
        raise KeyError("Categories in 'data' not consistently specified.")
    for data_id, data_source in config['data'].items():
        try:
            source_toy = data_source['source']
        except KeyError:
            logger.error("Data source not specified")
            raise
        data[data_id] = (get_data({
            'source': source_toy,
            'source-type': 'toy',
            'tree': 'data',
            'output-format': 'pandas',
            'selection': data_source.get('selection')
        }), data_source['nevents'], data_source.get('poisson'),
                         data_source.get('category'))
        # Generator values
        toy_info = get_data({
            'source': source_toy,
            'source-type': 'toy',
            'tree': 'toy_info',
            'output-format': 'pandas'
        })
        gen_values[data_id] = {}
        for var_name in toy_info.columns:
            # Bookkeeping columns are not generator parameters
            if var_name in ('seed', 'jobid', 'nevents'):
                continue
            gen_values[data_id][var_name] = toy_info[var_name].iloc[0]
    try:
        fit_models = {}
        for model_name in models:
            if model_name not in config:
                raise KeyError(
                    "Missing model definition -> {}".format(model_name))
            fit_models[model_name] = configure_model(config[model_name])
            if any(yield_.isConstant()
                   for yield_ in fit_models[model_name].get_yield_vars()
                   if yield_):
                logger.warning(
                    "Model %s has constant yields. "
                    "Be careful when configuring the input data, you may need to disable poisson sampling",
                    model_name)
    except KeyError:
        logger.exception("Error loading model")
        raise ValueError("Error loading model")
    if len(set(model.is_extended() for model in fit_models.values())) == 2:
        logger.error("Mix of extended and non-extended models!")
        raise ValueError("Error loading fit models")
    # Let's check these generator values against the output file
    try:
        gen_values_frame = {}
        # pylint: disable=E1101
        with _paths.work_on_file(config['name'], _paths.get_toy_fit_path,
                                 config.get('link-from')) as toy_fit_file:
            with modify_hdf(toy_fit_file) as hdf_file:
                logger.debug("Checking generator values")
                test_gen = [('gen_{}'.format(source_id)) in hdf_file
                            for source_id in gen_values]
                if all(test_gen):
                    # The data were written already, crosscheck values
                    for source_id, gen_value in gen_values.items():
                        # Fix: look up the frame for *this* source id; the
                        # previous code formatted the stale `data_source`
                        # loop variable into the key.
                        if not all(
                                hdf_file['gen_{}'.format(source_id)]
                                [var_name].iloc[0] == var_value
                                for var_name, var_value in gen_value.items()):
                            raise AttributeError(
                                "Generated and stored values don't match for source '{}'"
                                .format(source_id))
                elif not any(test_gen):  # No data were there, just overwrite
                    # Fix: the loop variable used to shadow and clobber the
                    # `gen_values` mapping being iterated.
                    for source_id, source_gen_values in gen_values.items():
                        gen_data = {
                            'id': source_id,
                            'source': _paths.get_toy_path(
                                config['data'][source_id]['source']),
                            'nevents': config['data'][source_id]['nevents']
                        }
                        gen_data.update(source_gen_values)
                        gen_values_frame['gen_{}'.format(
                            source_id)] = pd.DataFrame([gen_data])
                else:
                    raise AttributeError("Inconsistent number of data sources")
    except OSError as excp:
        logger.error(str(excp))
        raise
    # Now load the acceptance
    try:
        acceptance = get_acceptance(config['acceptance']) \
            if 'acceptance' in config \
            else None
    except ConfigError as error:
        raise KeyError("Error loading acceptance -> {}".format(error))
    # Prepare output
    gen_events = defaultdict(list)
    # Set seed: derive it from the batch job id when available, so parallel
    # jobs get distinct but reproducible seeds.
    job_id = get_job_id()
    if job_id:
        seed = int(job_id.split('.')[0])
    else:
        import random
        job_id = 'local'
        seed = random.randint(0, 100000)
    np.random.seed(seed=seed)
    ROOT.RooRandom.randomGenerator().SetSeed(seed)
    # Start looping
    fit_results = defaultdict(list)
    logger.info("Starting sampling-fit loop (print frequency is 20)")
    initial_mem = memory_usage()
    initial_time = default_timer()
    for fit_num in range(nfits):
        # Logging
        if (fit_num + 1) % 20 == 0:
            logger.info("  Fitting event %s/%s", fit_num + 1, nfits)
        # Get a compound dataset with a fresh OS-provided seed per toy
        seed = get_urandom_int(4)
        np.random.seed(seed=seed)
        ROOT.RooRandom.randomGenerator().SetSeed(seed)
        try:
            logger.debug("Sampling input data")
            datasets, sample_sizes = get_datasets(data, acceptance, fit_models)
            for sample_name, sample_size in sample_sizes.items():
                gen_events['N^{{{}}}_{{gen}}'.format(sample_name)].append(
                    sample_size)
            logger.debug("Sampling finalized")
        except KeyError:
            logger.exception("Bad data configuration")
            raise
        logger.debug("Fitting")
        for model_name in models:
            dataset = datasets.pop(model_name)
            fit_model = fit_models[model_name]
            # Now fit each model with every requested strategy
            for fit_strategy in fit_strategies:
                toy_key = (model_name, fit_strategy)
                try:
                    fit_result = fit(fit_model,
                                     model_name,
                                     fit_strategy,
                                     dataset,
                                     verbose,
                                     Extended=config['fit'].get(
                                         'extended', False),
                                     Minos=config['fit'].get('minos', False))
                except ValueError as error:
                    # Fix: carry the cause in the message instead of raising
                    # a bare RuntimeError().
                    raise RuntimeError(
                        "Error fitting model '{}' -> {}".format(
                            model_name, error))
                # Now results are in fit_parameters
                result_roofit = FitResult.from_roofit(fit_result)
                result = result_roofit.to_plain_dict()
                result['cov_matrix'] = result_roofit.get_covariance_matrix()
                # Materialize the parameter names before the underlying fit
                # result is destructed.
                result['param_names'] = list(
                    result_roofit.get_fit_parameters().keys())
                result['fitnum'] = fit_num
                result['seed'] = seed
                fit_results[toy_key].append(result)
                _root.destruct_object(fit_result)
            _root.destruct_object(dataset)
        logger.debug("Cleaning up")
    logger.info("Fitting loop over")
    logger.info("--> Memory leakage: %.2f MB/sample-fit",
                (memory_usage() - initial_mem) / nfits)
    logger.info("--> Spent %.0f ms/sample-fit",
                (default_timer() - initial_time) * 1000.0 / nfits)
    logger.info("Saving to disk")
    data_res = []
    cov_matrices = {}
    # Flatten per-toy results into rows and collect covariance matrices
    for (model_name, fit_strategy), fits in fit_results.items():
        for fit_res in fits:
            fit_res = fit_res.copy()
            fit_res['model_name'] = model_name
            fit_res['fit_strategy'] = fit_strategy
            cov_folder = os.path.join(str(job_id), str(fit_res['fitnum']))
            param_names = fit_res.pop('param_names')
            cov_matrices[cov_folder] = pd.DataFrame(fit_res.pop('cov_matrix'),
                                                    index=param_names,
                                                    columns=param_names)
            data_res.append(fit_res)
    data_frame = pd.DataFrame(data_res)
    # Attach the job id to every row
    jobid_column = pd.concat([pd.DataFrame({'jobid': [job_id]})] *
                             data_frame.shape[0]).reset_index(drop=True)
    fit_result_frame = pd.concat(
        [pd.DataFrame(gen_events), data_frame, jobid_column], axis=1)
    try:
        # pylint: disable=E1101
        with _paths.work_on_file(
                config['name'],
                path_func=_paths.get_toy_fit_path,
                link_from=config.get('link-from')) as toy_fit_file:
            with modify_hdf(toy_fit_file) as hdf_file:
                # First fit results
                hdf_file.append('fit_results', fit_result_frame)
                # Save covariance matrix under 'covariance/jobid/fitnum'
                for cov_folder, cov_matrix in cov_matrices.items():
                    cov_path = os.path.join('covariance', cov_folder)
                    hdf_file.append(cov_path, cov_matrix)
                # Generator info
                for key_name, gen_frame in gen_values_frame.items():
                    hdf_file.append(key_name, gen_frame)

            logger.info("Written output to %s", toy_fit_file)
            if 'link-from' in config:
                logger.info("Linked to %s", config['link-from'])
    except OSError as excp:
        logger.error(str(excp))
        raise
    except ValueError as error:
        logger.exception("Exception on dataset saving")
        raise RuntimeError(str(error))