Exemple #1
0
def get_acceptance(config):
    """Get an acceptance object.

    The input `config` is not modified: the `generation` and `reconstruction`
    nodes are shallow-copied before their `name` entry is extracted, so the
    same configuration can be reused by the caller afterwards.

    Arguments:
        config (dict): Acceptance to load. Its keys are:
            + `variables` (list[str]): List of variable names.
            + `generation` (dict): Generation configuration. It needs to have a `name` entry, which corresponds
                to the name of the generator efficiency. Any other key will be passed to `get_efficiency_model`
                as extra keyword arguments.
            + `reconstruction` (dict): Reconstruction configuration. It needs to have a `name` entry, which
                corresponds to the name of the reconstruction efficiency. Any other key will be passed to
                `get_efficiency_model` as extra keyword arguments.

    Return:
        `analysis.efficiency.acceptance.Acceptance`: Acceptance object.

    Raise:
        analysis.utils.config.ConfigError: If the input config is missing keys, or if the
            variables of the acceptance do not match those of either efficiency.
        See `analysis.utils.config.load_config`.

    """
    required_keys = {'variables', 'generation/name', 'reconstruction/name'}
    # missing_keys should be empty if the needed keys have been provided. Otherwise complain!
    missing_keys = required_keys - {key for key, _ in unfold_config(config)}
    if missing_keys:
        raise ConfigError(
            "Missing configuration key! -> {}".format(missing_keys))

    def _load_stage_efficiency(stage):
        """Load the efficiency model configured under `config[stage]`."""
        # Work on a copy so that popping `name` does not alter the caller's config
        stage_config = dict(config[stage])
        efficiency_name = stage_config.pop('name')
        return get_efficiency_model(
            load_config(get_efficiency_path(efficiency_name),
                        validate=('model', 'variables', 'parameters')),
            **stage_config)

    # Load the efficiencies
    gen_efficiency = _load_stage_efficiency('generation')
    reco_efficiency = _load_stage_efficiency('reconstruction')
    # Check the variables (order is irrelevant, only the set of names must agree)
    acceptance_vars = set(config['variables'])
    if acceptance_vars != set(gen_efficiency.get_variables()):
        raise ConfigError(
            "Mismatch in variables between acceptance and generation")
    if acceptance_vars != set(reco_efficiency.get_variables()):
        raise ConfigError(
            "Mismatch in variables between acceptance and reconstruction")
    # Now create the acceptance
    return Acceptance(config['variables'], gen_efficiency, reco_efficiency)
Exemple #2
0
def load_efficiency_model(model_name, **extra_parameters):
    """Build an efficiency model from its configuration file on disk.

    The location of the file is obtained by passing `model_name` to
    `get_efficiency_path`. The file is loaded with `load_config`, requiring the
    `model`, `variables` and `parameters` keys, and the result is handed to
    `get_efficiency_model`.

    Arguments:
        model_name (str): Name of the efficiency model.
        **extra_parameters (dict): Extra configuration parameters, forwarded as
            keyword arguments to `get_efficiency_model`.

    Return:
        The object returned by `get_efficiency_model`.

    Raise:
        OSError: If the efficiency file does not exist.
        analysis.utils.config.ConfigError: If there is a problem with the efficiency model.

    """
    efficiency_file = get_efficiency_path(model_name)
    if os.path.exists(efficiency_file):
        model_config = load_config(efficiency_file,
                                   validate=('model', 'variables', 'parameters'))
        return get_efficiency_model(model_config, **extra_parameters)
    raise OSError("Cannot find efficiency file -> {}".format(efficiency_file))
def run(config_files, link_from):
    """Run the script.

    Loads the configuration, fits the efficiency model (unless its output file
    already exists) and, if the `plot` configuration entry is set, produces one
    plot per variable. If the efficiency file exists, only the plots are remade.
    If both the efficiency file and all the requested plots exist, nothing is done.

    Arguments:
        config_files (list[str]): Path to the configuration files.
        link_from (str): Path to link the results from.

    Raise:
        OSError: If either the configuration files cannot be loaded or some
            of the input files cannot be found.
        KeyError: If some configuration data are missing.
        ValueError: If there is any problem in configuring the efficiency model.
        RuntimeError: If there is a problem during the efficiency fitting.

    """
    try:
        config = _config.load_config(*config_files,
                                     validate=[
                                         'name', 'data/source', 'data/tree',
                                         'parameters', 'model', 'variables'
                                     ])
    except OSError:
        raise OSError(
            "Cannot load configuration files: {}".format(config_files))
    except _config.ConfigError as error:
        # One message per validated key; the keys must match the `validate`
        # list above, otherwise the corresponding message is never logged.
        missing_key_messages = (
            ('name', "No name was specified in the config file!"),
            ('data/source', "No input data specified in the config file!"),
            ('data/tree', "No input data specified in the config file!"),
            ('model', "No efficiency model specified in the config file!"),
            ('parameters',
             "No efficiency model parameters specified in the config file!"),
            ('variables',
             "No efficiency variables to model have been specified in the config file!"),
        )
        for key, message in missing_key_messages:
            if key in error.missing_keys:
                logger.error(message)
        raise KeyError("ConfigError raised -> {}".format(error.missing_keys))
    except KeyError as error:
        logger.error("YAML parsing error -> %s", error)
        raise
    # Do checks and load things
    plot_files = {}
    if config.get('plot', False):
        for var_name in config['variables']:
            plot_files[var_name] = get_efficiency_plot_path(config['name'],
                                                            var=var_name)
    efficiency_class = get_efficiency_model_class(config['model'])
    if not efficiency_class:
        raise ValueError("Unknown efficiency model -> {}".format(
            config['model']))
    # Let's do it
    # pylint: disable=E1101
    efficiency_path = _paths.get_efficiency_path(config['name'])
    efficiency_exists = os.path.exists(efficiency_path)
    plots_exist = all(os.path.exists(file_name)
                      for file_name in plot_files.values())
    if plots_exist and efficiency_exists:
        # Everything requested is already on disk (note that `plots_exist` is
        # trivially true when no plots were requested)
        logger.info("Efficiency file exists: %s. Nothing to do!",
                    efficiency_path)
        return
    # Something is missing, so we need to load the data
    logger.info("Loading data, this may take a while...")
    weight_var = config['data'].get('weight-var-name', None)
    # Prepare data
    config['data']['output-format'] = 'pandas'
    config['data']['variables'] = list(config['variables'])
    if weight_var:
        config['data']['variables'].append(weight_var)
    # NOTE(review): the output format is given both in the config and as a
    # keyword; kept as-is since the signature of `get_data` is not visible here.
    input_data = get_data(config['data'], **{'output-format': 'pandas'})
    if weight_var:
        logger.info("Data loaded, using %s as weight", weight_var)
    else:
        logger.info("Data loaded, not using any weights")

    if not efficiency_exists:
        logger.info("Fitting efficiency model")
        try:
            eff = efficiency_class.fit(input_data, config['variables'],
                                       weight_var, **config['parameters'])
        except (ValueError, TypeError) as error:
            # Format the exception itself: the `message` attribute does not
            # exist on Python 3 exceptions.
            raise ValueError(
                "Cannot configure the efficiency model -> {}".format(error))
        except KeyError as error:
            raise RuntimeError("Missing key -> {}".format(error))
        except Exception as error:
            raise RuntimeError(error)
        output_file = eff.write_to_disk(config['name'], link_from)
        logger.info("Written efficiency file -> %s", output_file)
    else:
        logger.warning(
            "Output efficiency already exists, only redoing plots")
        eff = load_efficiency_model(config['name'])
    if plot_files:
        # Imported lazily so that seaborn is only needed when plotting
        import seaborn as sns
        sns.set_style("white")
        plt.style.use('file://{}'.format(
            os.path.join(get_global_var('STYLE_PATH'),
                         'matplotlib_LHCb.mplstyle')))
        plots = eff.plot(input_data,
                         weight_var,
                         labels=config.get('plot-labels', {}))
        for var_name, plot in plots.items():
            logger.info("Plotting '%s' efficiency -> %s", var_name,
                        plot_files[var_name])
            plot.savefig(plot_files[var_name], bbox_inches='tight')