def _load_acceptance_efficiency(stage_config):
    """Load the efficiency model described by one acceptance stage configuration.

    Arguments:
        stage_config (dict): Stage (`generation` or `reconstruction`) configuration.
            Its `name` entry selects the efficiency file; every other entry is
            forwarded to `get_efficiency_model` as keyword arguments.

    Return:
        The object returned by `get_efficiency_model`.

    """
    # Work on a shallow copy: popping `name` from the caller's dictionary would
    # make the same configuration unusable for a second call.
    extra_parameters = dict(stage_config)
    efficiency_name = extra_parameters.pop('name')
    efficiency_config = load_config(get_efficiency_path(efficiency_name),
                                    validate=('model', 'variables', 'parameters'))
    return get_efficiency_model(efficiency_config, **extra_parameters)


def get_acceptance(config):
    """Get an acceptance object.

    The input configuration is not modified.

    Arguments:
        config (dict): Acceptance to load. Its keys are:
            + `variables` (list[str]): List of variable names.
            + `generation` (dict): Generation configuration. It needs to have a `name`
                entry, which corresponds to the name of the generator efficiency.
                Any other key will be passed to `get_efficiency_model` as
                keyword arguments.
            + `reconstruction` (dict): Reconstruction configuration. It needs to have
                a `name` entry, which corresponds to the name of the reconstruction
                efficiency. Any other key will be passed to `get_efficiency_model`
                as keyword arguments.

    Return:
        `analysis.efficiency.acceptance.Acceptance`: Acceptance object.

    Raise:
        analysis.utils.config.ConfigError: If the input config is missing keys, or
            if the acceptance variables do not match those of the generation or
            reconstruction efficiency. See `analysis.utils.config.load_config`.

    """
    config_keys = {key for key, _ in unfold_config(config)}
    # missing_keys should be empty if the needed keys have been provided. Otherwise complain!
    missing_keys = set(('variables', 'generation/name', 'reconstruction/name')) - config_keys
    if missing_keys:
        raise ConfigError("Missing configuration key! -> {}".format(missing_keys))
    # Load the efficiencies (generation first, then reconstruction)
    gen_efficiency = _load_acceptance_efficiency(config['generation'])
    reco_efficiency = _load_acceptance_efficiency(config['reconstruction'])
    # Check the variables: order is irrelevant, only the set of names must agree
    acceptance_variables = set(config['variables'])
    if acceptance_variables != set(gen_efficiency.get_variables()):
        raise ConfigError("Mismatch in variables between acceptance and generation")
    if acceptance_variables != set(reco_efficiency.get_variables()):
        raise ConfigError("Mismatch in variables between acceptance and reconstruction")
    # Now create the acceptance
    return Acceptance(config['variables'], gen_efficiency, reco_efficiency)
def load_efficiency_model(model_name, **extra_parameters):
    """Load an efficiency model from its file.

    The file path is determined from `model_name` using the
    `get_efficiency_path` function, and the file is loaded with `load_config`
    requiring the `model`, `variables` and `parameters` keys.

    Arguments:
        model_name (str): Name of the efficiency model.
        **extra_parameters (dict): Extra configuration parameters, forwarded to
            `get_efficiency_model`, to override the entries in the `parameters`
            node loaded from the efficiency file.

    Return:
        The object returned by `get_efficiency_model`.

    Raise:
        OSError: If the efficiency file does not exist.
        analysis.utils.config.ConfigError: If there is a problem with the
            efficiency model.

    """
    efficiency_file = get_efficiency_path(model_name)
    if os.path.exists(efficiency_file):
        model_config = load_config(efficiency_file,
                                   validate=('model', 'variables', 'parameters'))
        return get_efficiency_model(model_config, **extra_parameters)
    # Fail early with an explicit message instead of letting the loader complain
    raise OSError("Cannot find efficiency file -> {}".format(efficiency_file))
def run(config_files, link_from):
    """Run the script.

    Fit the efficiency model described in the configuration, write it to disk
    and, if the `plot` configuration entry is set, plot it for each variable.
    If the efficiency file exists, only the plots are remade; if the plots
    exist too, nothing is done.

    Arguments:
        config_files (list[str]): Path to the configuration files.
        link_from (str): Path to link the results from.

    Raise:
        OSError: If the configuration files cannot be loaded.
            NOTE(review): missing input data files are presumably reported as
            OSError by `get_data` as well -- confirm.
        KeyError: If some configuration data are missing.
        ValueError: If the efficiency model is unknown or there is any problem
            in configuring it.
        RuntimeError: If there is a problem during the efficiency fitting.

    """
    # Error messages logged for each required key that is missing, in logging order
    missing_key_messages = (
        ('name', "No name was specified in the config file!"),
        ('data/source', "No input data specified in the config file!"),
        ('data/tree', "No input data specified in the config file!"),
        ('model', "No efficiency model specified in the config file!"),
        ('parameters', "No efficiency model parameters specified in the config file!"),
        ('variables', "No efficiency variables to model have been specified in the config file!"),
    )
    try:
        config = _config.load_config(*config_files,
                                     validate=['name',
                                               'data/source',
                                               'data/tree',
                                               'parameters',
                                               'model',
                                               'variables'])
    except OSError:
        raise OSError("Cannot load configuration files: {}".format(config_files))
    except _config.ConfigError as error:
        for key, message in missing_key_messages:
            if key in error.missing_keys:
                logger.error(message)
        raise KeyError("ConfigError raised -> {}".format(error.missing_keys))
    except KeyError as error:
        logger.error("YAML parsing error -> %s", error)
        raise
    # Do checks and load things
    plot_files = {}
    if config.get('plot', False):
        for var_name in config['variables']:
            plot_files[var_name] = get_efficiency_plot_path(config['name'], var=var_name)
    efficiency_class = get_efficiency_model_class(config['model'])
    if not efficiency_class:
        raise ValueError("Unknown efficiency model -> {}".format(config['model']))
    # Let's do it
    # pylint: disable=E1101
    efficiency_path = _paths.get_efficiency_path(config['name'])
    # Checked once, before fitting, so the decision is not affected by the
    # file this very run may write
    efficiency_exists = os.path.exists(efficiency_path)
    plots_exist = all(os.path.exists(file_name) for file_name in plot_files.values())
    if efficiency_exists and plots_exist:
        logger.info("Efficiency file exists: %s. Nothing to do!", efficiency_path)
        return
    # Either the efficiency or some plot is missing, so we load data
    logger.info("Loading data, this may take a while...")
    weight_var = config['data'].get('weight-var-name', None)
    # Prepare data
    config['data']['output-format'] = 'pandas'
    config['data']['variables'] = list(config['variables'])
    if weight_var:
        config['data']['variables'].append(weight_var)
    input_data = get_data(config['data'], **{'output-format': 'pandas'})
    if weight_var:
        logger.info("Data loaded, using %s as weight", weight_var)
    else:
        logger.info("Data loaded, not using any weights")
    if not efficiency_exists:
        logger.info("Fitting efficiency model")
        try:
            eff = efficiency_class.fit(input_data,
                                       config['variables'],
                                       weight_var,
                                       **config['parameters'])
        except (ValueError, TypeError) as error:
            # Format the exception itself: `error.message` does not exist in Python 3
            raise ValueError("Cannot configure the efficiency model -> {}".format(error))
        except KeyError as error:
            raise RuntimeError("Missing key -> {}".format(error))
        except Exception as error:
            raise RuntimeError(error)
        output_file = eff.write_to_disk(config['name'], link_from)
        logger.info("Written efficiency file -> %s", output_file)
    else:
        logger.warning("Output efficiency already exists, only redoing plots")
        eff = load_efficiency_model(config['name'])
    if plot_files:
        # Imported lazily so seaborn is only needed when plots are requested
        import seaborn as sns
        sns.set_style("white")
        plt.style.use('file://{}'.format(os.path.join(get_global_var('STYLE_PATH'),
                                                      'matplotlib_LHCb.mplstyle')))
        plots = eff.plot(input_data, weight_var, labels=config.get('plot-labels', {}))
        for var_name, plot in plots.items():
            logger.info("Plotting '%s' efficiency -> %s", var_name, plot_files[var_name])
            plot.savefig(plot_files[var_name], bbox_inches='tight')