Ejemplo n.º 1
0
def extract_nuisance_parameters_from_lhe_file(filename, systematics):
    """
    Extracts the definition of nuisance parameters from the LHE file.

    Parameters
    ----------
    filename : str
        Path of the LHE file. It is handed to `_untar_and_parse_lhe_file` and
        quoted in log messages.

    systematics : dict or None
        Systematics setup. Entries keyed "mur", "muf" or "mu" hold a string of
        comma-separated scale factors. The value of an optional "pdf" entry is
        matched (as a substring) against lower-cased weight group names. If
        None, an empty result is returned without reading the file.

    Returns
    -------
    nuisance_params : OrderedDict
        For each scale systematic with at least one matching weight, maps its
        name to a two-element list of weight ids (None where no weight was
        found or requested). Index 0 belongs to the last scale factor in the
        setup string, index 1 to the first one. For PDF variations, maps
        "pdf_<i>" to `[weight_id, None]`, where i is the position of the weight
        in its weight group.

    Raises
    ------
    RuntimeError
        If a scale factor string is empty, or if the LHE file has no weight
        groups.
    ValueError
        If a scale factor cannot be converted to a float.
    """

    logger.debug("Parsing nuisance parameter setup from LHE file at %s",
                 filename)

    # Nuisance parameters (output)
    nuisance_params = OrderedDict()

    # When no systematics setup is defined
    if systematics is None:
        return nuisance_params

    logger.debug("Systematics setup: %s", systematics)

    scale_names = ("mur", "muf", "mu")
    dynscale_keys = ("dynscale", "dyn_scale", "DYNSCALE", "DYN_SCALE")

    # Parse scale factors from strings in systematics. This list (and
    # systematics_scale_done below) is aligned with systematics.keys().
    systematics_scales = []
    for key, value in systematics.items():
        if key not in scale_names:
            systematics_scales.append(None)
            continue

        # str.split() never returns an empty list, so emptiness has to be
        # checked on the string itself
        if not value.strip():
            raise RuntimeError(
                "Cannot parse scale factor string {}".format(value))

        scale_factors = [float(sf) for sf in value.split(",")]
        if len(scale_factors) == 1:
            systematics_scales.append((scale_factors[0], ))
        else:
            # Only the outermost variations are used: (last, first)
            systematics_scales.append((scale_factors[-1], scale_factors[0]))

    # Untar and parse LHE file
    root, _ = _untar_and_parse_lhe_file(filename)

    # Find weight groups. A missing <header> or <initrwgt> element shows up as
    # an IndexError on the empty findall() result.
    try:
        weight_groups = root.findall("header")[0].findall(
            "initrwgt")[0].findall("weightgroup")
    except (IndexError, KeyError) as e:
        raise RuntimeError(
            "Could not find weight groups in LHE file!\n{}".format(e))

    if len(weight_groups) == 0:
        raise RuntimeError("Zero weight groups in LHE file!")

    # What have we already found? One [done_0, done_1] pair per systematic;
    # slots that are not requested count as done from the start.
    systematics_scale_done = []
    for val in systematics_scales:
        if val is None:
            systematics_scale_done.append([True, True])
        elif len(val) == 1:
            systematics_scale_done.append([False, True])
        else:
            systematics_scale_done.append([False, False])

    systematics_pdf_done = False
    wants_scale = any(name in systematics for name in scale_names)

    # Loop over weight groups and weights and identify benchmarks
    for wg in weight_groups:
        try:
            wg_name = wg.attrib["name"]
        except KeyError:
            logger.warning("Weight group does not have name attribute")
            continue

        wg_name_lower = wg_name.lower()

        if "mg_reweighting" in wg_name_lower:  # Physics reweighting
            logger.debug("Found physics reweighting weight group %s", wg_name)
            continue

        if wants_scale and "scale variation" in wg_name_lower:
            logger.debug("Found scale variation weight group %s", wg_name)

            for weight in wg.findall("weight"):
                try:
                    weight_id = str(weight.attrib["id"])
                    weight_muf = float(weight.attrib["MUF"])
                    weight_mur = float(weight.attrib["MUR"])
                except KeyError:
                    logger.warning(
                        "Scale variation weight does not have all expected attributes"
                    )
                    continue

                logger.debug(
                    "Found scale variation weight %s / muf = %s, mur = %s",
                    weight_id, weight_muf, weight_mur)

                # Let's skip the entries with a varied dynamical scale for now
                weight_dynscale = None
                for key in dynscale_keys:
                    try:
                        weight_dynscale = int(weight.attrib[key])
                    except KeyError:
                        pass
                if weight_dynscale is not None:
                    continue

                # Matching time!
                for i, (syst_name, syst_scales) in enumerate(
                        zip(systematics.keys(), systematics_scales)):
                    if syst_name not in scale_names:
                        continue

                    syst_done = systematics_scale_done[i]
                    for k in (0, 1):
                        if syst_done[k]:
                            continue

                        # "mur" varies only mu_R, "muf" only mu_F, "mu" both
                        scale = syst_scales[k]
                        target_mur = scale if syst_name in ("mur",
                                                            "mu") else 1.0
                        target_muf = scale if syst_name in ("muf",
                                                            "mu") else 1.0

                        if (approx_equal(weight_mur, target_mur)
                                and approx_equal(weight_muf, target_muf)):
                            benchmarks = nuisance_params.setdefault(
                                syst_name, [None, None])
                            benchmarks[k] = weight_id
                            syst_done[k] = True
                            # One weight fills at most one slot per systematic
                            break

        elif "pdf" in systematics and (systematics["pdf"] in wg_name_lower
                                       or "pdf" in wg_name_lower
                                       or "ct" in wg_name_lower):
            # PDF reweighting
            logger.debug("Found PDF variation weight group %s", wg_name)

            for i, weight in enumerate(wg.findall("weight")):
                try:
                    weight_id = str(weight.attrib["id"])
                    weight_pdf = int(weight.attrib["PDF"])
                except KeyError:
                    logger.warning(
                        "PDF weight does not have all expected attributes")
                    continue

                logger.debug("Found PDF weight %s / %s", weight_id, weight_pdf)

                # Add every PDF Hessian direction to nuisance parameters
                nuisance_params["pdf_{}".format(i)] = [weight_id, None]

                systematics_pdf_done = True

        else:
            logger.debug("Found other weight group %s", wg_name)

    # Check that everything was found
    if "pdf" in systematics and not systematics_pdf_done:
        logger.warning(
            "Could not find weights for the PDF uncertainties in LHE file! The most common source of this"
            " error is not having installed LHAPDF with its Python interface. Please make sure that you "
            " have installed this. You can also check the log file produced by MadGraph for a warning"
            " about this. If LHAPDF is correctly installed and you still get this warning, please check"
            " manually whether the LHE file at %s contains weights from PDF variation, and contact"
            " the MadMiner developer team about this. If you continue with the analysis, MadMiner"
            " will disregard PDF uncertainties.",
            filename,
        )

    for syst_name, (done1, done2) in zip(systematics.keys(),
                                         systematics_scale_done):
        if not (done1 and done2):
            logger.warning(
                "Did not find benchmarks representing scale variation uncertainty %s in LHE file!",
                syst_name)
            logger.warning(
                "Could not find weights for the scale uncertainty %s in LHE file! The most common source of "
                " this error is not having installed LHAPDF with its Python interface. Please make sure that"
                " you have installed this. You can also check the log file produced by MadGraph for a "
                "warning about this. If LHAPDF is correctly installed and you still get this warning, please"
                " check manually whether the LHE file at %s contains weights from PDF variation, and contact"
                " the MadMiner developer team about this. If you continue with the analysis, MadMiner"
                " will disregard PDF uncertainties.",
                syst_name,
                filename,
            )

    return nuisance_params
Ejemplo n.º 2
0
def _find_scale_variation_weight_id(weight_groups, mur, muf):
    """
    Returns the id of the first scale variation weight whose MUR and MUF
    attributes match `mur` and `muf` (according to `approx_equal`), or None if
    no weight group contains such a weight. Weights that carry a dynamical
    scale attribute are ignored.
    """
    dynscale_keys = ("dynscale", "dyn_scale", "DYNSCALE", "DYN_SCALE")

    for wg in weight_groups:
        try:
            wg_name = wg.attrib["name"]
        except KeyError:
            logger.warning(
                "New weight group: does not have name attribute, skipping")
            continue
        logger.debug("New weight group: %s", wg_name)

        wg_name_lower = wg_name.lower()
        if ("mg_reweighting" in wg_name_lower
                or "scale variation" not in wg_name_lower):
            continue
        logger.debug("Weight group identified as scale variation")

        for weight in wg.findall("weight"):
            try:
                weight_id = str(weight.attrib["id"])
                weight_muf = float(weight.attrib["MUF"])
                weight_mur = float(weight.attrib["MUR"])
            except KeyError:
                logger.warning(
                    "Scale variation weight does not have all expected attributes"
                )
                continue

            logger.debug(
                "Found scale variation weight %s / muf = %s, mur = %s",
                weight_id, weight_muf, weight_mur)

            # Let's skip the entries with a varied dynamical scale for now
            weight_dynscale = None
            for key in dynscale_keys:
                try:
                    weight_dynscale = int(weight.attrib[key])
                except KeyError:
                    pass
            if weight_dynscale is not None:
                continue

            # Matching time!
            if approx_equal(weight_mur, mur) and approx_equal(
                    weight_muf, muf):
                return weight_id

    return None


def _extract_nuisance_param_dict(weight_groups, systematics_name,
                                 systematics_definition):
    """
    Builds the nuisance parameter definitions for one systematic uncertainty.

    Parameters
    ----------
    weight_groups : iterable
        Weight group elements of the LHE header. Each is expected to offer an
        `attrib` mapping and `findall("weight")`.

    systematics_name : str
        Name of the systematic; used as prefix of all generated nuisance
        parameter and benchmark names.

    systematics_definition : sequence
        The first entry is the type: "norm", "scale" or "pdf".
        For "norm", entry 1 is passed through as third element of the output.
        For "scale", entry 1 is one of "mu", "muf", "mur" and entry 2 is a
        string of comma-separated scale factors.
        For "pdf", entry 1 is a string matched (as a substring) against
        lower-cased weight group names.

    Returns
    -------
    nuisance_dict : dict or OrderedDict
        Maps nuisance parameter names to tuples
        `((benchmark_name_0, weight_id_0), (benchmark_name_1, weight_id_1), value)`,
        where unused slots are None. Empty if the requested scale variation
        weights could not all be found, or if no PDF weights were found.

    Raises
    ------
    RuntimeError
        If the systematics type is unknown or the scale factor string is empty.
    ValueError
        If a scale factor cannot be converted to a float.
    """
    logger.debug("Extracting nuisance parameter information for systematic %s",
                 systematics_name)

    syst_type = systematics_definition[0]

    if syst_type == "norm":
        nuisance_param_name = "{}_nuisance_param_0".format(systematics_name)
        benchmark_name = "{}_benchmark_0".format(nuisance_param_name)
        nuisance_param_definition = ((benchmark_name, None), (None, None),
                                     systematics_definition[1])
        return {nuisance_param_name: nuisance_param_definition}

    if syst_type == "scale":
        # Parse scale variations we need to find. str.split() never returns
        # an empty list, so emptiness is checked on the string itself.
        scale_string = systematics_definition[2]
        if not scale_string.strip():
            raise RuntimeError(
                "Cannot parse scale factor string {}".format(scale_string))

        scale_factors = [float(sf) for sf in scale_string.split(",")]
        if len(scale_factors) == 1:
            scale_factors = (scale_factors[0], )
        else:
            # Only the outermost variations are used: (last, first)
            scale_factors = (scale_factors[-1], scale_factors[0])

        scale_type = systematics_definition[1]

        # Find exactly one weight per scale factor. Searching stops at the
        # first match, so several matching weight groups cannot contribute
        # more than one entry for the same scale factor.
        nuisance_param_definition_parts = []
        for k, scale_factor in enumerate(scale_factors):
            muf = scale_factor if scale_type in ("mu", "muf") else 1.0
            mur = scale_factor if scale_type in ("mu", "mur") else 1.0

            weight_id = _find_scale_variation_weight_id(
                weight_groups, mur, muf)
            if weight_id is None:
                continue

            benchmark_name = "{}_nuisance_param_0_benchmark_{}".format(
                systematics_name, k)
            nuisance_param_definition_parts.append((benchmark_name,
                                                    weight_id))

        if len(nuisance_param_definition_parts) < len(scale_factors):
            logger.warning(
                "Could not find weights for the scale uncertainty %s in LHE file! The most common source of "
                " this error is not having installed LHAPDF with its Python interface. Please make sure that"
                " you have installed this. You can also check the log file produced by MadGraph for a "
                "warning about this. If LHAPDF is correctly installed and you still get this warning, please"
                " check manually whether the LHE file contains weights from PDF variation, and contact"
                " the MadMiner developer team about this. If you continue with the analysis, MadMiner"
                " will disregard PDF uncertainties.",
                systematics_name,
            )
            return {}

        # Output
        nuisance_param_name = "{}_nuisance_param_0".format(systematics_name)
        if len(nuisance_param_definition_parts) > 1:
            second_part = nuisance_param_definition_parts[1]
        else:
            second_part = (None, None)
        return {
            nuisance_param_name: (nuisance_param_definition_parts[0],
                                  second_part, None)
        }

    if syst_type == "pdf":
        nuisance_dict = OrderedDict()

        # Loop over weight groups and weights and identify benchmarks
        for wg in weight_groups:
            try:
                wg_name = wg.attrib["name"]
            except KeyError:
                logger.warning(
                    "New wWeight group: does not have name attribute, skipping"
                )
                continue
            logger.debug("New weight group: %s", wg_name)

            wg_name_lower = wg_name.lower()
            is_pdf_group = (systematics_definition[1] in wg_name_lower
                            or "pdf" in wg_name_lower
                            or "ct" in wg_name_lower)
            if "mg_reweighting" in wg_name_lower or not is_pdf_group:
                continue

            logger.debug("Weight group identified as PDF variation")

            for i, weight in enumerate(wg.findall("weight")):
                try:
                    weight_id = str(weight.attrib["id"])
                    weight_pdf = int(weight.attrib["PDF"])
                except KeyError:
                    logger.warning(
                        "PDF weight does not have all expected attributes")
                    continue

                if weight_pdf % 1000 == 0:  # Central element, not eigenvector of covariance matrix
                    logger.debug(
                        "Identifying PDF weight %s / %s as central element",
                        weight_id, weight_pdf)
                    continue

                logger.debug("Found PDF weight %s / %s", weight_id, weight_pdf)

                # Add every PDF Hessian direction to nuisance parameters
                nuisance_param_name = "{}_nuisance_param_{}".format(
                    systematics_name, i)
                benchmark_name = "{}_benchmark_0".format(nuisance_param_name)
                nuisance_dict[nuisance_param_name] = ((benchmark_name,
                                                       weight_id),
                                                      (None, None), None)

        # Check that everything was found
        if not nuisance_dict:
            logger.warning(
                "Could not find weights for the PDF uncertainties in LHE file! The most common source of this"
                " error is not having installed LHAPDF with its Python interface. Please make sure that you "
                " have installed this. You can also check the log file produced by MadGraph for a warning"
                " about this. If LHAPDF is correctly installed and you still get this warning, please check"
                " manually whether the LHE file contains weights from PDF variation, and contact"
                " the MadMiner developer team about this. If you continue with the analysis, MadMiner"
                " will disregard PDF uncertainties.")
        return nuisance_dict

    raise RuntimeError("Unknown systematics type {}".format(syst_type))