Esempio n. 1
0
    def run(self):
        """
        Converts the merged input ROOT file into the DeepJetCore training format by
        running ``convertFromRoot.py`` in the environment of the compiled
        "deepjetcore" requirement, then copies the produced files to the outputs.
        """
        with self.input()["merged"].localize("r") as inp:
            # write the path of the input file to a temporary file, which serves
            # as the sample list passed to convertFromRoot.py via -i
            samples_file = law.LocalFileTarget(is_tmp=True)
            samples_file.touch(content="{}\n".format(inp.path))

            # tmp dir for output files
            tmp_dir = law.LocalDirectoryTarget(is_tmp=True)

            # create the conversion command, prefixed by the setup command of the
            # deepjetcore requirement so its tools are available in the subshell
            compile_task = self.requires()["deepjetcore"]
            cmd = """
                {} &&
                export HGCALML="$HGC_BASE/modules/HGCalML"
                export DEEPJETCORE_SUBPACKAGE="$HGCALML"
                export PYTHONPATH="$HGCALML/modules:$HGCALML/modules/datastructures:$PYTHONPATH"
                convertFromRoot.py -n 0 --noRelativePaths -c TrainData_{} -o "{}" -i "{}"
            """.format(compile_task.get_setup_cmd(), self.data_structure, tmp_dir.path,
                samples_file.path)

            # run the command
            code = law.util.interruptable_popen(cmd, env=compile_task.get_setup_env(), shell=True,
                executable="/bin/bash")[0]
            if code != 0:
                raise Exception("convertFromRoot.py failed")

        # copy the conversion products to the actual outputs
        # NOTE(review): tmp_dir is an is_tmp target, presumably removed when
        # garbage collected; it is still referenced here, so the files persist
        outp = self.output()
        outp["x"].copy_from_local(tmp_dir.child(outp["x"].basename))
        outp["y"].copy_from_local(tmp_dir.child(outp["y"].basename))
        outp["meta"].copy_from_local(tmp_dir.child(outp["meta"].basename))
        outp["dc"].copy_from_local(tmp_dir.child("dataCollection.dc"))
Esempio n. 2
0
    def merge(self, inputs, output):
        """
        Copies all *inputs* into a local temporary directory, merges them with
        ``hadd`` and publishes the merged file as *output*.

        Raises an exception when the ``hadd`` command exits with a non-zero code.
        """
        tmp_dir = law.LocalDirectoryTarget(is_tmp=True)
        tmp_dir.touch()

        # fetch inputs
        with self.publish_step("fetching inputs ..."):

            def fetch(inp):
                # copy into the tmp dir, bypassing any target-level cache
                inp.copy_to_local(tmp_dir.child(inp.unique_basename, type="f"),
                                  cache=False)
                return inp.unique_basename

            def callback(i):
                self.publish_message("fetch file {} / {}".format(
                    i + 1, len(inputs)))

            bases = law.util.map_verbose(fetch,
                                         inputs,
                                         every=5,
                                         callback=callback)

        # merge using hadd
        with self.publish_step("merging ..."):
            with output.localize("w", cache=False) as tmp_out:
                cmd = "hadd -O -n 0 -d {} {} {}".format(
                    tmp_dir.path, tmp_out.path, " ".join(bases))
                # fix: shell must be the boolean True, not the string "True";
                # the string only worked by accident through truthiness
                code = law.util.interruptable_popen(cmd,
                                                    shell=True,
                                                    executable="/bin/bash",
                                                    cwd=tmp_dir.path)[0]
                if code != 0:
                    raise Exception("hadd failed")

                self.publish_message("merged file size: {:.2f} {}".format(
                    *law.util.human_bytes(os.stat(tmp_out.path).st_size)))
Esempio n. 3
0
    def run(self):
        """
        Downloads all configured setup files into a temporary directory, packs
        them into a tgz archive and transfers the archive to its destination.
        """
        # temporary staging directory for the fetched files
        staging_dir = law.LocalDirectoryTarget(is_tmp=True)
        staging_dir.touch()

        def download(source):
            # sources without a path hash are skipped entirely
            path_hash = self.create_path_hash(source)
            if path_hash is None:
                return

            self.publish_message("download {}".format(source))
            target_path = os.path.join(staging_dir.path, path_hash)

            if source.startswith("http"):
                wget(source, target_path)
            elif source.startswith("/afs") and not os.path.exists(source):
                # if afs is not available on our system, use scp
                proc = subprocess.Popen(
                    ["scp", "{}:{}".format(self.afs_host, source), target_path])
                proc.communicate()  # wait for transfer to finish
            else:
                shutil.copy2(source, target_path)

            # verify that the file actually arrived
            if not os.path.exists(target_path):
                raise Exception("File copy failed!")

        law.util.map_struct(download, self.source_files)

        # pack everything into a temporary archive and transfer it
        archive = law.LocalFileTarget(is_tmp="tgz")
        archive.dump(staging_dir)
        self.transfer(archive)
Esempio n. 4
0
    def localize(self, **kwargs):
        """
        Loads the archived setup files into a unique temporary directory below
        ``$CMSSW_BASE/tmp`` and returns the directory target together with the
        source-file structure mapped to absolute paths (or a falsy value for
        entries without a path hash).
        """
        # unpack the archive into a uniquely named temporary directory
        tmp_path = "$CMSSW_BASE/tmp/{}".format(str(uuid.uuid4()))
        tmp_dir = law.LocalDirectoryTarget(path=tmp_path, is_tmp=True)

        output = self.output()
        if self.replicas >= 1:
            # pick a random replica to spread the load
            output = output.random_target()
        output.load(tmp_dir, **kwargs)

        def abspath(path):
            path_hash = self.create_path_hash(path)
            return path_hash and os.path.join(tmp_dir.path, path_hash)

        return tmp_dir, law.util.map_struct(abspath, self.source_files)
    def run(self):
        """
        Loads the event arrays of all input datasets, creates one stacked
        histogram pdf per configured variable in a temporary directory, and
        dumps that directory to the output target.
        """
        # collect event arrays per dataset, keyed by the first linked process
        events = OrderedDict()
        for dataset, inp in self.input().items():
            first_process = list(dataset.processes.values())[0]
            events[first_process] = inp.load(allow_pickle=True)["events"]
            self.publish_message("loaded events for dataset {}".format(dataset.name))

        # temporary directory collecting the plot files
        plot_dir = law.LocalDirectoryTarget(is_tmp=True)
        plot_dir.touch()

        for variable in self.config_inst.variables:
            pdf_target = plot_dir.child(variable.name + ".pdf", "f")
            stack_plot(events, variable, pdf_target.path)
            self.publish_message("written histogram for variable {}".format(variable.name))

        with self.output().localize("w") as tmp:
            tmp.dump(plot_dir)
Esempio n. 6
0
def text_to_process(content, name="INTERACTIVE"):
    """
    Loads the *content* of a CMSSW Python config file from a string, creates a ``cms.Process`` named
    *name* and returns it. This function requires a CMSSW environment.
    """
    import FWCore.ParameterSet.Config as cms

    # create a tmp dir
    tmp_dir = law.LocalDirectoryTarget(is_tmp=True)
    tmp_dir.touch()

    # dump the file content and make it importable
    tmp_dir.child("cfg.py", type="f").dump(content, formatter="text")

    # fix: drop a previously imported "cfg" module, otherwise the import below
    # would hit the sys.modules cache and repeated calls would silently return
    # the config of an earlier invocation instead of the new *content*
    sys.modules.pop("cfg", None)
    with law.util.patch_object(sys,
                               "path", [tmp_dir.path] + sys.path,
                               lock=True):
        import cfg

    process = cms.Process(name)
    process.extend(cfg)

    return process
Esempio n. 7
0
    def run(self):
        """
        Runs the external converter executable on the localized ntuple file using
        a config rendered from the template shipped with the converter, then
        copies the produced skim file to the output target.
        """
        # determine the converter executable
        inp = self.input()
        converter = inp["converter"].path
        converter_dir = inp["converter"].parent

        # read the config template
        with converter_dir.child("config/config_template.txt").open("r") as f:
            template = f.read()

        # temporary output directory
        output_dir = law.LocalDirectoryTarget(is_tmp=True)
        output_dir.touch()

        # fill template variables
        with inp["ntup"].localize("r") as ntup_file:
            config = template.format(
                input_dir=ntup_file.parent.path,
                input_file=ntup_file.basename,
                output_dir=output_dir.path,
                hist_output_file="no_used.root",
                skim_output_prefix="output_file_",
            )

            # create a config file required by the converter
            config_file = law.LocalFileTarget(is_tmp=True)
            with config_file.open("w") as f:
                f.write(config)

            # run the converter within its own environment
            env_script = converter_dir.child("env.sh").path
            cmd = "source {} '' && {} {}".format(env_script, converter, config_file.path)
            code = law.util.interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
            if code != 0:
                raise Exception("conversion failed")

        # determine the skim output file and copy it to the output target
        # fix: guard against an empty glob result, which previously raised an
        # uninformative IndexError when the converter produced no output file
        matches = output_dir.glob("output_file_*")
        if not matches:
            raise Exception("conversion finished but produced no output file")
        self.output().copy_from_local(output_dir.child(matches[0]))
Esempio n. 8
0
 def lsf_output_directory(self):
     """Return the local directory target where LSF submission meta data is stored."""
     meta_data_path = self.local_path()
     return law.LocalDirectoryTarget(meta_data_path)
Esempio n. 9
0
 def output(self):
     """Return the directory target holding the absolute plots for this branch."""
     path_parts = (self.plots_dir, self.name, self.branch_data['channel'],
                   self.branch_data['observable'])
     return law.LocalDirectoryTarget('{}/{}/Absolute/{}/{}'.format(*path_parts))
Esempio n. 10
0
 def htcondor_output_directory(self):
     """Return the directory target where HTCondor submission meta data is stored."""
     store_path = self.local_path(store="$HGC_STORE")
     return law.LocalDirectoryTarget(store_path)
Esempio n. 11
0
    def run(self):
        """
        Produces trees for a single LFN with cmsRun: localizes the shared setup
        files (JES/JER, lumi file, etc.), optionally downloads the input file via
        xrdcp, builds the cmsRun argument list and runs the treeMaker config,
        writing the "tree" and "meta" outputs.
        """
        lfn = self.input()["lfns"].random_target().load()[self.branch_data]

        setup_files_dir, setup_files = self.requires()["files"].localize()

        # create the temporary dir to run in
        tmp_dir = law.LocalDirectoryTarget(is_tmp=True)
        tmp_dir.touch()

        # manage jes files
        data_src = self.dataset_inst.data_source
        jes_versions = self.config_inst.get_aux("jes_version")[data_src]
        jes_levels = self.config_inst.get_aux("jes_levels")[data_src]
        jes_ranges = law.util.flatten(tpl[:2] for tpl in jes_versions)
        jes_files_dict = setup_files["jes_files"][data_src]
        jes_files = law.util.flatten(
            [[jes_files_dict[version][level] for level in jes_levels]
             for _, _, version in jes_versions])
        jes_unc_files = [
            jes_files_dict[version]["Uncertainty"]
            for _, _, version in jes_versions
        ]
        jes_unc_src_file = setup_files[
            "jes_unc_src_file"] if self.dataset_inst.is_mc else ""

        # determine the xrd redirector and download the file
        redirector = xrd_redirectors[1]  # determine_xrd_redirector(lfn)
        xrd_url = "root://{}/{}".format(redirector, lfn)

        if self.stream_input_file:
            # let cmsRun stream the file directly from the redirector
            input_file = xrd_url
        else:
            # download the input file with multiple attempts before giving up
            input_file = "input_file.root"
            cmd = "xrdcp {} {}".format(xrd_url, input_file)
            for _ in range(self.xrdcp_attempts):
                with self.publish_step(
                        "download input file from {} ...".format(xrd_url)):
                    code = law.util.interruptable_popen(
                        cmd,
                        shell=True,
                        cwd=tmp_dir.path,
                        executable="/bin/bash")[0]
                    if code == 0:
                        break
            else:
                raise Exception("xrdcp failed")
            input_file = "file://" + os.path.join(tmp_dir.path, input_file)

        # cmsRun argument helper, repeats the key for every value in a list
        def cmsRunArg(key, value):
            return " ".join("{}={}".format(key, v)
                            for v in law.util.make_list(value))

        output = self.output()
        # get global tag from dataset if defined, otherwise take default from config
        global_tag = self.dataset_inst.get_aux(
            "global_tag",
            self.config_inst.get_aux("global_tag")[data_src])
        with output["tree"].localize("w") as tmp_tree, output["meta"].localize(
                "w") as tmp_meta:
            args = [
                ("inputFiles", input_file),
                ("outputFile", tmp_tree.path),
                ("campaign", self.config_inst.campaign.name),
                ("metaDataFile", tmp_meta.path),
                ("isData", self.dataset_inst.is_data),
                ("globalTag", global_tag),
                ("lumiFile", setup_files["lumi_file"]),
                ("metFilters",
                 self.config_inst.get_aux("metFilters")[data_src]),
                ("jesFiles", jes_files),
                ("jesRanges", jes_ranges),
                ("jesUncFiles", jes_unc_files),
                ("jesUncSrcFile", jes_unc_src_file),
                ("jesUncSources",
                 self.config_inst.get_aux("jes_sources_{}".format(
                     self.config_inst.get_aux("jes_scheme")))),
                ("jerPtResolutionFile",
                 setup_files["jer_files"]["PtResolution"]),
                ("jerScaleFactorFile", setup_files["jer_files"]["SF"]),
                ("deepCSVWP", self.config_inst.get_aux("working_points")
                 ["deepcsv"]["medium"]),
                ("deepJetWP", self.config_inst.get_aux("working_points")
                 ["deepjet"]["medium"]),
            ]

            # triggers
            for channel_inst, triggers in self.config_inst.get_aux(
                    "triggers").items():
                # special rules may apply for real datasets as triggers can be run dependent
                if self.dataset_inst.is_data:
                    d_ch = self.config_inst.get_aux("dataset_channels")[
                        self.dataset_inst]
                    if d_ch == channel_inst:
                        triggers = self.config_inst.get_aux(
                            "data_triggers").get(self.dataset_inst, triggers)
                args.append((channel_inst.name + "Triggers", triggers))

            # lepton channel for data
            if self.dataset_inst.is_data:
                ch = self.config_inst.get_aux("dataset_channels")[
                    self.dataset_inst].name
                args.append(("leptonChannel", ch))

            # max events
            if not law.is_no_param(self.max_events):
                args.append(("maxEvents", self.max_events))

            # build the cmsRun command
            cfg_file = "treeMaker_cfg.py"
            cmd = "cmsRun " + law.util.rel_path(__file__, "files", cfg_file)
            cmd += " " + " ".join(cmsRunArg(*tpl) for tpl in args)

            # create environment
            env = os.environ.copy()
            env["CMSSW_SEARCH_PATH"] += ":" + setup_files_dir.path
            print("running command: {}".format(cmd))
            for obj in law.util.readable_popen(cmd,
                                               shell=True,
                                               executable="/bin/bash",
                                               cwd=tmp_dir.path,
                                               env=env):
                if isinstance(obj, six.string_types):
                    print(obj)
                    if obj.startswith("Begin processing the"):
                        # NOTE(review): other places call publish_message (no
                        # underscore) — confirm _publish_message is intended
                        self._publish_message(obj)
                else:
                    if obj.returncode != 0:
                        raise Exception("cmsRun failed")

            # fix: corrected typo in the error message ("exising" -> "existing")
            if not tmp_tree.exists():
                raise Exception("output file not existing after cmsRun")
Esempio n. 12
0
    def run(self):
        """
        Computes the integrated luminosity for the triggers of this channel by
        installing and invoking ``brilcalc`` on lxplus and parsing its stdout.
        For runs where multiple OR-connected triggers were active, the maximum
        per-trigger luminosity is used, as smaller values result from prescales.
        """
        # this task relies on brilcalc infrastructure only available on lxplus
        if not law.util.check_bool_flag(os.getenv("JTSF_ON_LXPLUS")):
            raise Exception("{} must run on lxplus".format(
                self.__class__.__name__))

        setup_files_dir, setup_files = self.requires().localize()
        triggers = self.config_inst.get_aux("triggers")[self.channel_inst]
        # unique marker echoed after each brilcalc call, used below to split the
        # combined stdout into per-trigger blocks
        uid = str(uuid.uuid4())

        # a tmp dir
        tmp = law.LocalDirectoryTarget(is_tmp=True)
        tmp.touch()

        # build the command: install brilws locally, then run brilcalc once per
        # trigger path, restricted to stable beams and the optional run range
        triggers_str = " ".join(triggers)
        begin_end = "--begin {} --end {}".format(
            *self.run_range) if self.run_range else ""
        cmd = """
            export PATH="$( pwd )/bin:/afs/cern.ch/cms/lumi/brilconda/bin:$PATH"
            export PYTHONPATH="$( pwd )/lib/python2.7/site-packages:$PYTHONPATH"
            source activate root
            pip install --prefix . --ignore-installed brilws
            >&2 echo "using brilcalc $( brilcalc --version ) from $( which brilcalc )"
            >&2 echo "lumi file: {lumi_file}"
            >&2 echo "norm file: {normtag_file}"
            >&2 echo "triggers : {triggers}"
            >&2 echo "run range: {begin_end}"
            for HLTPATH in {triggers}; do
                >&2 echo "calculate lumi for trigger path $HLTPATH ..."
                brilcalc lumi \
                    -u /pb \
                    --hltpath "$HLTPATH" \
                    --normtag "{normtag_file}" \
                    -i "{lumi_file}" \
                    -b "STABLE BEAMS" \
                    {begin_end} \
                || exit "$?"
                echo "{uid}"
                >&2 echo "done"
            done
        """.format(lumi_file=setup_files["lumi_file"],
                   normtag_file=self.config_inst.get_aux("normtag_file"),
                   triggers=triggers_str,
                   begin_end=begin_end,
                   uid=uid)

        # run the command, capturing stdout for parsing (diagnostics go to stderr)
        code, out, _ = law.util.interruptable_popen(cmd,
                                                    shell=True,
                                                    executable="/bin/bash",
                                                    stdout=subprocess.PIPE,
                                                    cwd=tmp.path)
        if code != 0:
            raise Exception("brilcalc failed")

        # parse the output
        # NOTE(review): assumes interruptable_popen yields decoded text, not
        # bytes — confirm against the law version in use
        blocks = out.split(uid)[:-1]
        lumi_data = {}
        for trigger, block in zip(triggers, blocks):
            # keep only the table part before the "#Summary" section, dropping
            # the trailing table border line
            lines = block[:block.find("#Summary")].strip().split("\n")[:-1]

            # traverse backwards until a line does not start with "|"
            # columns: run:fill, time, ncms, hltpath, delivered, recorded
            while lines:
                line = lines.pop().strip()
                if not line.startswith("|"):
                    break

                parts = [p.strip() for p in line.split("|")[1:-1]]
                run = int(parts[0].split(":")[0])
                path = parts[3]
                lumi = float(parts[5])  # recorded luminosity
                lumi_data.setdefault(run, {})[path] = lumi

        # calculate the lumi
        lumi = 0.
        for data in lumi_data.values():
            # data is a dict "hlt path -> lumi" per run
            # multiple elements mean that multiple, OR-connected triggers were active in that run
            # in this case, use the maximum as smaller values result from prescales
            lumi += max(list(data.values()))

        self.publish_message("Integrated luminosity: {} /pb".format(lumi))