def run(self):
    """
    Converts the merged input ROOT file into the DeepJetCore training format by
    running ``convertFromRoot.py`` inside the compiled DeepJetCore environment,
    then copies the produced x/y/meta/dataCollection files to the task outputs.
    """
    with self.input()["merged"].localize("r") as inp:
        # write the path of the input file to a temporary file
        # (convertFromRoot.py expects a text file listing input samples)
        samples_file = law.LocalFileTarget(is_tmp=True)
        samples_file.touch(content="{}\n".format(inp.path))

        # tmp dir for output files
        tmp_dir = law.LocalDirectoryTarget(is_tmp=True)

        # create the conversion command
        # the setup command of the required deepjetcore task prepares the env,
        # then HGCalML modules are put on PYTHONPATH before the conversion runs
        compile_task = self.requires()["deepjetcore"]
        cmd = """
            {} &&
            export HGCALML="$HGC_BASE/modules/HGCalML"
            export DEEPJETCORE_SUBPACKAGE="$HGCALML"
            export PYTHONPATH="$HGCALML/modules:$HGCALML/modules/datastructures:$PYTHONPATH"
            convertFromRoot.py -n 0 --noRelativePaths -c TrainData_{} -o "{}" -i "{}"
        """.format(compile_task.get_setup_cmd(), self.data_structure, tmp_dir.path,
            samples_file.path)

        # run the command
        code = law.util.interruptable_popen(cmd, env=compile_task.get_setup_env(),
            shell=True, executable="/bin/bash")[0]
        if code != 0:
            raise Exception("convertFromRoot.py failed")

        # copy the conversion products to the actual task outputs
        outp = self.output()
        outp["x"].copy_from_local(tmp_dir.child(outp["x"].basename))
        outp["y"].copy_from_local(tmp_dir.child(outp["y"].basename))
        outp["meta"].copy_from_local(tmp_dir.child(outp["meta"].basename))
        outp["dc"].copy_from_local(tmp_dir.child("dataCollection.dc"))
def merge(self, inputs, output):
    """
    Fetches all *inputs* into a temporary directory and merges them into
    *output* with ROOT's ``hadd``. Raises an exception when hadd fails.
    """
    tmp_dir = law.LocalDirectoryTarget(is_tmp=True)
    tmp_dir.touch()

    # fetch inputs
    with self.publish_step("fetching inputs ..."):
        def fetch(inp):
            # copy without caching; unique_basename avoids name clashes
            inp.copy_to_local(tmp_dir.child(inp.unique_basename, type="f"), cache=False)
            return inp.unique_basename

        def callback(i):
            self.publish_message("fetch file {} / {}".format(i + 1, len(inputs)))

        bases = law.util.map_verbose(fetch, inputs, every=5, callback=callback)

    # merge using hadd
    with self.publish_step("merging ..."):
        with output.localize("w", cache=False) as tmp_out:
            cmd = "hadd -O -n 0 -d {} {} {}".format(
                tmp_dir.path, tmp_out.path, " ".join(bases))
            # fix: shell must be the boolean True, not the string "True",
            # which previously only worked because non-empty strings are truthy
            code = law.util.interruptable_popen(cmd, shell=True,
                executable="/bin/bash", cwd=tmp_dir.path)[0]
            if code != 0:
                raise Exception("hadd failed")

            self.publish_message("merged file size: {:.2f} {}".format(
                *law.util.human_bytes(os.stat(tmp_out.path).st_size)))
def run(self):
    """
    Downloads or copies all configured setup files into a temporary directory,
    archives them and transfers the archive to the remote target.
    """
    # create a tmp dir
    tmp_dir = law.LocalDirectoryTarget(is_tmp=True)
    tmp_dir.touch()

    # download all setup files
    def download(src):
        h = self.create_path_hash(src)
        if h is None:
            # sources without a path hash are skipped on purpose
            return
        self.publish_message("download {}".format(src))
        dst = os.path.join(tmp_dir.path, h)
        if src.startswith("http"):
            wget(src, dst)
        elif src.startswith("/afs") and not os.path.exists(src):
            # if afs is not available on our system, use scp
            p = subprocess.Popen(["scp", "{}:{}".format(self.afs_host, src), dst])
            p.communicate()  # wait for transfer to finish
            # fix: previously a failed scp went unnoticed; check its exit code
            if p.returncode != 0:
                raise Exception("scp of {} failed".format(src))
        else:
            shutil.copy2(src, dst)
        # verify the result for every branch, not just the plain copy
        if not os.path.exists(dst):
            raise Exception("File copy failed!")

    law.util.map_struct(download, self.source_files)

    # create a tmp archive
    tmp_arc = law.LocalFileTarget(is_tmp="tgz")
    tmp_arc.dump(tmp_dir)

    # transfer
    self.transfer(tmp_arc)
def localize(self, **kwargs):
    """
    Unpacks the setup-file archive into a uniquely named temporary directory
    below ``$CMSSW_BASE/tmp`` and returns a 2-tuple: the directory target and
    the source-file structure mapped to absolute paths inside that directory.
    """
    # unique tmp dir for this localization
    unpack_dir = law.LocalDirectoryTarget(
        path="$CMSSW_BASE/tmp/{}".format(str(uuid.uuid4())), is_tmp=True)

    # when replicas exist, load from a randomly chosen one
    target = self.output()
    if self.replicas >= 1:
        target = target.random_target()
    target.load(unpack_dir, **kwargs)

    def to_abspath(src):
        # map each source to its hashed location; None hashes propagate as-is
        hashed = self.create_path_hash(src)
        return hashed and os.path.join(unpack_dir.path, hashed)

    return unpack_dir, law.util.map_struct(to_abspath, self.source_files)
def run(self):
    """
    Loads the event arrays of all input datasets, keyed by the first process
    linked to each dataset, draws one stacked plot per configured variable into
    a temporary directory and dumps that directory into the task output.
    """
    # map event arrays to the first linked process of each dataset
    events = OrderedDict()
    for dataset, inp in self.input().items():
        proc = list(dataset.processes.values())[0]
        events[proc] = inp.load(allow_pickle=True)["events"]
        self.publish_message("loaded events for dataset {}".format(dataset.name))

    # collect plots in a tmp dir before publishing them at once
    plot_dir = law.LocalDirectoryTarget(is_tmp=True)
    plot_dir.touch()

    for var in self.config_inst.variables:
        pdf_target = plot_dir.child(var.name + ".pdf", "f")
        stack_plot(events, var, pdf_target.path)
        self.publish_message("written histogram for variable {}".format(var.name))

    with self.output().localize("w") as tmp:
        tmp.dump(plot_dir)
def text_to_process(content, name="INTERACTIVE"):
    """
    Loads the *content* of a CMSSW Python config file from a string, creates a
    ``cms.Process`` named *name* and returns it. This function requires a CMSSW
    environment.
    """
    import FWCore.ParameterSet.Config as cms

    # write the content into a temporary, importable module
    cfg_dir = law.LocalDirectoryTarget(is_tmp=True)
    cfg_dir.touch()
    cfg_dir.child("cfg.py", type="f").dump(content, formatter="text")

    # import the module with the tmp dir temporarily prepended to sys.path
    with law.util.patch_object(sys, "path", [cfg_dir.path] + sys.path, lock=True):
        import cfg

    proc = cms.Process(name)
    proc.extend(cfg)
    return proc
def run(self):
    """
    Runs the external converter executable on the localized ntuple file using a
    config rendered from the converter's template, then copies the produced
    skim file to the task output.
    """
    # determine the converter executable
    inp = self.input()
    converter = inp["converter"].path
    converter_dir = inp["converter"].parent

    # read the config template
    with converter_dir.child("config/config_template.txt").open("r") as f:
        template = f.read()

    # temporary output directory
    output_dir = law.LocalDirectoryTarget(is_tmp=True)
    output_dir.touch()

    # fill template variables
    with inp["ntup"].localize("r") as ntup_file:
        config = template.format(
            input_dir=ntup_file.parent.path,
            input_file=ntup_file.basename,
            output_dir=output_dir.path,
            hist_output_file="no_used.root",
            skim_output_prefix="output_file_",
        )

        # create a config file required by the converter
        config_file = law.LocalFileTarget(is_tmp=True)
        with config_file.open("w") as f:
            f.write(config)

        # run the converter within its own environment
        env_script = converter_dir.child("env.sh").path
        cmd = "source {} '' && {} {}".format(env_script, converter, config_file.path)
        code = law.util.interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
        if code != 0:
            raise Exception("conversion failed")

    # determine the skim output file and copy it to the task output
    # fix: fail with a clear message instead of a bare IndexError when the
    # converter produced no matching file
    matches = output_dir.glob("output_file_*")
    if not matches:
        raise Exception("no converter output file found in {}".format(output_dir.path))
    self.output().copy_from_local(output_dir.child(matches[0]))
def lsf_output_directory(self):
    """Return the local directory target where LSF submission meta data is stored."""
    submission_path = self.local_path()
    return law.LocalDirectoryTarget(submission_path)
def output(self):
    """Return the directory target holding the absolute plots for this branch."""
    channel = self.branch_data['channel']
    observable = self.branch_data['observable']
    path = '{}/{}/Absolute/{}/{}'.format(self.plots_dir, self.name, channel, observable)
    return law.LocalDirectoryTarget(path)
def htcondor_output_directory(self):
    """Return the directory target where HTCondor submission data is stored."""
    store_path = self.local_path(store="$HGC_STORE")
    return law.LocalDirectoryTarget(store_path)
def run(self):
    """
    Produces the ntuple tree for one LFN: localizes the setup files, optionally
    downloads the input file via xrdcp, assembles the cmsRun arguments (JES/JER
    files, triggers, global tag, etc.) and runs the treeMaker config, streaming
    its output. Raises when xrdcp or cmsRun fail or the output tree is missing.
    """
    lfn = self.input()["lfns"].random_target().load()[self.branch_data]
    setup_files_dir, setup_files = self.requires()["files"].localize()

    # create the temporary dir to run in
    tmp_dir = law.LocalDirectoryTarget(is_tmp=True)
    tmp_dir.touch()

    # manage jes files
    data_src = self.dataset_inst.data_source
    jes_versions = self.config_inst.get_aux("jes_version")[data_src]
    jes_levels = self.config_inst.get_aux("jes_levels")[data_src]
    jes_ranges = law.util.flatten(tpl[:2] for tpl in jes_versions)
    jes_files_dict = setup_files["jes_files"][data_src]
    jes_files = law.util.flatten(
        [[jes_files_dict[version][level] for level in jes_levels]
         for _, _, version in jes_versions])
    jes_unc_files = [
        jes_files_dict[version]["Uncertainty"] for _, _, version in jes_versions
    ]
    jes_unc_src_file = setup_files["jes_unc_src_file"] if self.dataset_inst.is_mc else ""

    # determine the xrd redirector and download the file
    redirector = xrd_redirectors[1]  #determine_xrd_redirector(lfn)
    xrd_url = "root://{}/{}".format(redirector, lfn)

    if self.stream_input_file:
        # stream directly from the redirector
        input_file = xrd_url
    else:
        # download the file with a limited number of attempts
        input_file = "input_file.root"
        cmd = "xrdcp {} {}".format(xrd_url, input_file)
        for _ in range(self.xrdcp_attempts):
            with self.publish_step("download input file from {} ...".format(xrd_url)):
                code = law.util.interruptable_popen(cmd, shell=True,
                    cwd=tmp_dir.path, executable="/bin/bash")[0]
                if code == 0:
                    break
        else:
            raise Exception("xrdcp failed")
        input_file = "file://" + os.path.join(tmp_dir.path, input_file)

    # cmsRun argument helper
    def cmsRunArg(key, value):
        return " ".join("{}={}".format(key, v) for v in law.util.make_list(value))

    output = self.output()

    # get global tag from dataset if defined, otherwise take default from config
    global_tag = self.dataset_inst.get_aux(
        "global_tag", self.config_inst.get_aux("global_tag")[data_src])

    with output["tree"].localize("w") as tmp_tree, output["meta"].localize(
            "w") as tmp_meta:
        args = [
            ("inputFiles", input_file),
            ("outputFile", tmp_tree.path),
            ("campaign", self.config_inst.campaign.name),
            ("metaDataFile", tmp_meta.path),
            ("isData", self.dataset_inst.is_data),
            ("globalTag", global_tag),
            ("lumiFile", setup_files["lumi_file"]),
            ("metFilters", self.config_inst.get_aux("metFilters")[data_src]),
            ("jesFiles", jes_files),
            ("jesRanges", jes_ranges),
            ("jesUncFiles", jes_unc_files),
            ("jesUncSrcFile", jes_unc_src_file),
            ("jesUncSources", self.config_inst.get_aux("jes_sources_{}".format(
                self.config_inst.get_aux("jes_scheme")))),
            ("jerPtResolutionFile", setup_files["jer_files"]["PtResolution"]),
            ("jerScaleFactorFile", setup_files["jer_files"]["SF"]),
            ("deepCSVWP", self.config_inst.get_aux("working_points")
                ["deepcsv"]["medium"]),
            ("deepJetWP", self.config_inst.get_aux("working_points")
                ["deepjet"]["medium"]),
        ]

        # triggers
        for channel_inst, triggers in self.config_inst.get_aux("triggers").items():
            # special rules may apply for real datasets as triggers can be run dependent
            if self.dataset_inst.is_data:
                d_ch = self.config_inst.get_aux("dataset_channels")[self.dataset_inst]
                if d_ch == channel_inst:
                    triggers = self.config_inst.get_aux("data_triggers").get(
                        self.dataset_inst, triggers)
            args.append((channel_inst.name + "Triggers", triggers))

        # lepton channel for data
        if self.dataset_inst.is_data:
            ch = self.config_inst.get_aux("dataset_channels")[self.dataset_inst].name
            args.append(("leptonChannel", ch))

        # max events
        if not law.is_no_param(self.max_events):
            args.append(("maxEvents", self.max_events))

        # build the cmsRun command
        cfg_file = "treeMaker_cfg.py"
        cmd = "cmsRun " + law.util.rel_path(__file__, "files", cfg_file)
        cmd += " " + " ".join(cmsRunArg(*tpl) for tpl in args)

        # create environment
        env = os.environ.copy()
        env["CMSSW_SEARCH_PATH"] += ":" + setup_files_dir.path

        print("running command: {}".format(cmd))
        for obj in law.util.readable_popen(cmd, shell=True, executable="/bin/bash",
                cwd=tmp_dir.path, env=env):
            if isinstance(obj, six.string_types):
                print(obj)
                if obj.startswith("Begin processing the"):
                    # fix: use the public publish_message, consistent with the
                    # rest of this file (was self._publish_message)
                    self.publish_message(obj)
            else:
                if obj.returncode != 0:
                    raise Exception("cmsRun failed")

        if not tmp_tree.exists():
            # fix: typo in error message ("exising" -> "existing")
            raise Exception("output file not existing after cmsRun")
def run(self):
    """
    Computes the integrated luminosity per configured trigger path with
    brilcalc on lxplus. The brilcalc table output is parsed per run, and when
    several OR-connected triggers were active in a run, the maximum lumi is
    used (smaller values stem from prescales).
    """
    # this task relies on lxplus-specific paths (brilconda, afs)
    if not law.util.check_bool_flag(os.getenv("JTSF_ON_LXPLUS")):
        raise Exception("{} must run on lxplus".format(self.__class__.__name__))

    setup_files_dir, setup_files = self.requires().localize()
    triggers = self.config_inst.get_aux("triggers")[self.channel_inst]
    # unique marker echoed after each brilcalc call, used below to split the
    # combined stdout into per-trigger blocks
    uid = str(uuid.uuid4())

    # a tmp dir
    tmp = law.LocalDirectoryTarget(is_tmp=True)
    tmp.touch()

    # build the command
    triggers_str = " ".join(triggers)
    begin_end = "--begin {} --end {}".format(
        *self.run_range) if self.run_range else ""
    # brilws is installed into the tmp dir; diagnostics go to stderr so that
    # stdout only contains the parseable brilcalc tables and uid markers
    cmd = """
        export PATH="$( pwd )/bin:/afs/cern.ch/cms/lumi/brilconda/bin:$PATH"
        export PYTHONPATH="$( pwd )/lib/python2.7/site-packages:$PYTHONPATH"
        source activate root
        pip install --prefix . --ignore-installed brilws
        >&2 echo "using brilcalc $( brilcalc --version ) from $( which brilcalc )"
        >&2 echo "lumi file: {lumi_file}"
        >&2 echo "norm file: {normtag_file}"
        >&2 echo "triggers : {triggers}"
        >&2 echo "run range: {begin_end}"
        for HLTPATH in {triggers}; do
            >&2 echo "calculate lumi for trigger path $HLTPATH ..."
            brilcalc lumi \
                -u /pb \
                --hltpath "$HLTPATH" \
                --normtag "{normtag_file}" \
                -i "{lumi_file}" \
                -b "STABLE BEAMS" \
                {begin_end} \
                || exit "$?"
            echo "{uid}"
            >&2 echo "done"
        done
    """.format(lumi_file=setup_files["lumi_file"],
        normtag_file=self.config_inst.get_aux("normtag_file"),
        triggers=triggers_str, begin_end=begin_end, uid=uid)

    # run the command, capturing stdout for parsing
    code, out, _ = law.util.interruptable_popen(cmd, shell=True,
        executable="/bin/bash", stdout=subprocess.PIPE, cwd=tmp.path)
    if code != 0:
        raise Exception("brilcalc failed")

    # parse the output; the trailing split element (after the last uid) is empty
    blocks = out.split(uid)[:-1]
    lumi_data = {}
    for trigger, block in zip(triggers, blocks):
        # keep only the table part before the "#Summary" section
        lines = block[:block.find("#Summary")].strip().split("\n")[:-1]
        # traverse backwards until a line does not start with "|"
        # columns: run:fill, time, ncms, hltpath, delivered, recorded
        while lines:
            line = lines.pop().strip()
            if not line.startswith("|"):
                break
            parts = [p.strip() for p in line.split("|")[1:-1]]
            run = int(parts[0].split(":")[0])
            path = parts[3]
            lumi = float(parts[5])
            lumi_data.setdefault(run, {})[path] = lumi

    # calculate the lumi
    lumi = 0.
    for data in lumi_data.values():
        # data is a dict "hlt path -> lumi" per run
        # multiple elements mean that multiple, OR-connected triggers were active in that run
        # in this case, use the maximum as smaller values result from prescales
        lumi += max(list(data.values()))

    self.publish_message("Integrated luminosity: {} /pb".format(lumi))