def run(self): with self.input()["merged"].localize("r") as inp: # write the path of the input file to a temporary file samples_file = law.LocalFileTarget(is_tmp=True) samples_file.touch(content="{}\n".format(inp.path)) # tmp dir for output files tmp_dir = law.LocalDirectoryTarget(is_tmp=True) # create the conversion command compile_task = self.requires()["deepjetcore"] cmd = """ {} && export HGCALML="$HGC_BASE/modules/HGCalML" export DEEPJETCORE_SUBPACKAGE="$HGCALML" export PYTHONPATH="$HGCALML/modules:$HGCALML/modules/datastructures:$PYTHONPATH" convertFromRoot.py -n 0 --noRelativePaths -c TrainData_{} -o "{}" -i "{}" """.format(compile_task.get_setup_cmd(), self.data_structure, tmp_dir.path, samples_file.path) # run the command code = law.util.interruptable_popen(cmd, env=compile_task.get_setup_env(), shell=True, executable="/bin/bash")[0] if code != 0: raise Exception("convertFromRoot.py failed") outp = self.output() outp["x"].copy_from_local(tmp_dir.child(outp["x"].basename)) outp["y"].copy_from_local(tmp_dir.child(outp["y"].basename)) outp["meta"].copy_from_local(tmp_dir.child(outp["meta"].basename)) outp["dc"].copy_from_local(tmp_dir.child("dataCollection.dc"))
def run(self):
    # create a tmp dir
    tmp_dir = law.LocalDirectoryTarget(is_tmp=True)
    tmp_dir.touch()

    # download all setup files
    def download(src):
        h = self.create_path_hash(src)
        if h is None:
            return
        self.publish_message("download {}".format(src))
        dst = os.path.join(tmp_dir.path, h)
        if src.startswith("http"):
            wget(src, dst)
        # if afs is not available on our system, use scp
        elif src.startswith("/afs") and not os.path.exists(src):
            p = subprocess.Popen(["scp", "{}:{}".format(self.afs_host, src), dst])
            p.communicate()  # wait for transfer to finish
        else:
            shutil.copy2(src, dst)
        if not os.path.exists(dst):
            raise Exception("File copy failed!")

    law.util.map_struct(download, self.source_files)

    # create a tmp archive
    tmp_arc = law.LocalFileTarget(is_tmp="tgz")
    tmp_arc.dump(tmp_dir)

    # transfer
    self.transfer(tmp_arc)
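# The `wget` helper called above is not defined in this excerpt. A minimal
# sketch of what it could look like, assuming a plain HTTP(S) download via the
# standard library (the name and signature are taken from the call site, the
# body is an assumption):
import shutil

from six.moves.urllib.request import urlopen

def wget(src, dst):
    # stream the remote file to dst instead of loading it into memory at once
    with open(dst, "wb") as f:
        shutil.copyfileobj(urlopen(src), f)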
def output(self):
    # derive the log file name from the FastProd output by replacing the last
    # two extensions with "log"
    basename = os.path.basename(FastProd(branch=self.branch).output().path)
    parts = basename.split('.')
    parts.pop()
    parts.pop()
    parts.append('log')
    outfile = '.'.join(parts)
    return law.LocalFileTarget('{}/{}/{}/{}'.format(
        self.merge_dir, self.name, self.branch_data['channel'], outfile))
def run(self):
    # create the local bundle
    self.source_path = self.env["JTSF_SOFTWARE"] + ".tgz"
    bundle = law.LocalFileTarget(self.source_path, is_tmp=True)

    # exclude compiled python files, git metadata and nested archives
    def _filter(tarinfo):
        return None if re.search(r"(\.pyc|/\.git|\.tgz)$", tarinfo.name) else tarinfo

    bundle.dump(os.path.splitext(self.source_path)[0], filter=_filter)

    self.publish_message("bundled software archive")

    # super run will upload all files for us
    super(UploadSoftware, self).run()
def run(self):
    # create the bundle
    bundle = law.LocalFileTarget(is_tmp="tgz")
    self.bundle(bundle)

    # log the size
    self.publish_message("bundled CMSSW archive, size is {:.2f} {}".format(
        *law.util.human_bytes(bundle.stat.st_size)))

    # transfer the bundle and mark the task as complete
    self.transfer(bundle)
    self.mark_complete()
def run(self):
    # create the bundle
    bundle = law.LocalFileTarget(is_tmp="tgz")
    self.bundle(bundle)

    # log the size
    self.publish_message("bundled repository archive, size is {:.2f} {}".format(
        *law.util.human_bytes(bundle.stat.st_size)))

    # transfer the bundle
    self.transfer(bundle)
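# Note on the two run() methods above: `bundle`, `transfer` and `mark_complete`
# are inherited from base classes that are not part of this excerpt. In law,
# law.contrib.cms.BundleCMSSW, law.tasks.TransferLocalFile and
# law.tasks.RunOnceTask provide methods with these names; a rough sketch of the
# contract they fulfill (simplified, not the actual implementations):
class TransferSketch(object):

    def transfer(self, tmp):
        # copy the temporary local archive to the final, possibly remote output
        self.output().copy_from_local(tmp)

    def mark_complete(self):
        # flag a run-once task as done so that complete() returns True
        self._complete = True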
def run(self):
    lfns = []
    for key in self.dataset_inst.keys:
        print("get lfns for key {}".format(key))
        cmd = "dasgoclient -query='file dataset={}' -limit=0".format(key)
        # interruptable_popen returns (returncode, stdout, stderr)
        code, out, _ = law.util.interruptable_popen(cmd, shell=True,
            stdout=subprocess.PIPE, executable="/bin/bash")
        if code != 0:
            raise Exception("dasgoclient query failed")
        lfns.extend(out.strip().split("\n"))

    if len(lfns) != self.dataset_inst.n_files:
        raise ValueError("number of lfns does not match number of files "
            "for dataset {}".format(self.dataset_inst.name))

    tmp = law.LocalFileTarget(is_tmp="json")
    tmp.dump(lfns)
    self.transfer(tmp)
def run(self):
    # determine the converter executable
    inp = self.input()
    converter = inp["converter"].path
    converter_dir = inp["converter"].parent

    # read the config template
    with converter_dir.child("config/config_template.txt").open("r") as f:
        template = f.read()

    # temporary output directory
    output_dir = law.LocalDirectoryTarget(is_tmp=True)
    output_dir.touch()

    # fill template variables
    with inp["ntup"].localize("r") as ntup_file:
        config = template.format(
            input_dir=ntup_file.parent.path,
            input_file=ntup_file.basename,
            output_dir=output_dir.path,
            hist_output_file="no_used.root",
            skim_output_prefix="output_file_",
        )

        # create a config file required by the converter
        config_file = law.LocalFileTarget(is_tmp=True)
        with config_file.open("w") as f:
            f.write(config)

        # run the converter
        env_script = converter_dir.child("env.sh").path
        cmd = "source {} '' && {} {}".format(env_script, converter, config_file.path)
        code = law.util.interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
        if code != 0:
            raise Exception("conversion failed")

    # determine the skim output file and copy it to the output target
    output_basename = output_dir.glob("output_file_*")[0]
    self.output().copy_from_local(output_dir.child(output_basename))
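# For reference, the config template filled above must contain named
# placeholders matching the keys of the format() call; the keys are taken from
# the code, the layout is an assumption:
#
#   input_dir:          {input_dir}
#   input_file:         {input_file}
#   output_dir:         {output_dir}
#   hist_output_file:   {hist_output_file}
#   skim_output_prefix: {skim_output_prefix}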
def run(self):
    software_path = os.environ["HGC_SOFTWARE"]

    # create the local bundle
    bundle = law.LocalFileTarget(software_path + ".tgz", is_tmp=True)

    # exclude compiled python files, git metadata, caches and nested archives
    def _filter(tarinfo):
        if re.search(r"(\.pyc|/\.git|\.tgz|__pycache__)$", tarinfo.name):
            return None
        return tarinfo

    # create the archive with a custom filter
    bundle.dump(software_path, filter=_filter)

    # log the size
    self.publish_message("bundled software archive, size is {:.2f} {}".format(
        *law.util.human_bytes(bundle.stat.st_size)))

    # transfer the bundle and mark the task as complete
    self.transfer(bundle)
    self.mark_complete()
def local_target(self, *path, **kwargs):
    return law.LocalFileTarget(self.local_path(*path), **kwargs)
def local_target(self, *path):
    return law.LocalFileTarget(self.local_path(*path))
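# The two local_target variants above delegate to a `local_path` helper that is
# not shown here. A minimal sketch of such a helper, assuming targets live
# below a common store directory and are namespaced by task family and version
# (the store location and the version parameter are assumptions):
import os

class LocalPathSketch(object):

    def local_path(self, *path):
        # build <store>/<task_family>/<version>/<path...>
        store = os.environ.get("ANALYSIS_STORE", "/tmp/store")  # hypothetical
        parts = (self.task_family, self.version) + path
        return os.path.join(store, *parts)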
def output(self): return law.LocalFileTarget("data/docker/numbers_%i.txt" % self.n_nums)
def run(self):
    # read menu data
    menu_data = self.input().load(formatter="json")

    # expand run numbers
    if self.run_numbers:
        run_numbers = set()
        for r in self.run_numbers:
            if r.count("-") == 1:
                start, end = [int(s) for s in r.split("-")]
                run_numbers |= set(range(start, end + 1))
            else:
                run_numbers.add(int(r))
        run_numbers = sorted(run_numbers)
    else:
        lumi_data = law.LocalFileTarget(self.lumi_file).load(formatter="json")
        run_numbers = [int(r) for r in lumi_data.keys()]

    # reduce menu data to a simple mapping menu -> valid runs
    menu_runs = {
        menu: [r for r in data["runs"] if r in run_numbers]
        for menu, data in six.iteritems(menu_data)
    }
    menu_runs = {menu: runs for menu, runs in six.iteritems(menu_runs) if runs}
    self.publish_message("found {} trigger menus".format(len(menu_runs)))

    # get all paths for all menus
    paths_inputs = yield {
        menu: GetPathsFromMenu.req(self, hlt_menu=menu)
        for menu in menu_runs
    }
    menu_paths = {
        menu: sorted(inp.load(formatter="json"))
        for menu, inp in six.iteritems(paths_inputs)
    }

    # filter by given hlt path patterns
    if self.hlt_paths:
        menu_paths = {
            menu: [p for p in paths if law.util.multi_match(p, self.hlt_paths, mode=any)]
            for menu, paths in six.iteritems(menu_paths)
        }
        menu_paths = {menu: paths for menu, paths in six.iteritems(menu_paths) if paths}

    # merge output data
    data = {
        menu: dict(runs=menu_runs[menu], paths=paths)
        for menu, paths in six.iteritems(menu_paths)
    }

    # save the output and print the summary
    output = self.output()
    output.parent.touch()
    output.dump(data, indent=4, formatter="json")
    self.summary()
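# Note on the `yield` statements above (and in the run() method further below):
# this is luigi's dynamic dependency mechanism. Yielding a task (or a dict of
# tasks) from run() suspends the method until the dependency is complete, and
# the yield expression evaluates to its output target(s). A minimal
# self-contained sketch of the pattern with hypothetical task names:
import luigi

class Dependency(luigi.Task):

    def output(self):
        return luigi.LocalTarget("dep.txt")

    def run(self):
        with self.output().open("w") as f:
            f.write("42\n")

class Consumer(luigi.Task):

    def run(self):
        # evaluates to Dependency's output target once the dependency has run
        dep_output = yield Dependency()
        with dep_output.open("r") as f:
            print(f.read())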
def output(self):
    return law.LocalFileTarget('{}/{}/Combined/Final/{}.{}.{}.tab.gz'.format(
        self.merge_dir, self.name, self.process, self.branch_data['order'],
        self.branch_data['obs']))
def output(self): return law.LocalFileTarget(self.local_path("data.npz"))
def run(self):
    # create the bundle
    bundle = law.LocalFileTarget(is_tmp="tgz")
    self.bundle(bundle)

    # transfer the bundle
    self.transfer(bundle)
def run(self):
    # strategy:
    # 1. Get the list of valid run numbers from the lumi file.
    # 2. Get all menus and associated runs, and filter the latter by (1).
    # 3. Filter menus using the provided menu patterns.
    # 4. For all menus, get the list of paths and filter them using the provided path patterns.
    # 5. Get filter names for each menu and path combination.
    # 6. Save the data.

    # coloring and colored formatter helpers
    col = lambda s: law.util.colored(s, color="light_blue", style="bright")
    fmt = lambda s, *args: s.format(*(col(arg) for arg in args))

    # 1
    lumi_data = law.LocalFileTarget(self.lumi_file).load(formatter="json")
    valid_runs = [
        int(run) for run, section in six.iteritems(lumi_data)
        if law.util.flatten(section)
    ]
    self.publish_message(fmt("found {} valid runs in lumi file", len(valid_runs)))

    # 2
    all_menu_runs = (yield GetMenusInData.req(self)).load(formatter="json")
    menu_runs = {
        menu: [run for run in data["runs"] if run in valid_runs]
        for menu, data in six.iteritems(all_menu_runs)
    }

    # 3
    menu_runs = {
        menu: runs for menu, runs in six.iteritems(menu_runs)
        if runs and law.util.multi_match(menu, self.hlt_menus, mode=any)
    }
    self.publish_message(fmt("found a total of {} valid runs in {} menus:\n{}",
        sum(len(runs) for runs in six.itervalues(menu_runs)), len(menu_runs),
        "\n".join(menu_runs.keys())))

    # 4
    paths_inputs = yield {
        menu: GetPathsFromMenu.req(self, hlt_menu=menu)
        for menu in menu_runs
    }
    menu_paths = {
        menu: [
            p for p in inp.load(formatter="json")
            if law.util.multi_match(p, self.hlt_paths)
        ]
        for menu, inp in six.iteritems(paths_inputs)
    }

    # 5
    menu_path_pairs = sum((
        [(menu, path) for path in paths]
        for menu, paths in six.iteritems(menu_paths)
    ), [])
    filter_inputs = yield {
        (menu, path): GetFilterNamesFromMenu.req(self, hlt_menu=menu, hlt_path=path)
        for menu, path in menu_path_pairs
    }
    filter_names = {
        (menu, path): [d["name"] for d in inps["filters"].load(formatter="json")]
        for (menu, path), inps in six.iteritems(filter_inputs)
    }

    # 6
    data = []
    for menu, runs in six.iteritems(menu_runs):
        data.append(dict(
            menu=menu,
            runs=runs,
            paths=[
                dict(name=path, filters=filter_names[(menu, path)])
                for path in menu_paths[menu]
            ],
        ))

    # save the output and print the summary
    output = self.output()
    output.parent.touch()
    output.dump(data, indent=4, formatter="json")
    self.summary()
def output(self): return law.LocalFileTarget("data/docker/binned_%i_%i.txt" % (self.n_nums, self.n_bins))
def output(self): return law.LocalFileTarget( "$HGC_BASE/modules/hgcal-rechit-input-dat-gen/analyser")
def output(self):
    return law.LocalFileTarget('{}/{}/Combined/Final/{}.{}.{}.log'.format(
        self.merge_dir, self.name, self.process, self.branch_data['channel'],
        self.branch_data['observable']))
def output(self): return law.LocalFileTarget( "$HGC_BASE/modules/DeepJetCore/compiled/classdict.so")