Example #1
0
    def run(self):
        """Run ``convertFromRoot.py`` on the merged input and store its results.

        The localized "merged" input is listed in a temporary samples file, the
        conversion command is executed in a bash shell using the setup command
        and environment of the required "deepjetcore" task, and the files
        written to a temporary directory are copied to the task outputs.

        Raises:
            Exception: when the conversion command exits with a non-zero code.
        """
        with self.input()["merged"].localize("r") as merged:
            # the converter reads its input paths from a text file
            file_list = law.LocalFileTarget(is_tmp=True)
            file_list.touch(content="{}\n".format(merged.path))

            # temporary directory that receives the converted files
            tmp_dir = law.LocalDirectoryTarget(is_tmp=True)

            # assemble the conversion command
            deepjetcore = self.requires()["deepjetcore"]
            cmd_template = """
                {} &&
                export HGCALML="$HGC_BASE/modules/HGCalML"
                export DEEPJETCORE_SUBPACKAGE="$HGCALML"
                export PYTHONPATH="$HGCALML/modules:$HGCALML/modules/datastructures:$PYTHONPATH"
                convertFromRoot.py -n 0 --noRelativePaths -c TrainData_{} -o "{}" -i "{}"
            """
            cmd = cmd_template.format(
                deepjetcore.get_setup_cmd(),
                self.data_structure,
                tmp_dir.path,
                file_list.path,
            )

            # execute it and check the exit code (first element of the result)
            result = law.util.interruptable_popen(
                cmd,
                env=deepjetcore.get_setup_env(),
                shell=True,
                executable="/bin/bash",
            )
            if result[0] != 0:
                raise Exception("convertFromRoot.py failed")

        # these outputs are looked up in the tmp dir under their own basename
        outputs = self.output()
        for key in ("x", "y", "meta"):
            target = outputs[key]
            target.copy_from_local(tmp_dir.child(target.basename))

        # the data collection file has a fixed name
        outputs["dc"].copy_from_local(tmp_dir.child("dataCollection.dc"))
Example #2
0
    def run(self):
        """Collect all source files in a temporary directory, archive and transfer them.

        Each entry of ``self.source_files`` is fetched into a temporary
        directory under the name returned by ``self.create_path_hash``. The
        directory is then dumped into a temporary target created with
        ``is_tmp="tgz"``, which is handed to ``self.transfer``.

        Raises:
            Exception: when a fetched file does not exist at its destination.
        """
        # temporary directory holding the fetched files
        staging_dir = law.LocalDirectoryTarget(is_tmp=True)
        staging_dir.touch()

        def fetch(src):
            # sources without a path hash are skipped
            path_hash = self.create_path_hash(src)
            if path_hash is None:
                return

            self.publish_message("download {}".format(src))
            dst = os.path.join(staging_dir.path, path_hash)

            if src.startswith("http"):
                wget(src, dst)
            elif src.startswith("/afs") and not os.path.exists(src):
                # afs is not available on this system, so copy via scp and
                # wait for the transfer to finish
                remote = "{}:{}".format(self.afs_host, src)
                subprocess.Popen(["scp", remote, dst]).communicate()
            else:
                shutil.copy2(src, dst)

            if not os.path.exists(dst):
                raise Exception("File copy failed!")

        # fetch every file in the (possibly nested) source structure
        law.util.map_struct(fetch, self.source_files)

        # archive the directory
        archive = law.LocalFileTarget(is_tmp="tgz")
        archive.dump(staging_dir)

        # transfer
        self.transfer(archive)
 def output(self):
   """Return the local log file target derived from the FastProd output name.

   The basename of the ``FastProd`` output for this branch loses its last two
   dot-separated fields and gets ``log`` appended; the file is placed below
   ``merge_dir/name/<channel>``.
   """
   prod_path = FastProd(branch = self.branch).output().path
   fields = os.path.basename(prod_path).split('.')
   # strip the two trailing dot-separated fields
   for _ in range(2):
     fields.pop()
   log_name = '.'.join(fields + ['log'])
   full_path = '{}/{}/{}/{}'.format(self.merge_dir, self.name, self.branch_data['channel'], log_name)
   return law.LocalFileTarget(full_path)
Example #4
0
    def run(self):
        """Bundle the software directory into a local archive and run the parent task.

        The archive path is the value of ``self.env["JTSF_SOFTWARE"]`` with
        ``.tgz`` appended and is stored in ``self.source_path``. Archive
        members whose name ends in ``.pyc``, ``/.git`` or ``.tgz`` are left out.
        """
        # create the local bundle
        self.source_path = self.env["JTSF_SOFTWARE"] + ".tgz"
        bundle = law.LocalFileTarget(self.source_path, is_tmp=True)

        def _filter(tarinfo):
            # raw string: "\." and "\/" are invalid escapes in a normal string
            # literal and trigger warnings on recent Python versions
            if re.search(r"(\.pyc|\/\.git|\.tgz)$", tarinfo.name):
                return None
            return tarinfo

        # the directory to archive is the archive path without its extension
        bundle.dump(os.path.splitext(self.source_path)[0], filter=_filter)
        self.publish_message("bundled software archive")

        # super run will upload all files for us
        super(UploadSoftware, self).run()
Example #5
0
    def run(self):
        """Bundle CMSSW into a temporary archive, report its size and transfer it."""
        # build the archive in a temporary target
        archive = law.LocalFileTarget(is_tmp="tgz")
        self.bundle(archive)

        # report the human-readable size
        size_info = law.util.human_bytes(archive.stat.st_size)
        message = "bundled CMSSW archive, size is {:.2f} {}".format(*size_info)
        self.publish_message(message)

        # ship the archive, then flag this task as complete
        self.transfer(archive)
        self.mark_complete()
Example #6
0
    def run(self):
        """Bundle the repository into a temporary archive, report its size and transfer it."""
        # build the archive in a temporary target
        archive = law.LocalFileTarget(is_tmp="tgz")
        self.bundle(archive)

        # report the human-readable size
        size_info = law.util.human_bytes(archive.stat.st_size)
        message = "bundled repository archive, size is {:.2f} {}".format(*size_info)
        self.publish_message(message)

        # ship the archive
        self.transfer(archive)
Example #7
0
    def run(self):
        """Query the LFNs of all dataset keys via ``dasgoclient`` and transfer them as json.

        One ``dasgoclient`` query is run per entry of ``self.dataset_inst.keys``
        and the non-empty output lines are collected. The total is compared to
        ``self.dataset_inst.n_files`` before the list is dumped to a temporary
        json target and handed to ``self.transfer``.

        Raises:
            Exception: when a ``dasgoclient`` call exits with a non-zero code.
            ValueError: when the number of collected LFNs differs from
                ``self.dataset_inst.n_files``.
        """
        lfns = []
        for key in self.dataset_inst.keys:
            print("get lfns for key {}".format(key))
            cmd = "dasgoclient -query='file dataset={}' -limit=0".format(key)
            code, out, _ = law.util.interruptable_popen(cmd,
                                                        shell=True,
                                                        stdout=subprocess.PIPE,
                                                        executable="/bin/bash")
            if code != 0:
                raise Exception("dasgoclient query failed")

            # drop blank lines: splitting an empty response yields [""], which
            # would otherwise be counted as one (bogus) lfn
            lfns.extend(line for line in out.strip().split("\n") if line)

        if len(lfns) != self.dataset_inst.n_files:
            raise ValueError("Number of lfns does not match number of files "
                             "for dataset {}".format(self.dataset_inst.name))

        tmp = law.LocalFileTarget(is_tmp="json")
        tmp.dump(lfns)
        self.transfer(tmp)
Example #8
0
    def run(self):
        """Run the converter executable on the ntuple input and store the skim output.

        A config file is rendered from ``config/config_template.txt`` next to
        the converter, the converter is run in a bash shell after sourcing
        ``env.sh`` from the same directory, and the first file matching
        ``output_file_*`` in a temporary output directory is copied to the
        task output.

        Raises:
            Exception: when the converter exits with a non-zero code, or when
                it leaves no ``output_file_*`` file in the output directory.
        """
        # determine the converter executable
        inp = self.input()
        converter = inp["converter"].path
        converter_dir = inp["converter"].parent

        # read the config template
        with converter_dir.child("config/config_template.txt").open("r") as f:
            template = f.read()

        # temporary output directory
        output_dir = law.LocalDirectoryTarget(is_tmp=True)
        output_dir.touch()

        # fill template variables
        with inp["ntup"].localize("r") as ntup_file:
            config = template.format(
                input_dir=ntup_file.parent.path,
                input_file=ntup_file.basename,
                output_dir=output_dir.path,
                hist_output_file="no_used.root",
                skim_output_prefix="output_file_",
            )

            # create a config file required by the converter
            config_file = law.LocalFileTarget(is_tmp=True)
            with config_file.open("w") as f:
                f.write(config)

            # run the converter
            env_script = converter_dir.child("env.sh").path
            cmd = "source {} '' && {} {}".format(env_script, converter, config_file.path)
            code = law.util.interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
            if code != 0:
                raise Exception("conversion failed")

        # determine the skim output file and copy it to the task output; fail
        # with a clear message instead of an IndexError when nothing was written
        skim_files = output_dir.glob("output_file_*")
        if not skim_files:
            raise Exception("conversion produced no file matching 'output_file_*' in {}".format(
                output_dir.path))
        self.output().copy_from_local(output_dir.child(skim_files[0]))
Example #9
0
    def run(self):
        """Bundle the software directory, report the archive size and transfer it.

        The directory is read from the ``HGC_SOFTWARE`` environment variable.
        Archive members whose name ends in ``.pyc``, ``/.git``, ``.tgz`` or
        ``__pycache__`` are excluded.
        """
        source_dir = os.environ["HGC_SOFTWARE"]

        # the archive is placed next to the software directory
        archive = law.LocalFileTarget(source_dir + ".tgz", is_tmp=True)

        # members matching this pattern are left out of the archive
        excluded = re.compile(r"(\.pyc|\/\.git|\.tgz|__pycache__)$")

        def _keep(tarinfo):
            return None if excluded.search(tarinfo.name) else tarinfo

        # write the archive using the custom member filter
        archive.dump(source_dir, filter=_keep)

        # report the human-readable size
        size_info = law.util.human_bytes(archive.stat.st_size)
        self.publish_message("bundled software archive, size is {:.2f} {}".format(*size_info))

        # ship the archive, then flag this task as complete
        self.transfer(archive)
        self.mark_complete()
Example #10
0
File: tasks.py Project: yrath/law
 def local_target(self, *path, **kwargs):
     """Return a ``law.LocalFileTarget`` for the result of ``self.local_path(*path)``.

     Additional keyword arguments are forwarded to the target constructor.
     """
     resolved = self.local_path(*path)
     return law.LocalFileTarget(resolved, **kwargs)
Example #11
0
 def local_target(self, *path):
     """Return a ``law.LocalFileTarget`` for the result of ``self.local_path(*path)``."""
     resolved = self.local_path(*path)
     return law.LocalFileTarget(resolved)
Example #12
0
 def output(self):
     """Return the local text file target whose name encodes ``self.n_nums``."""
     file_path = "data/docker/numbers_%i.txt" % self.n_nums
     return law.LocalFileTarget(file_path)
Example #13
0
    def run(self):
        """Map trigger menus to their selected runs and HLT paths and save the result as json.

        Runs are taken from ``self.run_numbers`` (single numbers or
        ``start-end`` ranges, end inclusive) or, when that is empty, from the
        keys of the json lumi file. Menus without selected runs are dropped,
        paths are obtained by yielding ``GetPathsFromMenu`` tasks per menu and
        optionally filtered with ``self.hlt_paths``.
        """
        # read menu data
        menu_data = self.input().load(formatter="json")

        # expand run numbers into a set, as they are only used for membership
        # tests below (a list lookup per run would be quadratic)
        if self.run_numbers:
            run_numbers = set()
            for r in self.run_numbers:
                if r.count("-") == 1:
                    start, end = [int(s) for s in r.split("-")]
                    run_numbers.update(range(start, end + 1))
                else:
                    run_numbers.add(int(r))
        else:
            lumi_data = law.LocalFileTarget(self.lumi_file).load(formatter="json")
            run_numbers = {int(r) for r in lumi_data}

        # reduce menu data to a simple mapping menu -> valid runs, skipping
        # menus without any selected run
        menu_runs = {}
        for menu, data in six.iteritems(menu_data):
            runs = [r for r in data["runs"] if r in run_numbers]
            if runs:
                menu_runs[menu] = runs
        self.publish_message("found {} trigger menus".format(len(menu_runs)))

        # get all paths for all menus
        paths_inputs = yield {
            menu: GetPathsFromMenu.req(self, hlt_menu=menu)
            for menu in menu_runs
        }
        menu_paths = {
            menu: sorted(inp.load(formatter="json"))
            for menu, inp in six.iteritems(paths_inputs)
        }

        # filter by given hlt path patterns and drop menus left without paths
        if self.hlt_paths:
            menu_paths = {
                menu: [p for p in paths if law.util.multi_match(p, self.hlt_paths, mode=any)]
                for menu, paths in six.iteritems(menu_paths)
            }
            menu_paths = {
                menu: paths
                for menu, paths in six.iteritems(menu_paths)
                if paths
            }

        # merge output data
        data = {
            menu: dict(runs=menu_runs[menu], paths=paths)
            for menu, paths in six.iteritems(menu_paths)
        }

        # save the output and print the summary
        output = self.output()
        output.parent.touch()
        output.dump(data, indent=4, formatter="json")
        self.summary()
Example #14
0
 def output(self):
     """Return the local ``.tab.gz`` target below ``<merge_dir>/<name>/Combined/Final``.

     The file name is built from ``self.process`` and the ``order`` and
     ``obs`` entries of ``self.branch_data``.
     """
     template = '{}/{}/Combined/Final/{}.{}.{}.tab.gz'
     target_path = template.format(
         self.merge_dir,
         self.name,
         self.process,
         self.branch_data['order'],
         self.branch_data['obs'],
     )
     return law.LocalFileTarget(target_path)
 def output(self):
     """Return the local file target for ``data.npz`` as resolved by ``self.local_path``."""
     npz_path = self.local_path("data.npz")
     return law.LocalFileTarget(npz_path)
Example #16
0
 def run(self):
     """Create the bundle in a temporary target and transfer it."""
     # temporary target created with is_tmp="tgz"
     archive = law.LocalFileTarget(is_tmp="tgz")

     # fill the archive and ship it
     self.bundle(archive)
     self.transfer(archive)
Example #17
0
    def run(self):
        """Collect menus, runs, HLT paths and filter names and save them as json.

        Strategy:
        1. Get the list of valid run numbers from the lumi file.
        2. Get all menus and associated runs, and filter the latter by (1).
        3. Filter menus using the provided menu patterns.
        4. For all menus, get the list of paths and filter them using the provided path patterns.
        5. Get filter names for each menu and path combination.
        6. Save the data.

        Menus, paths and filter names are obtained by yielding the
        ``GetMenusInData``, ``GetPathsFromMenu`` and ``GetFilterNamesFromMenu``
        tasks and loading their json outputs.
        """
        # coloring and colored formatter helpers
        def col(s):
            return law.util.colored(s, color="light_blue", style="bright")

        def fmt(s, *args):
            return s.format(*(col(arg) for arg in args))

        # 1
        lumi_data = law.LocalFileTarget(self.lumi_file).load(formatter="json")
        valid_runs = [
            int(run) for run, section in six.iteritems(lumi_data)
            if law.util.flatten(section)
        ]
        # set for the membership tests in step 2 (a list lookup per run would
        # be quadratic)
        valid_run_set = set(valid_runs)
        self.publish_message(fmt("found {} valid runs in lumi file", len(valid_runs)))

        # 2
        all_menu_runs = (yield GetMenusInData.req(self)).load(formatter="json")
        menu_runs = {
            menu: [
                run for run in data["runs"]
                if run in valid_run_set
            ]
            for menu, data in six.iteritems(all_menu_runs)
        }

        # 3
        menu_runs = {
            menu: runs for menu, runs in six.iteritems(menu_runs)
            if runs and law.util.multi_match(menu, self.hlt_menus, mode=any)
        }
        self.publish_message(fmt("found a total of {} valid runs in {} menus:\n{} ",
            sum(len(runs) for runs in six.itervalues(menu_runs)), len(menu_runs),
            "\n".join(menu_runs)))

        # 4
        paths_inputs = yield {
            menu: GetPathsFromMenu.req(self, hlt_menu=menu)
            for menu in menu_runs
        }
        menu_paths = {
            menu: [
                p for p in inp.load(formatter="json")
                if law.util.multi_match(p, self.hlt_paths)
            ]
            for menu, inp in six.iteritems(paths_inputs)
        }

        # 5
        # flatten to (menu, path) pairs; a nested comprehension avoids the
        # repeated list copies of sum(lists, [])
        menu_path_pairs = [
            (menu, path)
            for menu, paths in six.iteritems(menu_paths)
            for path in paths
        ]
        filter_inputs = yield {
            (menu, path): GetFilterNamesFromMenu.req(self, hlt_menu=menu, hlt_path=path)
            for menu, path in menu_path_pairs
        }
        filter_names = {
            (menu, path): [d["name"] for d in inps["filters"].load(formatter="json")]
            for (menu, path), inps in six.iteritems(filter_inputs)
        }

        # 6
        data = []
        for menu, runs in six.iteritems(menu_runs):
            data.append(dict(
                menu=menu,
                runs=runs,
                paths=[
                    dict(
                        name=path,
                        filters=filter_names[(menu, path)],
                    )
                    for path in menu_paths[menu]
                ],
            ))

        # save the output and print the summary
        output = self.output()
        output.parent.touch()
        output.dump(data, indent=4, formatter="json")
        self.summary()
Example #18
0
 def output(self):
     """Return the local text file target whose name encodes ``n_nums`` and ``n_bins``."""
     file_path = "data/docker/binned_%i_%i.txt" % (self.n_nums, self.n_bins)
     return law.LocalFileTarget(file_path)
Example #19
0
 def output(self):
     """Return the local file target for the ``analyser`` file of the input generator module."""
     # NOTE(review): the path contains "$HGC_BASE"; presumably the target
     # expands environment variables - confirm
     analyser_path = "$HGC_BASE/modules/hgcal-rechit-input-dat-gen/analyser"
     return law.LocalFileTarget(analyser_path)
Example #20
0
 def output(self):
   """Return the local ``.log`` target below ``<merge_dir>/<name>/Combined/Final``.

   The file name is built from ``self.process`` and the ``channel`` and
   ``observable`` entries of ``self.branch_data``.
   """
   template = '{}/{}/Combined/Final/{}.{}.{}.log'
   log_path = template.format(
     self.merge_dir,
     self.name,
     self.process,
     self.branch_data['channel'],
     self.branch_data['observable'],
   )
   return law.LocalFileTarget(log_path)
Example #21
0
 def output(self):
     """Return the local file target for the compiled DeepJetCore ``classdict.so``."""
     # NOTE(review): the path contains "$HGC_BASE"; presumably the target
     # expands environment variables - confirm
     library_path = "$HGC_BASE/modules/DeepJetCore/compiled/classdict.so"
     return law.LocalFileTarget(library_path)