Example #1
0
    def write_input(self):
        """Stage the simulation inputs, hash them, and refresh the output directory on change.

        The base input files, any files they reference through an
        ``INPUT_FILE_INCLUDE`` line and the generated ``<genversion>.input``
        file are collected in a temporary directory. Their contents, together
        with the contents of this source file, are hashed and compared with
        the hash stored in ``<output_dir>/hash.txt``.

        Returns:
            tuple: ``(regenerate, new_hash)``. ``regenerate`` is True when no
            previous hash was found or when it differs from ``new_hash``.
        """
        # Load previous hash here if it exists
        old_hash = None
        hash_file = f"{self.output_dir}/hash.txt"
        if os.path.exists(hash_file):
            with open(hash_file, "r") as f:
                old_hash = f.read().strip()
                self.logger.debug(f"Previous result found, hash is {old_hash}")

        # Put config in a temp directory. The context manager removes it even
        # when one of the copy / hash steps below raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            base_files = self.base_ia + self.base_cc

            # Copy the base files across
            for name in base_files:
                shutil.copy(self.data_dir + name, temp_dir)

            # Copy the include input file if there is one
            scanned = set()
            for name in base_files:
                if name in scanned:
                    continue
                scanned.add(name)
                with open(self.data_dir + name, "r") as f:
                    for line in f:
                        line = line.strip()
                        if line.startswith("INPUT_FILE_INCLUDE"):
                            include_file = line.split(":")[-1].strip()
                            self.logger.debug(f"Copying included file {include_file}")
                            shutil.copy(self.data_dir + include_file, temp_dir)

            # Write the primary input file
            main_input_file = f"{temp_dir}/{self.genversion}.input"
            with open(main_input_file, "w") as f:
                f.writelines(entry + "\n" for entry in self.base)
            self.logger.info(f"Input file written to {main_input_file}")

            # os.listdir already yields unique names; sort them so the hash is
            # independent of directory listing order.
            output_files = [f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))]
            self.logger.debug(f"{len(output_files)} files used to create simulation. Hashing them.")

            # Also add this file to the hash, so if the code changes we also regenerate.
            output_files.append(os.path.abspath(inspect.stack()[0][1]))

            # Get current hash (join once instead of repeated string +=)
            contents = []
            for path in output_files:
                with open(path, "r") as f:
                    contents.append(f.read())
            new_hash = get_hash("".join(contents))
            self.logger.debug(f"Current hash set to {new_hash}")
            regenerate = old_hash is None or old_hash != new_hash

            if regenerate:
                self.logger.info("Running simulation, hash check failed")
                # Guard before wiping the output directory: only proceed for
                # paths without "//" that contain "Pippin".
                if "//" not in self.output_dir and "Pippin" in self.output_dir:
                    self.logger.debug(f"Cleaning output directory {self.output_dir}")
                    shutil.rmtree(self.output_dir, ignore_errors=True)
                    mkdirs(self.output_dir)
                    self.logger.debug(f"Copying from {temp_dir} to {self.output_dir}")
                    copytree(temp_dir, self.output_dir)
                # NOTE(review): the hash is written even when the guard above
                # skipped the copy -- confirm this is intended.
                with open(hash_file, "w") as f:
                    f.write(str(new_hash))
                    self.logger.debug(f"New hash saved to {hash_file}")
                    self.hash_file = hash_file
                chown_dir(self.output_dir)
            else:
                self.logger.info("Hash check passed, not rerunning")
        return regenerate, new_hash
Example #2
0
    def write_input(self, force_refresh):
        """Apply the configuration to the input, stage the files, and refresh the output on change.

        Properties derived from ``self.config`` are set first. The base input
        files, any files they reference through ``INPUT_FILE_INCLUDE`` and the
        generated ``<genversion>.input`` file are then collected in a
        temporary directory and hashed. The output directory is rebuilt when
        the hash changed or ``force_refresh`` is truthy.

        Args:
            force_refresh: When truthy, regenerate regardless of the hash.

        Returns:
            tuple: ``(regenerate, new_hash)``.
        """
        self.set_property("GENVERSION",
                          self.genversion,
                          assignment=": ",
                          section_end="ENDLIST_GENVERSION")

        # Every non-GLOBAL section becomes GENOPT entries keyed by its base file
        for k in self.config.keys():
            if k.upper() == "GLOBAL":
                continue
            run_config = self.config[k]
            run_config_keys = list(run_config.keys())
            assert "BASE" in run_config_keys, "You must specify a base file for each option"
            for key in run_config_keys:
                if key.upper() in self.reserved_keywords:
                    continue
                base_file = run_config["BASE"]
                match = base_file.split(".")[0]
                self.set_property(f"GENOPT({match})",
                                  f"{key} {run_config[key]}",
                                  section_end="ENDLIST_GENVERSION")

        for key in self.config.get("GLOBAL", []):
            if key.upper() == "BASE":
                continue
            self.set_property(key, self.config['GLOBAL'][key])
            # Setting one of the two RANSEED keys removes the other
            if key == "RANSEED_CHANGE":
                self.delete_property("RANSEED_REPEAT")
            elif key == "RANSEED_REPEAT":
                self.delete_property("RANSEED_CHANGE")

        self.set_property("SIMGEN_INFILE_Ia",
                          " ".join(self.base_ia) if self.base_ia else None)
        self.set_property("SIMGEN_INFILE_NONIa",
                          " ".join(self.base_cc) if self.base_cc else None)
        self.set_property("GENPREFIX", self.genversion)

        # Put config in a temp directory. The context manager removes it even
        # when one of the copy / hash steps below raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            base_files = self.base_ia + self.base_cc

            # Copy the base files across
            for name in base_files:
                shutil.copy(self.data_dir + name, temp_dir)

            # Copy the include input file if there is one
            scanned = set()
            for name in base_files:
                if name in scanned:
                    continue
                scanned.add(name)
                with open(self.data_dir + name, "r") as f:
                    for line in f:
                        line = line.strip()
                        if line.startswith("INPUT_FILE_INCLUDE"):
                            include_file = line.split(":")[-1].strip()
                            self.logger.debug(
                                f"Copying included file {include_file}")
                            shutil.copy(self.data_dir + include_file, temp_dir)

            # Write the primary input file
            main_input_file = f"{temp_dir}/{self.genversion}.input"
            with open(main_input_file, "w") as f:
                f.writelines(entry + "\n" for entry in self.base)
            self.logger.info(f"Input file written to {main_input_file}")

            # Sort the staged files so the hash does not depend on listing order
            output_files = [
                f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
            ]
            self.logger.debug(
                f"{len(output_files)} files used to create simulation. Hashing them."
            )

            # Get current hash
            new_hash = self.get_hash_from_files(output_files)
            old_hash = self.get_old_hash()
            regenerate = force_refresh or (old_hash is None
                                           or old_hash != new_hash)

            if regenerate:
                self.logger.info("Running simulation")
                # Guard before wiping the output directory: only proceed for
                # paths without "//" that are longer than 30 characters.
                if "//" not in self.output_dir and len(self.output_dir) > 30:
                    self.logger.debug(
                        f"Cleaning output directory {self.output_dir}")
                    shutil.rmtree(self.output_dir, ignore_errors=True)
                    mkdirs(self.output_dir)
                    self.logger.debug(
                        f"Copying from {temp_dir} to {self.output_dir}")
                    copytree(temp_dir, self.output_dir)
                    self.save_new_hash(new_hash)
                else:
                    self.logger.error(
                        f"Seems to be an issue with the output dir path: {self.output_dir}"
                    )

                chown_dir(self.output_dir)
            else:
                self.logger.info("Hash check passed, not rerunning")
        return regenerate, new_hash
Example #3
0
    def write_input(self, force_refresh):
        """Apply the configuration to the input, stage the files, and refresh the output on change.

        Properties derived from ``self.config`` are set first. The base input
        files and every file reachable through ``INPUT_FILE_INCLUDE`` lines
        are copied into a temporary directory, include paths in the copies are
        rewritten to bare file names, and ``<genversion>.input`` is written
        next to them. The staged files are hashed, and the output directory is
        rebuilt when the hash changed or ``force_refresh`` is truthy.

        Args:
            force_refresh: When truthy, regenerate regardless of the hash.

        Returns:
            tuple: ``(regenerate, new_hash)``.
        """
        self.set_property("GENVERSION",
                          self.genversion,
                          assignment=": ",
                          section_end="ENDLIST_GENVERSION")
        self.set_property("LOGDIR",
                          os.path.basename(self.sim_log_dir),
                          assignment=": ",
                          section_end="ENDLIST_GENVERSION")

        # Every non-GLOBAL section becomes GENOPT entries keyed by its base file
        for k in self.config.keys():
            if k.upper() == "GLOBAL":
                continue
            run_config = self.config[k]
            run_config_keys = list(run_config.keys())
            assert "BASE" in run_config_keys, "You must specify a base file for each option"
            for key in run_config_keys:
                if key.upper() in self.reserved_keywords:
                    continue
                base_file = run_config["BASE"]
                match = os.path.basename(base_file).split(".")[0]
                val = run_config[key]
                if not isinstance(val, list):
                    val = [val]
                for v in val:
                    self.set_property(f"GENOPT({match})",
                                      f"{key} {v}",
                                      section_end="ENDLIST_GENVERSION",
                                      only_add=True)

        if len(self.data_dirs) > 1:
            data_dir = self.data_dirs[0]
            self.set_property("PATH_USER_INPUT", data_dir, assignment=": ")

        # GLOBAL keys listed here are written directly; all others become
        # GENOPT_GLOBAL entries.
        direct_set = (
            "FORMAT_MASK", "RANSEED_REPEAT", "RANSEED_CHANGE",
            "BATCH_INFO", "BATCH_MEM", "NGEN_UNIT", "RESET_CIDOFF",
        )
        for key in self.config.get("GLOBAL", []):
            if key.upper() == "BASE":
                continue
            if key in direct_set:
                self.set_property(key,
                                  self.config["GLOBAL"][key],
                                  assignment=": ")
            else:
                self.set_property(f"GENOPT_GLOBAL: {key}",
                                  self.config["GLOBAL"][key],
                                  assignment=" ")

            # NOTE(review): this only runs when GLOBAL has at least one
            # non-BASE key -- confirm it is meant to live inside the loop.
            if self.derived_batch_info:
                self.set_property("BATCH_INFO",
                                  self.derived_batch_info,
                                  assignment=": ")

            # Setting one of the two RANSEED keys removes the other
            if key == "RANSEED_CHANGE":
                self.delete_property("RANSEED_REPEAT")
            elif key == "RANSEED_REPEAT":
                self.delete_property("RANSEED_CHANGE")

        self.set_property(
            "SIMGEN_INFILE_Ia",
            " ".join([os.path.basename(f)
                      for f in self.base_ia]) if self.base_ia else None)
        self.set_property(
            "SIMGEN_INFILE_NONIa",
            " ".join([os.path.basename(f)
                      for f in self.base_cc]) if self.base_cc else None)
        self.set_property("GENPREFIX", self.genprefix)

        # Put config in a temp directory. The context manager removes it even
        # when one of the copy / sed / hash steps below raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Copy the base files across
            for name in self.base_ia + self.base_cc:
                resolved = get_data_loc(name)
                shutil.copy(resolved, temp_dir)
                self.logger.debug(f"Copying input file {resolved} to {temp_dir}")

            # Copy the include input files if there are any. `pending` grows
            # while it is iterated so copied include files get scanned too.
            seen = set()
            pending = self.base_ia + self.base_cc
            for ff in pending:
                if ff in seen:
                    continue
                seen.add(ff)
                path = get_data_loc(ff)
                copied_path = os.path.join(temp_dir, os.path.basename(path))
                # Read everything up front so the file is closed before sed
                # may rewrite a staged copy in place.
                with open(path, "r") as f:
                    lines = f.readlines()
                for line in lines:
                    line = line.strip()
                    if not line.startswith("INPUT_FILE_INCLUDE"):
                        continue
                    include_file = line.split(":")[-1].strip()
                    include_file_path = get_data_loc(include_file)
                    self.logger.debug(
                        f"Copying INPUT_FILE_INCLUDE file {include_file_path} to {temp_dir}"
                    )

                    include_file_basename = os.path.basename(
                        include_file_path)
                    include_file_output = os.path.join(
                        temp_dir, include_file_basename)
                    if include_file_output in seen:
                        continue

                    # Copy include file into the temp dir
                    shutil.copy(include_file_path, temp_dir)

                    # Then SED the file to replace the full path with just the basename
                    if include_file != include_file_basename:
                        sed_command = f"sed -i -e 's|{include_file}|{include_file_basename}|g' {copied_path}"
                        self.logger.debug(
                            f"Running sed command: {sed_command}")
                        # Argument list instead of a shell string, so paths
                        # with spaces or quotes cannot break the command.
                        subprocess.run(
                            ["sed", "-i", "-e",
                             f"s|{include_file}|{include_file_basename}|g",
                             copied_path],
                            stderr=subprocess.STDOUT,
                            cwd=temp_dir)

                    # And make sure we dont do this file again
                    pending.append(include_file_output)

            # Write the primary input file
            main_input_file = f"{temp_dir}/{self.genversion}.input"
            with open(main_input_file, "w") as f:
                f.writelines(entry + "\n" for entry in self.base)
            self.logger.info(f"Input file written to {main_input_file}")

            # Sort the staged files so the hash does not depend on listing order
            output_files = [
                f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
            ]
            self.logger.debug(
                f"{len(output_files)} files used to create simulation. Hashing them."
            )

            # Get current hash
            new_hash = self.get_hash_from_files(output_files)
            old_hash = self.get_old_hash()
            regenerate = force_refresh or (old_hash is None
                                           or old_hash != new_hash)

            if regenerate:
                self.logger.info("Running simulation")
                # Guard before wiping the output directory: only proceed for
                # paths without "//" that are longer than 30 characters.
                if "//" not in self.output_dir and len(self.output_dir) > 30:
                    self.logger.debug(
                        f"Cleaning output directory {self.output_dir}")
                    shutil.rmtree(self.output_dir, ignore_errors=True)
                    mkdirs(self.output_dir)
                    self.logger.debug(
                        f"Copying from {temp_dir} to {self.output_dir}")
                    copytree(temp_dir, self.output_dir)
                    self.save_new_hash(new_hash)
                else:
                    self.logger.error(
                        f"Seems to be an issue with the output dir path: {self.output_dir}"
                    )

                chown_dir(self.output_dir)
            else:
                self.logger.info("Hash check passed, not rerunning")
        return regenerate, new_hash
Example #4
0
    def prepare_train_job(self, force_refresh):
        """Build the NML file for Nearest Neighbour training and refresh the output if it changed.

        The NML file of the fit dependency is rewritten with ``sed`` into a
        temporary directory, an ``&NNINP`` namelist is appended, and the
        result is hashed. When the hash differs from the stored one, or
        ``force_refresh`` is truthy, ``self.output_dir`` is wiped and replaced
        with the contents of the temporary directory.

        Args:
            force_refresh: When truthy, regenerate regardless of the hash.

        Returns:
            ``(new_hash, train_info_local)`` when the output was regenerated,
            ``(None, train_info_local)`` when the hash was unchanged, and a
            bare ``None`` when the NML file could not be created or the
            FITRES file is missing.
        """
        self.logger.debug("Preparing NML file for Nearest Neighbour training")
        fit_output = self.get_fit_dependency()

        genversion = fit_output["genversion"]
        fitres_dir = fit_output["fitres_dir"]
        fitres_file = fit_output["fitres_file"]
        nml_file_orig = fit_output["nml_file"]

        # Put config in a temp directory. The context manager removes it on
        # every return path, including the early error returns below.
        with tempfile.TemporaryDirectory() as temp_dir:
            outfile_train = f'{self.name}_train.out'
            nml_file_train1 = f'{temp_dir}/{genversion}-2.nml'
            nml_file_train2 = f'{self.output_dir}/{genversion}-2.nml'

            train_info_local = {
                "outfile_NNtrain": outfile_train,
                "nml_file_NNtrain": nml_file_train2,
            }

            # construct sed to copy original NMLFILE and to
            #   + replace OUTDIR:
            #   + include ROOTFILE_OUT (to store histograms for NN train)
            #   + include DONE stamp for Sam/pippen
            #   + run afterburner to process ROOT file and get NN_trainPar;
            #     copy NN_trainPar up to where pippin can find it
            #
            # TODO: Check with Rick if the FITOPT000.ROOT is needed / should be hardcoded
            afterBurn = f'nearnbr_maxFoM.exe FITOPT000.ROOT -truetype 1 -outfile {outfile_train} ; cp {outfile_train} {self.outfile_train}'

            sedstr = 'sed'
            sedstr += (r" -e '/OUTDIR:/a\OUTDIR: %s' " % self.splitfit_output_dir)
            sedstr += r" -e '/OUTDIR:/d'"
            sedstr += r" -e '/DONE_STAMP:/d'"
            sedstr += r" -e '/SNTABLE_LIST/a\    ROOTFILE_OUT = \"bla.root\"'"
            sedstr += r" -e '/_OUT/d '"
            sedstr += (r" -e '/VERSION:/a\VERSION_AFTERBURNER: %s'" % afterBurn)
            sedstr += (r" -e '/VERSION:/a\DONE_STAMP: %s'" % self.done_file)
            sed_command = ("%s %s > %s" % (sedstr, nml_file_orig, nml_file_train1))

            # use system call to apply sed command (the shell is needed for
            # the output redirection)
            subprocess.run(sed_command,
                           stderr=subprocess.STDOUT,
                           cwd=temp_dir,
                           shell=True)

            # make sure that the new NML file is really there
            if not os.path.isfile(nml_file_train1):
                self.logger.error(
                    f"Unable to create {nml_file_train1} with sed command {sed_command}"
                )
                return None

            # check that expected FITRES ref file is really there.
            if not os.path.exists(fitres_file):
                # The message previously lacked the f-prefix and named an
                # undefined variable, so the path was never reported.
                self.logger.error(
                    f"Cannot find expected FITRES file at {fitres_file}")
                return None

            # open NML file in append mode and tack on NNINP namelist
            with open(nml_file_train1, 'a') as f:
                f.write("\n# NNINP below added by prepare_NNtrainJob\n")
                f.write("\n&NNINP \n")
                f.write("   NEARNBR_TRAINFILE_PATH = '%s' \n" % fitres_dir)
                f.write("   NEARNBR_TRAINFILE_LIST = '%s' \n" %
                        os.path.basename(fitres_file))
                f.write("   NEARNBR_SEPMAX_VARDEF  = '%s' \n" % self.nn_options)
                f.write("   NEARNBR_TRUETYPE_VARNAME = 'SIM_TYPE_INDEX' \n")
                f.write("   NEARNBR_TRAIN_ODDEVEN = T \n")
                f.write("\n&END\n")

            input_files = [nml_file_train1]
            old_hash = self.get_old_hash()
            new_hash = self.get_hash_from_files(input_files)

            if force_refresh or new_hash != old_hash:
                self.logger.debug("Regenerating")
                shutil.rmtree(self.output_dir, ignore_errors=True)
                mkdirs(self.output_dir)
                self.logger.debug(f"Copying from {temp_dir} to {self.output_dir}")
                copytree(temp_dir, self.output_dir)
                self.save_new_hash(new_hash)
                return new_hash, train_info_local

            self.logger.debug("Not regenerating")
            return None, train_info_local
Example #5
0
    def write_input(self):
        """Apply the configuration to the YAML input, stage the files, and refresh the output on change.

        ``self.yaml`` is updated in place from ``self.config``. The base input
        files and every file reachable through ``INPUT_FILE_INCLUDE`` lines
        are copied into a temporary directory, include paths in the copies are
        rewritten to bare file names, and ``<genversion>.input`` is written
        next to them via ``self.write_output_file``. The staged files are
        hashed, and the output directory is rebuilt when
        ``self._check_regenerate`` says so.

        Returns:
            tuple: ``(regenerate, new_hash)``.
        """
        # As Pippin only does one GENVERSION at a time, lets extract it first, and also the config
        c = self.yaml["CONFIG"]
        d = self.yaml["GENVERSION_LIST"][0]
        # A missing GENOPT_GLOBAL key is treated like an empty (None) section
        g = self.yaml.get("GENOPT_GLOBAL")

        # Ensure g is a dict with a ref we can update
        if g is None:
            g = {}
            self.yaml["GENOPT_GLOBAL"] = g

        # Start setting properties in the right area
        d["GENVERSION"] = self.genversion

        # Logging now goes in the "CONFIG"
        c["LOGDIR"] = os.path.basename(self.sim_log_dir)

        for k in self.config.keys():
            if k.upper() in self.reserved_top:
                continue
            run_config = self.config[k]
            run_config_keys = list(run_config.keys())
            assert "BASE" in run_config_keys, "You must specify a base file for each option"
            for key in run_config_keys:
                if key.upper() in self.reserved_keywords:
                    continue
                base_file = run_config["BASE"]
                match = os.path.basename(base_file).split(".")[0]
                val = run_config[key]
                if not isinstance(val, list):
                    val = [val]

                genopt = d.setdefault(f"GENOPT({match})", {})
                # NOTE(review): for a list value each element overwrites the
                # previous one, so only the last survives -- confirm intent.
                for v in val:
                    genopt[key] = v

        if len(self.data_dirs) > 1:
            data_dir = self.data_dirs[0]
            c["PATH_USER_INPUT"] = data_dir

        # GLOBAL keys listed here go into CONFIG; all others into GENOPT_GLOBAL
        direct_set = (
            "FORMAT_MASK", "RANSEED_REPEAT", "RANSEED_CHANGE",
            "BATCH_INFO", "BATCH_MEM", "NGEN_UNIT", "RESET_CIDOFF",
        )
        for key in self.config.get("GLOBAL", []):
            if key.upper() == "BASE":
                continue
            if key in direct_set:
                c[key] = self.config["GLOBAL"][key]
            else:
                g[key] = self.config["GLOBAL"][key]

            # NOTE(review): this only runs when GLOBAL has at least one
            # non-BASE key -- confirm it is meant to live inside the loop.
            if self.derived_batch_info:
                c["BATCH_INFO"] = self.derived_batch_info

            # Setting one of the two RANSEED keys removes the other
            if key == "RANSEED_CHANGE" and c.get("RANSEED_REPEAT") is not None:
                del c["RANSEED_REPEAT"]
            elif key == "RANSEED_REPEAT" and c.get(
                    "RANSEED_CHANGE") is not None:
                del c["RANSEED_CHANGE"]

        # pop() rather than del: the key may be absent from the YAML
        if self.base_ia:
            c["SIMGEN_INFILE_Ia"] = [os.path.basename(f) for f in self.base_ia]
        else:
            c.pop("SIMGEN_INFILE_Ia", None)

        if self.base_cc:
            c["SIMGEN_INFILE_NONIa"] = [
                os.path.basename(f) for f in self.base_cc
            ]
        else:
            c.pop("SIMGEN_INFILE_NONIa", None)

        c["GENPREFIX"] = self.genprefix

        # Put config in a temp directory. The context manager removes it even
        # when one of the copy / sed / hash steps below raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Copy the base files across
            for name in self.base_ia + self.base_cc:
                resolved = get_data_loc(name)
                shutil.copy(resolved, temp_dir)
                self.logger.debug(f"Copying input file {resolved} to {temp_dir}")

            # Copy the include input files if there are any. `pending` grows
            # while it is iterated so copied include files get scanned too.
            seen = set()
            pending = self.base_ia + self.base_cc
            for ff in pending:
                if ff in seen:
                    continue
                seen.add(ff)
                path = get_data_loc(ff)
                copied_path = os.path.join(temp_dir, os.path.basename(path))
                # Read everything up front so the file is closed before sed
                # may rewrite a staged copy in place.
                with open(path, "r") as f:
                    lines = f.readlines()
                for line in lines:
                    line = line.strip()
                    if not line.startswith("INPUT_FILE_INCLUDE"):
                        continue
                    include_file = line.split(":")[-1].strip()
                    include_file_path = get_data_loc(include_file)
                    self.logger.debug(
                        f"Copying INPUT_FILE_INCLUDE file {include_file_path} to {temp_dir}"
                    )

                    include_file_basename = os.path.basename(
                        include_file_path)
                    include_file_output = os.path.join(
                        temp_dir, include_file_basename)
                    if include_file_output in seen:
                        continue

                    # Copy include file into the temp dir
                    shutil.copy(include_file_path, temp_dir)

                    # Then SED the file to replace the full path with just the basename
                    if include_file != include_file_basename:
                        sed_command = f"sed -i -e 's|{include_file}|{include_file_basename}|g' {copied_path}"
                        self.logger.debug(
                            f"Running sed command: {sed_command}")
                        # Argument list instead of a shell string, so paths
                        # with spaces or quotes cannot break the command.
                        subprocess.run(
                            ["sed", "-i", "-e",
                             f"s|{include_file}|{include_file_basename}|g",
                             copied_path],
                            stderr=subprocess.STDOUT,
                            cwd=temp_dir)

                    # And make sure we dont do this file again
                    pending.append(include_file_output)

            # Write the primary input file
            main_input_file = f"{temp_dir}/{self.genversion}.input"
            self.write_output_file(main_input_file)

            # Sort the staged files so the hash does not depend on listing order
            output_files = [
                f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
            ]
            self.logger.debug(
                f"{len(output_files)} files used to create simulation. Hashing them."
            )

            # Get current hash
            new_hash = self.get_hash_from_files(output_files)
            regenerate = self._check_regenerate(new_hash)

            if regenerate:
                self.logger.info("Running simulation")
                # Guard before wiping the output directory: only proceed for
                # paths without "//" that are longer than 30 characters.
                if "//" not in self.output_dir and len(self.output_dir) > 30:
                    self.logger.debug(
                        f"Cleaning output directory {self.output_dir}")
                    shutil.rmtree(self.output_dir, ignore_errors=True)
                    mkdirs(self.output_dir)
                    self.logger.debug(
                        f"Copying from {temp_dir} to {self.output_dir}")
                    copytree(temp_dir, self.output_dir)
                    self.save_new_hash(new_hash)
                else:
                    self.logger.error(
                        f"Seems to be an issue with the output dir path: {self.output_dir}"
                    )

                chown_dir(self.output_dir)
            else:
                self.logger.info("Hash check passed, not rerunning")
        return regenerate, new_hash