def write_input(self):
    """Stage simulation input files, hash them, and refresh the output dir if the hash changed.

    Copies the Ia/CC base input files (plus any INPUT_FILE_INCLUDE files they
    reference) into a temp dir, writes the primary .input file, hashes the lot
    (including this source file itself), and — if the hash differs from the
    previously saved one — wipes and repopulates ``self.output_dir``.

    Returns:
        (regenerate, new_hash): whether the outputs were regenerated, and the
        hash computed over the staged input files.
    """
    # Load previous hash here if it exists
    old_hash = None
    hash_file = f"{self.output_dir}/hash.txt"
    if os.path.exists(hash_file):
        with open(hash_file, "r") as f:
            old_hash = f.read().strip()
            self.logger.debug(f"Previous result found, hash is {old_hash}")

    # Put config in a temp directory
    temp_dir_obj = tempfile.TemporaryDirectory()
    temp_dir = temp_dir_obj.name

    # Copy the base files across
    # NOTE(review): assumes self.data_dir ends with a path separator — confirm,
    # since paths are built by plain string concatenation here.
    for f in self.base_ia:
        shutil.copy(self.data_dir + f, temp_dir)
    for f in self.base_cc:
        shutil.copy(self.data_dir + f, temp_dir)

    # Copy the include input file if there is one
    input_copied = []
    fs = self.base_ia + self.base_cc
    for ff in fs:
        if ff not in input_copied:
            input_copied.append(ff)
            # Scan each base file for INPUT_FILE_INCLUDE directives and stage
            # the referenced files alongside it.
            with open(self.data_dir + ff, "r") as f:
                for line in f.readlines():
                    line = line.strip()
                    if line.startswith("INPUT_FILE_INCLUDE"):
                        include_file = line.split(":")[-1].strip()
                        self.logger.debug(f"Copying included file {include_file}")
                        shutil.copy(self.data_dir + include_file, temp_dir)

    # Write the primary input file
    main_input_file = f"{temp_dir}/{self.genversion}.input"
    with open(main_input_file, "w") as f:
        f.writelines(map(lambda s: s + '\n', self.base))
    self.logger.info(f"Input file written to {main_input_file}")

    # Remove any duplicates and order the output files
    output_files = [f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))]
    self.logger.debug(f"{len(output_files)} files used to create simulation. Hashing them.")

    # Also add this file to the hash, so if the code changes we also regenerate. Smart.
    output_files.append(os.path.abspath(inspect.stack()[0][1]))

    # Get current hash
    string_to_hash = ""
    for file in output_files:
        with open(file, "r") as f:
            string_to_hash += f.read()
    new_hash = get_hash(string_to_hash)
    self.logger.debug(f"Current hash set to {new_hash}")
    regenerate = old_hash is None or old_hash != new_hash

    if regenerate:
        self.logger.info(f"Running simulation, hash check failed")
        # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
        # Guard against rmtree on a malformed path: require no "//" and that
        # the path contains "Pippin" before deleting anything.
        if "//" not in self.output_dir and "Pippin" in self.output_dir:
            self.logger.debug(f"Cleaning output directory {self.output_dir}")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.logger.debug(f"Copying from {temp_dir} to {self.output_dir}")
            copytree(temp_dir, self.output_dir)
            # Persist the new hash so the next run can skip regeneration.
            with open(hash_file, "w") as f:
                f.write(str(new_hash))
            self.logger.debug(f"New hash saved to {hash_file}")
            self.hash_file = hash_file
        chown_dir(self.output_dir)
    else:
        self.logger.info("Hash check passed, not rerunning")
    temp_dir_obj.cleanup()
    return regenerate, new_hash
def write_input(self, force_refresh):
    """Render the sim input file via set_property edits, stage files, and regenerate on hash change.

    First patches the in-memory base input (GENVERSION, per-version GENOPTs,
    GLOBAL keys, SIMGEN_INFILE lists, GENPREFIX), then stages all input files
    into a temp dir, hashes them, and rebuilds ``self.output_dir`` if
    ``force_refresh`` is set or the hash differs from the saved one.

    Args:
        force_refresh: when truthy, regenerate regardless of the hash check.

    Returns:
        (regenerate, new_hash)
    """
    self.set_property("GENVERSION", self.genversion, assignment=": ", section_end="ENDLIST_GENVERSION")
    for k in self.config.keys():
        if k.upper() != "GLOBAL":
            # Every non-GLOBAL section describes one sim variant and must name
            # its base input file.
            run_config = self.config[k]
            run_config_keys = list(run_config.keys())
            assert "BASE" in run_config_keys, "You must specify a base file for each option"
            for key in run_config_keys:
                if key.upper() in self.reserved_keywords:
                    continue
                base_file = run_config["BASE"]
                # GENOPT sections are keyed by the base file's stem.
                match = base_file.split(".")[0]
                self.set_property(f"GENOPT({match})", f"{key} {run_config[key]}", section_end="ENDLIST_GENVERSION")

    for key in self.config.get("GLOBAL", []):
        if key.upper() == "BASE":
            continue
        self.set_property(key, self.config['GLOBAL'][key])
        # RANSEED_CHANGE and RANSEED_REPEAT are mutually exclusive; setting one
        # removes the other.
        if key == "RANSEED_CHANGE":
            self.delete_property("RANSEED_REPEAT")
        elif key == "RANSEED_REPEAT":
            self.delete_property("RANSEED_CHANGE")

    # Passing None presumably deletes/blanks the property — confirm against set_property.
    self.set_property("SIMGEN_INFILE_Ia", " ".join(self.base_ia) if self.base_ia else None)
    self.set_property("SIMGEN_INFILE_NONIa", " ".join(self.base_cc) if self.base_cc else None)
    self.set_property("GENPREFIX", self.genversion)

    # Put config in a temp directory
    temp_dir_obj = tempfile.TemporaryDirectory()
    temp_dir = temp_dir_obj.name

    # Copy the base files across
    for f in self.base_ia:
        shutil.copy(self.data_dir + f, temp_dir)
    for f in self.base_cc:
        shutil.copy(self.data_dir + f, temp_dir)

    # Copy the include input file if there is one
    input_copied = []
    fs = self.base_ia + self.base_cc
    for ff in fs:
        if ff not in input_copied:
            input_copied.append(ff)
            with open(self.data_dir + ff, "r") as f:
                for line in f.readlines():
                    line = line.strip()
                    if line.startswith("INPUT_FILE_INCLUDE"):
                        include_file = line.split(":")[-1].strip()
                        self.logger.debug(
                            f"Copying included file {include_file}")
                        shutil.copy(self.data_dir + include_file, temp_dir)

    # Write the primary input file
    main_input_file = f"{temp_dir}/{self.genversion}.input"
    with open(main_input_file, "w") as f:
        f.writelines(map(lambda s: s + '\n', self.base))
    self.logger.info(f"Input file written to {main_input_file}")

    # Remove any duplicates and order the output files
    output_files = [
        f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
    ]
    self.logger.debug(
        f"{len(output_files)} files used to create simulation. Hashing them."
    )

    # Get current hash
    new_hash = self.get_hash_from_files(output_files)
    old_hash = self.get_old_hash()
    regenerate = force_refresh or (old_hash is None or old_hash != new_hash)
    if regenerate:
        self.logger.info(f"Running simulation")
        # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
        # Sanity-check the path (no "//", reasonably long) before rmtree.
        if "//" not in self.output_dir and len(self.output_dir) > 30:
            self.logger.debug(
                f"Cleaning output directory {self.output_dir}")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.logger.debug(
                f"Copying from {temp_dir} to {self.output_dir}")
            copytree(temp_dir, self.output_dir)
            self.save_new_hash(new_hash)
        else:
            self.logger.error(
                f"Seems to be an issue with the output dir path: {self.output_dir}"
            )
        chown_dir(self.output_dir)
    else:
        self.logger.info("Hash check passed, not rerunning")
    temp_dir_obj.cleanup()
    return regenerate, new_hash
def write_input(self, force_refresh):
    """Render the sim input file (multi-data-dir aware), stage files, and regenerate on hash change.

    Extends the earlier variant with: LOGDIR, list-valued GENOPT entries,
    PATH_USER_INPUT for multiple data dirs, a direct-set whitelist vs
    GENOPT_GLOBAL split for GLOBAL keys, and sed-rewriting of
    INPUT_FILE_INCLUDE paths to basenames so the staged copies are
    self-contained.

    Args:
        force_refresh: when truthy, regenerate regardless of the hash check.

    Returns:
        (regenerate, new_hash)
    """
    self.set_property("GENVERSION", self.genversion, assignment=": ", section_end="ENDLIST_GENVERSION")
    self.set_property("LOGDIR", os.path.basename(self.sim_log_dir), assignment=": ", section_end="ENDLIST_GENVERSION")
    for k in self.config.keys():
        if k.upper() != "GLOBAL":
            run_config = self.config[k]
            run_config_keys = list(run_config.keys())
            assert "BASE" in run_config_keys, "You must specify a base file for each option"
            for key in run_config_keys:
                if key.upper() in self.reserved_keywords:
                    continue
                base_file = run_config["BASE"]
                # GENOPT sections are keyed by the base file's stem.
                match = os.path.basename(base_file).split(".")[0]
                val = run_config[key]
                # Normalise scalars to a one-element list so both forms are accepted.
                if not isinstance(val, list):
                    val = [val]
                for v in val:
                    # only_add presumably appends rather than overwrites, so
                    # repeated keys all survive — confirm against set_property.
                    self.set_property(f"GENOPT({match})", f"{key} {v}", section_end="ENDLIST_GENVERSION", only_add=True)

    if len(self.data_dirs) > 1:
        # With multiple data dirs, point SNANA at the first one explicitly.
        data_dir = self.data_dirs[0]
        self.set_property("PATH_USER_INPUT", data_dir, assignment=": ")

    for key in self.config.get("GLOBAL", []):
        if key.upper() == "BASE":
            continue
        # Keys in direct_set are top-level sim-input keys; everything else is
        # funnelled through GENOPT_GLOBAL.
        direct_set = [
            "FORMAT_MASK", "RANSEED_REPEAT", "RANSEED_CHANGE", "BATCH_INFO",
            "BATCH_MEM", "NGEN_UNIT", "RESET_CIDOFF"
        ]
        if key in direct_set:
            self.set_property(key, self.config["GLOBAL"][key], assignment=": ")
        else:
            self.set_property(f"GENOPT_GLOBAL: {key}", self.config["GLOBAL"][key], assignment=" ")
        # NOTE(review): this runs on every GLOBAL key iteration, re-applying
        # the derived batch info each time — presumably idempotent; confirm.
        if self.derived_batch_info:
            self.set_property("BATCH_INFO", self.derived_batch_info, assignment=": ")
        # RANSEED_CHANGE and RANSEED_REPEAT are mutually exclusive.
        if key == "RANSEED_CHANGE":
            self.delete_property("RANSEED_REPEAT")
        elif key == "RANSEED_REPEAT":
            self.delete_property("RANSEED_CHANGE")

    self.set_property(
        "SIMGEN_INFILE_Ia",
        " ".join([os.path.basename(f) for f in self.base_ia]) if self.base_ia else None)
    self.set_property(
        "SIMGEN_INFILE_NONIa",
        " ".join([os.path.basename(f) for f in self.base_cc]) if self.base_cc else None)
    self.set_property("GENPREFIX", self.genprefix)

    # Put config in a temp directory
    temp_dir_obj = tempfile.TemporaryDirectory()
    temp_dir = temp_dir_obj.name

    # Copy the base files across
    input_paths = []
    for f in self.base_ia + self.base_cc:
        resolved = get_data_loc(f)
        shutil.copy(resolved, temp_dir)
        input_paths.append(os.path.join(temp_dir, os.path.basename(f)))
        self.logger.debug(f"Copying input file {resolved} to {temp_dir}")

    # Copy the include input file if there is one
    input_copied = []
    fs = self.base_ia + self.base_cc
    # NOTE: fs is deliberately appended to while iterating, so includes of
    # includes are also scanned. Order of processing matters here.
    for ff in fs:
        if ff not in input_copied:
            input_copied.append(ff)
            path = get_data_loc(ff)
            copied_path = os.path.join(temp_dir, os.path.basename(path))
            with open(path, "r") as f:
                for line in f.readlines():
                    line = line.strip()
                    if line.startswith("INPUT_FILE_INCLUDE"):
                        include_file = line.split(":")[-1].strip()
                        include_file_path = get_data_loc(include_file)
                        self.logger.debug(
                            f"Copying INPUT_FILE_INCLUDE file {include_file_path} to {temp_dir}"
                        )
                        include_file_basename = os.path.basename(
                            include_file_path)
                        include_file_output = os.path.join(
                            temp_dir, include_file_basename)
                        if include_file_output not in input_copied:
                            # Copy include file into the temp dir
                            shutil.copy(include_file_path, temp_dir)
                            # Then SED the file to replace the full path with just the basename
                            if include_file != include_file_basename:
                                sed_command = f"sed -i -e 's|{include_file}|{include_file_basename}|g' {copied_path}"
                                self.logger.debug(
                                    f"Running sed command: {sed_command}")
                                subprocess.run(sed_command,
                                               stderr=subprocess.STDOUT,
                                               cwd=temp_dir,
                                               shell=True)
                            # And make sure we dont do this file again
                            fs.append(include_file_output)

    # Write the primary input file
    main_input_file = f"{temp_dir}/{self.genversion}.input"
    with open(main_input_file, "w") as f:
        f.writelines(map(lambda s: s + "\n", self.base))
    self.logger.info(f"Input file written to {main_input_file}")

    # Remove any duplicates and order the output files
    output_files = [
        f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
    ]
    self.logger.debug(
        f"{len(output_files)} files used to create simulation. Hashing them."
    )

    # Get current hash
    new_hash = self.get_hash_from_files(output_files)
    old_hash = self.get_old_hash()
    regenerate = force_refresh or (old_hash is None or old_hash != new_hash)
    if regenerate:
        self.logger.info(f"Running simulation")
        # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
        if "//" not in self.output_dir and len(self.output_dir) > 30:
            self.logger.debug(
                f"Cleaning output directory {self.output_dir}")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.logger.debug(
                f"Copying from {temp_dir} to {self.output_dir}")
            copytree(temp_dir, self.output_dir)
            self.save_new_hash(new_hash)
        else:
            self.logger.error(
                f"Seems to be an issue with the output dir path: {self.output_dir}"
            )
        chown_dir(self.output_dir)
    else:
        self.logger.info("Hash check passed, not rerunning")
    temp_dir_obj.cleanup()
    return regenerate, new_hash
def prepare_train_job(self, force_refresh):
    """Prepare the NML file for Nearest Neighbour training.

    Builds a modified copy of the fit job's NML file via sed (new OUTDIR,
    ROOT output, DONE stamp, afterburner command), appends the &NNINP
    namelist, and copies the result into ``self.output_dir`` when the hash
    check fails or ``force_refresh`` is set.

    Args:
        force_refresh: when truthy, regenerate regardless of the hash check.

    Returns:
        (new_hash, train_info_local) when regenerated,
        (None, train_info_local) when the hash check passed, or
        ``None`` alone on error (missing NML/FITRES inputs).
        NOTE(review): the bare ``None`` error return is not unpackable like
        the tuple returns — confirm callers handle both shapes.
    """
    self.logger.debug("Preparing NML file for Nearest Neighbour training")
    fit_output = self.get_fit_dependency()
    genversion = fit_output["genversion"]
    fitres_dir = fit_output["fitres_dir"]
    fitres_file = fit_output["fitres_file"]
    nml_file_orig = fit_output["nml_file"]

    # Put config in a temp directory
    temp_dir_obj = tempfile.TemporaryDirectory()
    temp_dir = temp_dir_obj.name

    outfile_train = f'{self.name}_train.out'
    nml_file_train1 = f'{temp_dir}/{genversion}-2.nml'
    nml_file_train2 = f'{self.output_dir}/{genversion}-2.nml'
    train_info_local = {
        "outfile_NNtrain": outfile_train,
        "nml_file_NNtrain": nml_file_train2,
    }

    # construct sed to copy original NMLFILE and to
    # + replace OUTDIR:
    # + include ROOTFILE_OUT (to store histograms for NN train)
    # + include DONE stamp for Sam/pippen
    # + run afterburner to process ROOT file and get NN_trainPar;
    #   copy NN_trainPar up to where pippin can find it
    #
    # TODO: Check with Rick if the FITOPT000.ROOT is needed / should be hardcoded
    afterBurn = f'nearnbr_maxFoM.exe FITOPT000.ROOT -truetype 1 -outfile {outfile_train} ; cp {outfile_train} {self.outfile_train}'

    sedstr = 'sed'
    sedstr += (r" -e '/OUTDIR:/a\OUTDIR: %s' " % self.splitfit_output_dir)
    sedstr += r" -e '/OUTDIR:/d'"
    sedstr += r" -e '/DONE_STAMP:/d'"
    sedstr += r" -e '/SNTABLE_LIST/a\ ROOTFILE_OUT = \"bla.root\"'"
    sedstr += r" -e '/_OUT/d '"
    sedstr += (r" -e '/VERSION:/a\VERSION_AFTERBURNER: %s'" % afterBurn)
    sedstr += (r" -e '/VERSION:/a\DONE_STAMP: %s'" % self.done_file)
    sed_command = ("%s %s > %s" % (sedstr, nml_file_orig, nml_file_train1))

    # use system call to apply sed command
    self.logger.debug(f"Running sed command {sed_command}")
    subprocess.run(sed_command, stderr=subprocess.STDOUT, cwd=temp_dir, shell=True)

    # make sure that the new NML file is really there
    if not os.path.isfile(nml_file_train1):
        self.logger.error(
            f"Unable to create {nml_file_train1} with sed command {sed_command}"
        )
        temp_dir_obj.cleanup()
        return None

    # check that expected FITRES ref file is really there.
    if not os.path.exists(fitres_file):
        # Bug fix: this was a plain string referencing an undefined name
        # ({fitres_path}); log the actual missing path instead.
        self.logger.error(f"Cannot find expected FITRES file at {fitres_file}")
        temp_dir_obj.cleanup()
        return None

    # open NML file in append mode and tack on NNINP namelist
    with open(nml_file_train1, 'a') as f:
        f.write("\n# NNINP below added by prepare_NNtrainJob\n")
        f.write("\n&NNINP \n")
        f.write(" NEARNBR_TRAINFILE_PATH = '%s' \n" % fitres_dir)
        f.write(" NEARNBR_TRAINFILE_LIST = '%s' \n" % os.path.basename(fitres_file))
        f.write(" NEARNBR_SEPMAX_VARDEF = '%s' \n" % self.nn_options)
        f.write(" NEARNBR_TRUETYPE_VARNAME = 'SIM_TYPE_INDEX' \n")
        f.write(" NEARNBR_TRAIN_ODDEVEN = T \n")
        f.write("\n&END\n")

    input_files = [nml_file_train1]
    old_hash = self.get_old_hash()
    new_hash = self.get_hash_from_files(input_files)
    if force_refresh or new_hash != old_hash:
        self.logger.debug("Regenerating")
        shutil.rmtree(self.output_dir, ignore_errors=True)
        mkdirs(self.output_dir)
        self.logger.debug(f"Copying from {temp_dir} to {self.output_dir}")
        copytree(temp_dir, self.output_dir)
        self.save_new_hash(new_hash)
        temp_dir_obj.cleanup()
        return new_hash, train_info_local
    else:
        self.logger.debug("Not regenerating")
        temp_dir_obj.cleanup()
        return None, train_info_local
def write_input(self):
    """Build the sim input via the parsed YAML dicts, stage files, and regenerate on hash change.

    Unlike the set_property-based variants, this edits ``self.yaml`` in place
    (CONFIG / GENVERSION_LIST[0] / GENOPT_GLOBAL are aliased as c / d / g) and
    serialises it with ``self.write_output_file``.

    Returns:
        (regenerate, new_hash)
    """
    # As Pippin only does one GENVERSION at a time, lets extract it first, and also the config
    c = self.yaml["CONFIG"]
    d = self.yaml["GENVERSION_LIST"][0]
    g = self.yaml["GENOPT_GLOBAL"]

    # Ensure g is a dict with a ref we can update
    if g is None:
        g = {}
        self.yaml["GENOPT_GLOBAL"] = g

    # Start setting properties in the right area
    d["GENVERSION"] = self.genversion

    # Logging now goes in the "CONFIG"
    c["LOGDIR"] = os.path.basename(self.sim_log_dir)

    for k in self.config.keys():
        if k.upper() not in self.reserved_top:
            run_config = self.config[k]
            run_config_keys = list(run_config.keys())
            assert "BASE" in run_config_keys, "You must specify a base file for each option"
            for key in run_config_keys:
                if key.upper() in self.reserved_keywords:
                    continue
                base_file = run_config["BASE"]
                # GENOPT sections are keyed by the base file's stem.
                match = os.path.basename(base_file).split(".")[0]
                val = run_config[key]
                if not isinstance(val, list):
                    val = [val]
                lookup = f"GENOPT({match})"
                if lookup not in d:
                    d[lookup] = {}
                # NOTE(review): assigning into d[lookup][key] per value means
                # only the LAST element of a list survives — confirm this is
                # intended (the set_property variant used only_add to keep all).
                for v in val:
                    d[lookup][key] = v

    if len(self.data_dirs) > 1:
        # With multiple data dirs, point the sim at the first one explicitly.
        data_dir = self.data_dirs[0]
        c["PATH_USER_INPUT"] = data_dir

    for key in self.config.get("GLOBAL", []):
        if key.upper() == "BASE":
            continue
        # Keys in direct_set live at the top of CONFIG; everything else goes
        # under GENOPT_GLOBAL.
        direct_set = [
            "FORMAT_MASK", "RANSEED_REPEAT", "RANSEED_CHANGE", "BATCH_INFO",
            "BATCH_MEM", "NGEN_UNIT", "RESET_CIDOFF"
        ]
        if key in direct_set:
            c[key] = self.config["GLOBAL"][key]
        else:
            g[key] = self.config["GLOBAL"][key]
        if self.derived_batch_info:
            c["BATCH_INFO"] = self.derived_batch_info
        # RANSEED_CHANGE and RANSEED_REPEAT are mutually exclusive; drop the
        # other one if present.
        if key == "RANSEED_CHANGE" and c.get("RANSEED_REPEAT") is not None:
            del c["RANSEED_REPEAT"]
        elif key == "RANSEED_REPEAT" and c.get(
                "RANSEED_CHANGE") is not None:
            del c["RANSEED_CHANGE"]

    # NOTE(review): the else branches delete unconditionally and would raise
    # KeyError if the key is absent from CONFIG — presumably the template
    # always contains both SIMGEN_INFILE keys; confirm.
    if self.base_ia:
        c["SIMGEN_INFILE_Ia"] = [os.path.basename(f) for f in self.base_ia]
    else:
        del c["SIMGEN_INFILE_Ia"]
    if self.base_cc:
        c["SIMGEN_INFILE_NONIa"] = [
            os.path.basename(f) for f in self.base_cc
        ]
    else:
        del c["SIMGEN_INFILE_NONIa"]
    c["GENPREFIX"] = self.genprefix

    # Put config in a temp directory
    temp_dir_obj = tempfile.TemporaryDirectory()
    temp_dir = temp_dir_obj.name

    # Copy the base files across
    input_paths = []
    for f in self.base_ia + self.base_cc:
        resolved = get_data_loc(f)
        shutil.copy(resolved, temp_dir)
        input_paths.append(os.path.join(temp_dir, os.path.basename(f)))
        self.logger.debug(f"Copying input file {resolved} to {temp_dir}")

    # Copy the include input file if there is one
    input_copied = []
    fs = self.base_ia + self.base_cc
    # NOTE: fs is deliberately appended to while iterating, so includes of
    # includes are also scanned. Order of processing matters here.
    for ff in fs:
        if ff not in input_copied:
            input_copied.append(ff)
            path = get_data_loc(ff)
            copied_path = os.path.join(temp_dir, os.path.basename(path))
            with open(path, "r") as f:
                for line in f.readlines():
                    line = line.strip()
                    if line.startswith("INPUT_FILE_INCLUDE"):
                        include_file = line.split(":")[-1].strip()
                        include_file_path = get_data_loc(include_file)
                        self.logger.debug(
                            f"Copying INPUT_FILE_INCLUDE file {include_file_path} to {temp_dir}"
                        )
                        include_file_basename = os.path.basename(
                            include_file_path)
                        include_file_output = os.path.join(
                            temp_dir, include_file_basename)
                        if include_file_output not in input_copied:
                            # Copy include file into the temp dir
                            shutil.copy(include_file_path, temp_dir)
                            # Then SED the file to replace the full path with just the basename
                            if include_file != include_file_basename:
                                sed_command = f"sed -i -e 's|{include_file}|{include_file_basename}|g' {copied_path}"
                                self.logger.debug(
                                    f"Running sed command: {sed_command}")
                                subprocess.run(sed_command,
                                               stderr=subprocess.STDOUT,
                                               cwd=temp_dir,
                                               shell=True)
                            # And make sure we dont do this file again
                            fs.append(include_file_output)

    # Write the primary input file
    main_input_file = f"{temp_dir}/{self.genversion}.input"
    self.write_output_file(main_input_file)

    # Remove any duplicates and order the output files
    output_files = [
        f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
    ]
    self.logger.debug(
        f"{len(output_files)} files used to create simulation. Hashing them."
    )

    # Get current hash
    new_hash = self.get_hash_from_files(output_files)
    regenerate = self._check_regenerate(new_hash)
    if regenerate:
        self.logger.info(f"Running simulation")
        # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
        if "//" not in self.output_dir and len(self.output_dir) > 30:
            self.logger.debug(
                f"Cleaning output directory {self.output_dir}")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.logger.debug(
                f"Copying from {temp_dir} to {self.output_dir}")
            copytree(temp_dir, self.output_dir)
            self.save_new_hash(new_hash)
        else:
            self.logger.error(
                f"Seems to be an issue with the output dir path: {self.output_dir}"
            )
        chown_dir(self.output_dir)
    else:
        self.logger.info("Hash check passed, not rerunning")
    temp_dir_obj.cleanup()
    return regenerate, new_hash