def classify(self):
    new_hash = self.get_hash_from_string(self.name + f"{self.prob_ia}_{self.prob_cc}")
    if self._check_regenerate(new_hash):
        shutil.rmtree(self.output_dir, ignore_errors=True)
        mkdirs(self.output_dir)
        try:
            name = self.get_prob_column_name()
            cid = "CID"
            s = self.get_simulation_dependency()
            df = None
            phot_dir = s.output["photometry_dirs"][self.index]
            headers = [os.path.join(phot_dir, a) for a in os.listdir(phot_dir) if "HEAD" in a]
            if not headers:
                Task.fail_config(f"No HEAD fits files found in {phot_dir}!")
            else:
                types = self.get_simulation_dependency().output["types_dict"]
                self.logger.debug(f"Input types are {types}")
                for h in headers:
                    with fits.open(h) as hdul:
                        data = hdul[1].data
                        snid = np.array(data.field("SNID"))
                        sntype = np.array(data.field("SNTYPE")).astype(np.int64)
                        is_ia = np.isin(sntype, types["IA"])
                        prob = (is_ia * self.prob_ia) + (~is_ia * self.prob_cc)
                        dataframe = pd.DataFrame({cid: snid, name: prob})
                        dataframe[cid] = dataframe[cid].apply(str)
                        dataframe[cid] = dataframe[cid].str.strip()
                        if df is None:
                            df = dataframe
                        else:
                            df = pd.concat([df, dataframe])
                df.drop_duplicates(subset=cid, inplace=True)
            self.logger.info(f"Saving probabilities to {self.output_file}")
            df.to_csv(self.output_file, index=False, float_format="%0.4f")
            chown_dir(self.output_dir)
            with open(self.done_file, "w") as f:
                f.write("SUCCESS")
            self.save_new_hash(new_hash)
        except Exception as e:
            self.logger.exception(e, exc_info=True)
            self.passed = False
            with open(self.done_file, "w") as f:
                f.write("FAILED")
            return False
    else:
        self.should_be_done()
    self.passed = True
    return True
def _check_completion(self, squeue):
    if os.path.exists(self.done_file):
        self.logger.debug(f"Merger finished, see combined fitres at {self.suboutput_dir}")
        return Task.FINISHED_SUCCESS
    else:
        output_error = False
        if os.path.exists(self.logfile):
            with open(self.logfile, "r") as f:
                for line in f.read().splitlines():
                    if "ERROR" in line or "ABORT" in line:
                        self.logger.error(f"Fatal error in combine_fitres. See {self.logfile} for details.")
                        output_error = True
                    if output_error:
                        self.logger.info(f"Excerpt: {line}")
            if output_error:
                self.logger.debug("Removing hash on failure")
                os.remove(self.hash_file)
                chown_dir(self.output_dir)
        else:
            self.logger.error("Combine task failed with no output log. Please debug")
        return Task.FINISHED_FAILURE
def classify(self):
    new_hash = self.get_hash_from_string(self.name)
    if self._check_regenerate(new_hash):
        mkdirs(self.output_dir)
        input = self.get_fit_dependency()
        fitres_file = os.path.join(input["fitres_dirs"][self.index], input["fitopt_map"][self.fitopt])
        self.logger.debug(f"Looking for {fitres_file}")
        if not os.path.exists(fitres_file):
            self.logger.error(f"FITRES file could not be found at {fitres_file}, classifier has nothing to work with")
            self.passed = False
            return False
        df = pd.read_csv(fitres_file, sep=r"\s+", comment="#")
        df = df[["CID", "FITPROB"]].rename(columns={"FITPROB": self.get_prob_column_name()})
        self.logger.info(f"Saving probabilities to {self.output_file}")
        df.to_csv(self.output_file, index=False, float_format="%0.4f")
        chown_dir(self.output_dir)
        with open(self.done_file, "w") as f:
            f.write("SUCCESS")
        self.save_new_hash(new_hash)
    self.passed = True
    return True
def classify(self, force_refresh):
    new_hash = self.check_regenerate(force_refresh)
    if new_hash:
        mkdirs(self.output_dir)
        input = self.get_fit_dependency()
        fitres_file = input["fitres_file"]
        self.logger.debug(f"Looking for {fitres_file}")
        if not os.path.exists(fitres_file):
            self.logger.error(f"FITRES file could not be found at {fitres_file}, classifier has nothing to work with")
            self.passed = False
            return False
        df = pd.read_csv(fitres_file, sep=r"\s+", comment="#", compression="infer")
        df = df[["CID", "FITPROB"]].rename(columns={"FITPROB": self.get_prob_column_name()})
        self.logger.info(f"Saving probabilities to {self.output_file}")
        df.to_csv(self.output_file, index=False, float_format="%0.4f")
        chown_dir(self.output_dir)
        with open(self.done_file, "w") as f:
            f.write("SUCCESS")
        self.save_new_hash(new_hash)
    self.passed = True
    return True
def write_nml(self, force_refresh):
    # Parse config, first SNLCINP and then FITINP
    for key, value in self.config.get("SNLCINP", {}).items():
        self.set_snlcinp(key, value)
    for key, value in self.config.get("FITINP", {}).items():
        self.set_fitinp(key, value)
    for key, value in self.options.items():
        self.set_property(key, value, assignment=": ", section_end="&SNLCINP")
    if self.sim_task.output["ranseed_change"]:
        self.set_property("VERSION", self.sim_version + "-0*", assignment=": ", section_end="&SNLCINP")
    else:
        self.set_property("VERSION", self.sim_version, assignment=": ", section_end="&SNLCINP")
    self.set_property("OUTDIR", self.lc_output_dir, assignment=": ", section_end="&SNLCINP")
    self.set_property("DONE_STAMP", "FINISHED.DONE", assignment=": ", section_end="&SNLCINP")
    if isinstance(self.sim_task, DataPrep):
        self.set_snlcinp("PRIVATE_DATA_PATH", f"'{self.sim_task.output['data_path']}'")
        self.set_snlcinp("VERSION_PHOTOMETRY", f"'{self.sim_task.output['genversion']}'")
    # We want to do our hashing check here
    string_to_hash = self.fitopts + self.base
    new_hash = self.get_hash_from_string("".join(string_to_hash))
    old_hash = self.get_old_hash()
    regenerate = force_refresh or (old_hash is None or old_hash != new_hash)
    if regenerate:
        self.logger.info("Running light curve fit. Removing output_dir")
        shutil.rmtree(self.output_dir, ignore_errors=True)
        mkdirs(self.output_dir)
        # Write main file
        with open(self.config_path, "w") as f:
            f.writelines(map(lambda s: s + "\n", string_to_hash))
        self.logger.info(f"NML file written to {self.config_path}")
        self.save_new_hash(new_hash)
        chown_dir(self.output_dir)
    else:
        self.logger.info("Hash check passed, not rerunning")
    return regenerate, new_hash
def _run(self, force_refresh):
    regenerating = self.write_input(force_refresh)
    return False  # TODO: Remove this early return once the output location is figured out
    if regenerating:
        command = ["SALT2mu_fit.pl", self.config_path]
        for d in self.data:
            command += ["INPDIR+", d]
        command += ["NOPROMPT"]
        with open(self.logging_file, "w") as f:
            subprocess.run(command, stdout=f, stderr=subprocess.STDOUT, cwd=self.output_dir)
        chown_dir(self.output_dir)
    return True
def _check_completion(self, squeue):
    if os.path.exists(self.done_file):
        self.logger.debug(f"Merger finished, see combined fitres at {self.done_file}")
        # Copy MERGE.LOG and FITOPT.README if they aren't there
        filenames = ["MERGE.LOG", "FITOPT.README"]
        for f in filenames:
            original = os.path.join(self.lc_fit["lc_output_dir"], f)
            moved = os.path.join(self.output_dir, f)
            if not os.path.exists(moved):
                self.logger.debug(f"Copying file {f} into output directory")
                shutil.move(original, moved)
        # Rearrange folders and names so this resembles split_and_fit output for SALT2mu
        outdir = os.path.join(self.output_dir, self.lc_fit["genversion"])
        new_output = os.path.join(outdir, "FITOPT000.FITRES")
        if not os.path.exists(outdir):
            os.makedirs(outdir, exist_ok=True)
            original_output = self.done_file
            shutil.move(original_output, new_output)
            # Recreate the done file, since it was just moved away
            with open(self.done_file, "w") as f:
                f.write("SUCCESS")
        self.output["fitres_file"] = new_output
        self.output["fitres_dir"] = outdir
        return Task.FINISHED_SUCCESS
    else:
        output_error = False
        if os.path.exists(self.logfile):
            with open(self.logfile, "r") as f:
                for line in f.read().splitlines():
                    if "ERROR" in line or "ABORT" in line:
                        self.logger.error(f"Fatal error in combine_fitres. See {self.logfile} for details.")
                        output_error = True
                    if output_error:
                        self.logger.info(f"Excerpt: {line}")
            if output_error:
                self.logger.debug("Removing hash on failure")
                os.remove(self.hash_file)
                chown_dir(self.output_dir)
                return Task.FINISHED_FAILURE
        else:
            self.logger.error("Combine task failed with no output log. Please debug")
        return Task.FINISHED_FAILURE
def write_nml(self, force_refresh):
    self.logger.debug(f"Loading fitopts file from {self.fitopts_file}")
    with open(self.fitopts_file, "r") as f:
        self.fitopts = list(f.read().splitlines())
    self.logger.info(f"Loaded {len(self.fitopts)} fitopts file from {self.fitopts_file}")
    # Parse config, first SNLCINP and then FITINP
    for key, value in self.config.get("SNLCINP", {}).items():
        self.set_snlcinp(key, value)
    for key, value in self.config.get("FITINP", {}).items():
        self.set_fitinp(key, value)
    self.set_property("VERSION", self.sim_version + "*", assignment=": ", section_end="&SNLCINP")  # TODO FIX THIS, DOUBLE VERSION KEY
    self.set_property("OUTDIR", self.lc_output_dir, assignment=": ", section_end="&SNLCINP")
    if isinstance(self.sim_task, DataPrep):
        self.set_snlcinp("PRIVATE_DATA_PATH", f"'{self.sim_task.output['data_path']}'")
        self.set_snlcinp("VERSION_PHOTOMETRY", f"'{self.sim_task.output['genversion']}'")
    # We want to do our hashing check here
    string_to_hash = self.fitopts + self.base
    # with open(os.path.abspath(inspect.stack()[0][1]), "r") as f:
    #     string_to_hash += f.read()
    new_hash = self.get_hash_from_string("".join(string_to_hash))
    old_hash = self.get_old_hash()
    regenerate = force_refresh or (old_hash is None or old_hash != new_hash)
    if regenerate:
        self.logger.info("Running light curve fit. Removing output_dir")
        shutil.rmtree(self.output_dir, ignore_errors=True)
        mkdirs(self.output_dir)
        # Write main file
        with open(self.config_path, "w") as f:
            f.writelines(map(lambda s: s + "\n", string_to_hash))
        self.logger.info(f"NML file written to {self.config_path}")
        self.save_new_hash(new_hash)
        chown_dir(self.output_dir)
    else:
        self.logger.info("Hash check passed, not rerunning")
    return regenerate, new_hash
def _run(self, force_refresh):
    regenerating = self.write_input(force_refresh)
    if regenerating:
        command = ["SALT2mu_fit.pl", self.config_filename]
        for d in self.data:
            command += ["INPDIR+", d]
        command += ["NOPROMPT"]
        self.logger.debug(f"Will check for done file at {self.done_file}")
        self.logger.debug(f"Will output log at {self.logging_file}")
        self.logger.debug(f"Running command: {' '.join(command)}")
        with open(self.logging_file, "w") as f:
            subprocess.run(command, stdout=f, stderr=subprocess.STDOUT, cwd=self.output_dir)
        chown_dir(self.output_dir)
    return True
def _check_completion(self, squeue):
    # Check for errors
    for file in self.log_files:
        if os.path.exists(file):
            with open(file, "r") as f:
                output_error = False
                for line in f.read().splitlines():
                    if ("ERROR" in line or ("ABORT" in line and " 0 " not in line)) and not output_error:
                        self.logger.error(f"Fatal error in light curve fitting. See {file} for details.")
                        output_error = True
                    if output_error:
                        self.logger.info(f"Excerpt: {line}")
            if output_error:
                return Task.FINISHED_FAILURE
    # Check for existence of SPLIT_JOBS_LCFIT.tar.gz to see if job is done
    if os.path.exists(self.done_file):
        self.logger.info("Light curve done file found")
        logging_file2 = self.logging_file.replace("_log", "_log2")
        if not os.path.exists(logging_file2):
            self.logger.info("Tarball found, fitting complete, cleaning up the directory")
            try:
                with open(logging_file2, "w") as f:
                    subprocess.run(["split_and_fit.pl", "CLEANMASK", "4", "NOPROMPT"], stdout=f, stderr=subprocess.STDOUT, cwd=self.output_dir, check=True)
                time.sleep(2)
            except subprocess.CalledProcessError as e:
                self.logger.warning(f"split_and_fit.pl has a return code of {e.returncode}. This may or may not be an issue.")
            chown_dir(self.output_dir)
            self.print_stats()
        self.output["fitres_file"] = os.path.abspath(os.path.join(self.fitres_dir, "FITOPT000.FITRES.gz"))  # TODO: Ask rick if there
        return Task.FINISHED_SUCCESS
    return 0
def write_nml(self):
    # Parse config, first SNLCINP and then FITINP
    for key, value in self.config.get("SNLCINP", {}).items():
        self.set_snlcinp(key, value)
    for key, value in self.config.get("FITINP", {}).items():
        self.set_fitinp(key, value)
    self.set_property("VERSION", self.sim_version + "*", assignment=":", section_end="&SNLCINP")  # TODO FIX THIS, DOUBLE VERSION KEY
    self.set_property("OUTDIR", self.lc_output_dir, assignment=":", section_end="&SNLCINP")
    # Load old hash
    old_hash = None
    hash_file = f"{self.output_dir}/hash.txt"
    if os.path.exists(hash_file):
        with open(hash_file, "r") as f:
            old_hash = f.read().strip()
            self.logger.debug(f"Previous result found, hash is {old_hash}")
    # We want to do our hashing check here
    total_string = self.fitopts + self.base
    string_to_hash = self.sim_hash + "".join(total_string)
    with open(os.path.abspath(inspect.stack()[0][1]), "r") as f:
        string_to_hash += f.read()
    new_hash = get_hash(string_to_hash)
    self.logger.debug(f"Current hash set to {new_hash}")
    regenerate = old_hash is None or old_hash != new_hash
    if regenerate:
        self.logger.info("Running light curve fit, hash check failed")
        # Write main file
        with open(self.config_path, "w") as f:
            f.writelines(map(lambda s: s + "\n", total_string))
        self.logger.info(f"NML file written to {self.config_path}")
        with open(hash_file, "w") as f:
            f.write(str(new_hash))
        self.logger.debug(f"New hash saved to {hash_file}")
        chown_dir(self.output_dir)
    return regenerate, new_hash
def write_nml(self):
    # Parse config, first SNLCINP and then FITINP
    for key, value in self.config.get("SNLCINP", {}).items():
        self.set_snlcinp(key, value)
    for key, value in self.config.get("FITINP", {}).items():
        self.set_fitinp(key, value)
    for key, value in self.options.items():
        self.yaml["CONFIG"][key] = value
    self.compute_fitopts()
    if self.sim_task.output["ranseed_change"]:
        self.yaml["CONFIG"]["VERSION"] = [self.sim_version + "-0*"]
    else:
        self.yaml["CONFIG"]["VERSION"] = [self.sim_version]
    self.yaml["CONFIG"]["OUTDIR"] = self.lc_output_dir
    # self.yaml["CONFIG"]["DONE_STAMP"] = "ALL.DONE"
    if isinstance(self.sim_task, DataPrep):
        data_path = self.sim_task.output["data_path"]
        if "SNDATA_ROOT/lcmerge" not in data_path:
            self.set_snlcinp("PRIVATE_DATA_PATH", f"'{self.sim_task.output['data_path']}'")
        self.set_snlcinp("VERSION_PHOTOMETRY", f"'{self.sim_task.output['genversion']}'")
    # We want to do our hashing check here
    string_to_hash = self.get_output_string()
    new_hash = self.get_hash_from_string(string_to_hash)
    regenerate = self._check_regenerate(new_hash)
    if regenerate:
        self.logger.info("Running light curve fit. Removing output_dir")
        shutil.rmtree(self.output_dir, ignore_errors=True)
        mkdirs(self.output_dir)
        # Write the primary input file
        self.write_output_file(self.config_path)
        self.logger.info(f"NML file written to {self.config_path}")
        self.save_new_hash(new_hash)
        chown_dir(self.output_dir)
    else:
        self.logger.info("Hash check passed, not rerunning")
    return regenerate, new_hash
def check_task_completion(self, t, blocked_tasks, done_tasks, failed_tasks, running_tasks, squeue):
    result = t.check_completion(squeue)
    # If it's finished, good or bad, juggle tasks
    if result in [Task.FINISHED_SUCCESS, Task.FINISHED_FAILURE]:
        if t.gpu:
            self.num_jobs_queue_gpu -= t.num_jobs
        else:
            self.num_jobs_queue -= t.num_jobs
        if result == Task.FINISHED_SUCCESS:
            running_tasks.remove(t)
            self.logger.notice(f"FINISHED: {t}, total jobs now {self.num_jobs_queue}")
            done_tasks.append(t)
        else:
            self.fail_task(t, running_tasks, failed_tasks, blocked_tasks)
        chown_dir(t.output_dir)
        return True
    return False
def _run(self):
    self.yaml["CONFIG"]["WFITOPT"] = self.wfitopts
    self.yaml["CONFIG"]["INPDIR"] = self.create_cov_dirs
    self.yaml["CONFIG"]["OUTDIR"] = os.path.join(self.output_dir, "output")
    # Pass all OPTS keys through to the yaml dictionary
    for k, v in self.options.items():
        # Clobber WFITOPTS to WFITOPT
        if k == "WFITOPTS":
            k = "WFITOPT"
        self.yaml["CONFIG"][k] = v
    final_output_for_hash = self.get_output_string()
    new_hash = self.get_hash_from_string(final_output_for_hash)
    if self._check_regenerate(new_hash):
        self.logger.debug("Regenerating and launching task")
        shutil.rmtree(self.output_dir, ignore_errors=True)
        mkdirs(self.output_dir)
        self.save_new_hash(new_hash)
        with open(self.input_file, "w") as f:
            f.write(self.get_output_string())
        cmd = ["submit_batch_jobs.sh", os.path.basename(self.input_file)]
        self.logger.debug(f"Submitting wfit job: {' '.join(cmd)} in cwd: {self.output_dir}")
        self.logger.debug(f"Logging to {self.logfile}")
        with open(self.logfile, "w") as f:
            subprocess.run(" ".join(cmd), stdout=f, stderr=subprocess.STDOUT, cwd=self.output_dir, shell=True)
        chown_dir(self.output_dir)
    else:
        self.should_be_done()
        self.logger.info("Hash check passed, not rerunning")
    return True
def classify(self):
    mkdirs(self.output_dir)
    fitres = f"{self.fit_dir}/FITOPT000.FITRES.gz"
    self.logger.debug(f"Looking for {fitres}")
    if not os.path.exists(fitres):
        self.logger.error(f"FITRES file could not be found at {fitres}, classifier has nothing to work with")
        return False
    data = pd.read_csv(fitres, sep=r"\s+", comment="#", compression="infer")
    ids = data["CID"].values
    probability = np.random.uniform(size=ids.size)
    combined = np.vstack((ids, probability)).T
    output_file = self.output_dir + "/prob.txt"
    self.logger.info(f"Saving probabilities to {output_file}")
    np.savetxt(output_file, combined)
    chown_dir(self.output_dir)
    return True  # change to hash
def run(self):
    regenerate, new_hash = self.write_input()
    if not regenerate:
        return new_hash
    logging_file = self.config_path.replace(".input", ".input_log")
    with open(logging_file, "w") as f:
        subprocess.run(["sim_SNmix.pl", self.config_path], stdout=f, stderr=subprocess.STDOUT, cwd=self.output_dir)
    shutil.chown(logging_file, group=self.global_config["SNANA"]["group"])
    self.logger.info(f"Sim running and logging outputting to {logging_file}")
    sim_log_dir = f"{self.output_dir}/SIMLOGS_{self.genversion}"
    done_file = f"{sim_log_dir}/SIMJOB_ALL.DONE"
    # Monitor for success or failure
    time.sleep(10)
    while True:
        # Check the log for errors and, if found, print the offending line so you don't have to look up the file
        output_error = False
        if os.path.exists(logging_file):
            with open(logging_file, "r") as f:
                for line in f.read().splitlines():
                    if "ERROR" in line:
                        self.logger.critical(f"Fatal error in simulation. See {logging_file} for details.")
                        output_error = True
                    if output_error:
                        self.logger.error(f"Excerpt: {line}")
        if output_error:
            self.logger.debug("Removing hash on failure")
            os.remove(self.hash_file)
            chown_dir(self.output_dir)
            return False
        for file in os.listdir(sim_log_dir):
            if not file.startswith("TMP") or not file.endswith(".LOG"):
                continue
            with open(sim_log_dir + "/" + file, "r") as f:
                for line in f.read().splitlines():
                    if (" ABORT " in line or "FATAL[" in line) and not output_error:
                        output_error = True
                        self.logger.critical(f"Fatal error in simulation. See {sim_log_dir}/{file} for details.")
                    if output_error:
                        self.logger.error(f"Excerpt: {line}")
        if output_error:
            self.logger.debug("Removing hash on failure")
            os.remove(self.hash_file)
            chown_dir(self.output_dir)
            return False
        # Check to see if the done file exists
        if os.path.exists(done_file):
            sim_folder = os.path.expandvars(f"{self.global_config['SNANA']['sim_dir']}/{self.genversion}")
            sim_folder_endpoint = f"{self.output_dir}/{self.genversion}"
            self.logger.info("Done file found, creating symlinks")
            self.logger.debug(f"Linking {sim_folder} -> {sim_folder_endpoint}")
            os.symlink(sim_folder, sim_folder_endpoint, target_is_directory=True)
            chown_dir(self.output_dir)
            return new_hash
        time.sleep(self.global_config["OUTPUT"].getint("ping_frequency"))
def _run(self):
    if self.blind:
        self.logger.info("NOTE: This run is being BLINDED")
    regenerating = self.write_input()
    if regenerating:
        command = ["submit_batch_jobs.sh", os.path.basename(self.config_filename)]
        self.logger.debug(f"Will check for done file at {self.done_file}")
        self.logger.debug(f"Will output log at {self.logging_file}")
        self.logger.debug(f"Running command: {' '.join(command)}")
        with open(self.logging_file, "w") as f:
            subprocess.run([" ".join(command)], stdout=f, stderr=subprocess.STDOUT, cwd=self.output_dir, shell=True)
        chown_dir(self.output_dir)
        self.set_m0dif_dirs()
    else:
        self.should_be_done()
        self.logger.info("Hash check passed, not rerunning")
    return True
def check_task_completion(self, t, squeue):
    result = t.check_completion(squeue)
    # If it's finished, good or bad, juggle tasks
    if result in [Task.FINISHED_SUCCESS, Task.FINISHED_FAILURE]:
        if t.gpu:
            self.num_jobs_queue_gpu -= t.num_jobs
        else:
            self.num_jobs_queue -= t.num_jobs
        if result == Task.FINISHED_SUCCESS:
            self.running.remove(t)
            self.logger.notice(f"FINISHED: {t} with {t.num_jobs} NUM_JOBS. NUM_JOBS now {self.num_jobs_queue}")
            self.done.append(t)
            if self.compress:
                t.compress()
        else:
            self.fail_task(t)
        if os.path.exists(t.output_dir):
            chown_dir(t.output_dir)
        else:
            chown_file(t.output_dir + ".tar.gz")
        return True
    return False
def write_input(self):
    # Load previous hash here if it exists
    old_hash = None
    hash_file = f"{self.output_dir}/hash.txt"
    if os.path.exists(hash_file):
        with open(hash_file, "r") as f:
            old_hash = f.read().strip()
            self.logger.debug(f"Previous result found, hash is {old_hash}")
    # Put config in a temp directory
    temp_dir_obj = tempfile.TemporaryDirectory()
    temp_dir = temp_dir_obj.name
    # Copy the base files across
    for f in self.base_ia:
        shutil.copy(self.data_dir + f, temp_dir)
    for f in self.base_cc:
        shutil.copy(self.data_dir + f, temp_dir)
    # Copy the include input file if there is one
    input_copied = []
    fs = self.base_ia + self.base_cc
    for ff in fs:
        if ff not in input_copied:
            input_copied.append(ff)
            with open(self.data_dir + ff, "r") as f:
                for line in f.readlines():
                    line = line.strip()
                    if line.startswith("INPUT_FILE_INCLUDE"):
                        include_file = line.split(":")[-1].strip()
                        self.logger.debug(f"Copying included file {include_file}")
                        shutil.copy(self.data_dir + include_file, temp_dir)
    # Write the primary input file
    main_input_file = f"{temp_dir}/{self.genversion}.input"
    with open(main_input_file, "w") as f:
        f.writelines(map(lambda s: s + "\n", self.base))
    self.logger.info(f"Input file written to {main_input_file}")
    # Remove any duplicates and order the output files
    output_files = [f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))]
    self.logger.debug(f"{len(output_files)} files used to create simulation. Hashing them.")
    # Also add this file to the hash, so if the code changes we also regenerate. Smart.
    output_files.append(os.path.abspath(inspect.stack()[0][1]))
    # Get current hash
    string_to_hash = ""
    for file in output_files:
        with open(file, "r") as f:
            string_to_hash += f.read()
    new_hash = get_hash(string_to_hash)
    self.logger.debug(f"Current hash set to {new_hash}")
    regenerate = old_hash is None or old_hash != new_hash
    if regenerate:
        self.logger.info("Running simulation, hash check failed")
        # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
        if "//" not in self.output_dir and "Pippin" in self.output_dir:
            self.logger.debug(f"Cleaning output directory {self.output_dir}")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.logger.debug(f"Copying from {temp_dir} to {self.output_dir}")
            copytree(temp_dir, self.output_dir)
        with open(hash_file, "w") as f:
            f.write(str(new_hash))
        self.logger.debug(f"New hash saved to {hash_file}")
        self.hash_file = hash_file
        chown_dir(self.output_dir)
    else:
        self.logger.info("Hash check passed, not rerunning")
    temp_dir_obj.cleanup()
    return regenerate, new_hash
def write_input(self):
    # As Pippin only does one GENVERSION at a time, let's extract it first, and also the config
    c = self.yaml["CONFIG"]
    d = self.yaml["GENVERSION_LIST"][0]
    g = self.yaml["GENOPT_GLOBAL"]
    # Ensure g is a dict with a ref we can update
    if g is None:
        g = {}
        self.yaml["GENOPT_GLOBAL"] = g
    # Start setting properties in the right area
    d["GENVERSION"] = self.genversion
    # Logging now goes in the "CONFIG"
    c["LOGDIR"] = os.path.basename(self.sim_log_dir)
    for k in self.config.keys():
        if k.upper() not in self.reserved_top:
            run_config = self.config[k]
            run_config_keys = list(run_config.keys())
            assert "BASE" in run_config_keys, "You must specify a base file for each option"
            for key in run_config_keys:
                if key.upper() in self.reserved_keywords:
                    continue
                base_file = run_config["BASE"]
                match = os.path.basename(base_file).split(".")[0]
                val = run_config[key]
                if not isinstance(val, list):
                    val = [val]
                lookup = f"GENOPT({match})"
                if lookup not in d:
                    d[lookup] = {}
                for v in val:
                    d[lookup][key] = v
    if len(self.data_dirs) > 1:
        data_dir = self.data_dirs[0]
        c["PATH_USER_INPUT"] = data_dir
    for key in self.config.get("GLOBAL", []):
        if key.upper() == "BASE":
            continue
        direct_set = ["FORMAT_MASK", "RANSEED_REPEAT", "RANSEED_CHANGE", "BATCH_INFO", "BATCH_MEM", "NGEN_UNIT", "RESET_CIDOFF"]
        if key in direct_set:
            c[key] = self.config["GLOBAL"][key]
        else:
            g[key] = self.config["GLOBAL"][key]
        if self.derived_batch_info:
            c["BATCH_INFO"] = self.derived_batch_info
        if key == "RANSEED_CHANGE" and c.get("RANSEED_REPEAT") is not None:
            del c["RANSEED_REPEAT"]
        elif key == "RANSEED_REPEAT" and c.get("RANSEED_CHANGE") is not None:
            del c["RANSEED_CHANGE"]
    if self.base_ia:
        c["SIMGEN_INFILE_Ia"] = [os.path.basename(f) for f in self.base_ia]
    else:
        del c["SIMGEN_INFILE_Ia"]
    if self.base_cc:
        c["SIMGEN_INFILE_NONIa"] = [os.path.basename(f) for f in self.base_cc]
    else:
        del c["SIMGEN_INFILE_NONIa"]
    c["GENPREFIX"] = self.genprefix
    # Put config in a temp directory
    temp_dir_obj = tempfile.TemporaryDirectory()
    temp_dir = temp_dir_obj.name
    # Copy the base files across
    input_paths = []
    for f in self.base_ia + self.base_cc:
        resolved = get_data_loc(f)
        shutil.copy(resolved, temp_dir)
        input_paths.append(os.path.join(temp_dir, os.path.basename(f)))
        self.logger.debug(f"Copying input file {resolved} to {temp_dir}")
    # Copy the include input file if there is one
    input_copied = []
    fs = self.base_ia + self.base_cc
    for ff in fs:
        if ff not in input_copied:
            input_copied.append(ff)
            path = get_data_loc(ff)
            copied_path = os.path.join(temp_dir, os.path.basename(path))
            with open(path, "r") as f:
                for line in f.readlines():
                    line = line.strip()
                    if line.startswith("INPUT_FILE_INCLUDE"):
                        include_file = line.split(":")[-1].strip()
                        include_file_path = get_data_loc(include_file)
                        self.logger.debug(f"Copying INPUT_FILE_INCLUDE file {include_file_path} to {temp_dir}")
                        include_file_basename = os.path.basename(include_file_path)
                        include_file_output = os.path.join(temp_dir, include_file_basename)
                        if include_file_output not in input_copied:
                            # Copy include file into the temp dir
                            shutil.copy(include_file_path, temp_dir)
                            # Then SED the file to replace the full path with just the basename
                            if include_file != include_file_basename:
                                sed_command = f"sed -i -e 's|{include_file}|{include_file_basename}|g' {copied_path}"
                                self.logger.debug(f"Running sed command: {sed_command}")
                                subprocess.run(sed_command, stderr=subprocess.STDOUT, cwd=temp_dir, shell=True)
                            # And make sure we don't do this file again
                            fs.append(include_file_output)
    # Write the primary input file
    main_input_file = f"{temp_dir}/{self.genversion}.input"
    self.write_output_file(main_input_file)
    # Remove any duplicates and order the output files
    output_files = [f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))]
    self.logger.debug(f"{len(output_files)} files used to create simulation. Hashing them.")
    # Get current hash
    new_hash = self.get_hash_from_files(output_files)
    regenerate = self._check_regenerate(new_hash)
    if regenerate:
        self.logger.info("Running simulation")
        # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
        if "//" not in self.output_dir and len(self.output_dir) > 30:
            self.logger.debug(f"Cleaning output directory {self.output_dir}")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.logger.debug(f"Copying from {temp_dir} to {self.output_dir}")
            copytree(temp_dir, self.output_dir)
            self.save_new_hash(new_hash)
        else:
            self.logger.error(f"Seems to be an issue with the output dir path: {self.output_dir}")
        chown_dir(self.output_dir)
    else:
        self.logger.info("Hash check passed, not rerunning")
    temp_dir_obj.cleanup()
    return regenerate, new_hash
def _check_completion(self, squeue):
    if os.path.exists(self.done_file) or not os.path.exists(self.total_summary):
        if os.path.exists(self.done_file):
            self.logger.info(f"Simulation {self.name} found done file!")
            with open(self.done_file) as f:
                if "FAIL" in f.read():
                    self.logger.error(f"Done file {self.done_file} reporting failure")
                    return self.check_issues()
        else:
            self.logger.error("MERGE.LOG was not created, job died on submission")
            return self.check_issues()
        if os.path.exists(self.total_summary):
            y = read_yaml(self.total_summary)
            if "MERGE" in y.keys():
                for i, row in enumerate(y["MERGE"]):
                    if len(row) == 6:
                        # Old version for backward compatibility (before 15/01/2021)
                        state, iver, version, ngen, nwrite, cpu = row
                    else:
                        # New MERGE.LOG syntax (after 15/01/2021)
                        state, iver, version, ngen, nwrite, nspec, cpu = row
                    if cpu < 60:
                        units = "minutes"
                    else:
                        cpu = cpu / 60
                        units = "hours"
                    self.logger.info(f"Simulation {i + 1} generated {ngen} events and wrote {nwrite} to file, taking {cpu:0.1f} CPU {units}")
            else:
                self.logger.error(f"File {self.total_summary} does not have a MERGE section - did it die?")
                return self.kill_and_fail()
            if "SURVEY" in y.keys():
                self.output["SURVEY"] = y["SURVEY"]
                self.output["SURVEY_ID"] = y["IDSURVEY"]
            else:
                self.output["SURVEY"] = "UNKNOWN"
                self.output["SURVEY_ID"] = 0
        else:
            self.logger.warning(f"Cannot find {self.total_summary}")
        self.logger.info("Done file found, creating symlinks")
        s_ends = [os.path.join(self.output_dir, os.path.basename(s)) for s in self.sim_folders]
        for s, s_end in zip(self.sim_folders, s_ends):
            if not os.path.exists(s_end):
                # Check to make sure there isn't a broken symlink at s_end
                # os.path.exists will return False for broken symlinks, even if one exists
                if os.path.islink(s_end):
                    self.logger.error(f"Symlink {s_end} exists and is pointing to a broken or missing directory")
                    return Task.FINISHED_FAILURE
                else:
                    self.logger.debug(f"Linking {s} -> {s_end}")
                    os.symlink(s, s_end, target_is_directory=True)
        chown_dir(self.output_dir)
        self.output.update({"photometry_dirs": s_ends})
        return Task.FINISHED_SUCCESS
    return self.check_for_job(squeue, f"{self.genversion}.input-CPU")
def write_input(self, force_refresh):
    self.set_property("GENVERSION", self.genversion, assignment=": ", section_end="ENDLIST_GENVERSION")
    self.set_property("LOGDIR", os.path.basename(self.sim_log_dir), assignment=": ", section_end="ENDLIST_GENVERSION")
    for k in self.config.keys():
        if k.upper() != "GLOBAL":
            run_config = self.config[k]
            run_config_keys = list(run_config.keys())
            assert "BASE" in run_config_keys, "You must specify a base file for each option"
            for key in run_config_keys:
                if key.upper() in self.reserved_keywords:
                    continue
                base_file = run_config["BASE"]
                match = os.path.basename(base_file).split(".")[0]
                val = run_config[key]
                if not isinstance(val, list):
                    val = [val]
                for v in val:
                    self.set_property(f"GENOPT({match})", f"{key} {v}", section_end="ENDLIST_GENVERSION", only_add=True)
    if len(self.data_dirs) > 1:
        data_dir = self.data_dirs[0]
        self.set_property("PATH_USER_INPUT", data_dir, assignment=": ")
    for key in self.config.get("GLOBAL", []):
        if key.upper() == "BASE":
            continue
        direct_set = ["FORMAT_MASK", "RANSEED_REPEAT", "RANSEED_CHANGE", "BATCH_INFO", "BATCH_MEM", "NGEN_UNIT", "RESET_CIDOFF"]
        if key in direct_set:
            self.set_property(key, self.config["GLOBAL"][key], assignment=": ")
        else:
            self.set_property(f"GENOPT_GLOBAL: {key}", self.config["GLOBAL"][key], assignment=" ")
        if self.derived_batch_info:
            self.set_property("BATCH_INFO", self.derived_batch_info, assignment=": ")
        if key == "RANSEED_CHANGE":
            self.delete_property("RANSEED_REPEAT")
        elif key == "RANSEED_REPEAT":
            self.delete_property("RANSEED_CHANGE")
    self.set_property("SIMGEN_INFILE_Ia", " ".join([os.path.basename(f) for f in self.base_ia]) if self.base_ia else None)
    self.set_property("SIMGEN_INFILE_NONIa", " ".join([os.path.basename(f) for f in self.base_cc]) if self.base_cc else None)
    self.set_property("GENPREFIX", self.genprefix)
    # Put config in a temp directory
    temp_dir_obj = tempfile.TemporaryDirectory()
    temp_dir = temp_dir_obj.name
    # Copy the base files across
    input_paths = []
    for f in self.base_ia + self.base_cc:
        resolved = get_data_loc(f)
        shutil.copy(resolved, temp_dir)
        input_paths.append(os.path.join(temp_dir, os.path.basename(f)))
        self.logger.debug(f"Copying input file {resolved} to {temp_dir}")
    # Copy the include input file if there is one
    input_copied = []
    fs = self.base_ia + self.base_cc
    for ff in fs:
        if ff not in input_copied:
            input_copied.append(ff)
            path = get_data_loc(ff)
            copied_path = os.path.join(temp_dir, os.path.basename(path))
            with open(path, "r") as f:
                for line in f.readlines():
                    line = line.strip()
                    if line.startswith("INPUT_FILE_INCLUDE"):
                        include_file = line.split(":")[-1].strip()
                        include_file_path = get_data_loc(include_file)
                        self.logger.debug(f"Copying INPUT_FILE_INCLUDE file {include_file_path} to {temp_dir}")
                        include_file_basename = os.path.basename(include_file_path)
                        include_file_output = os.path.join(temp_dir, include_file_basename)
                        if include_file_output not in input_copied:
                            # Copy include file into the temp dir
                            shutil.copy(include_file_path, temp_dir)
                            # Then SED the file to replace the full path with just the basename
                            if include_file != include_file_basename:
                                sed_command = f"sed -i -e 's|{include_file}|{include_file_basename}|g' {copied_path}"
                                self.logger.debug(f"Running sed command: {sed_command}")
                                subprocess.run(sed_command, stderr=subprocess.STDOUT, cwd=temp_dir, shell=True)
                            # And make sure we don't do this file again
                            fs.append(include_file_output)
    # Write the primary input file
    main_input_file = f"{temp_dir}/{self.genversion}.input"
    with open(main_input_file, "w") as f:
        f.writelines(map(lambda s: s + "\n", self.base))
    self.logger.info(f"Input file written to {main_input_file}")
    # Remove any duplicates and order the output files
    output_files = [f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))]
    self.logger.debug(f"{len(output_files)} files used to create simulation. Hashing them.")
    # Get current hash
    new_hash = self.get_hash_from_files(output_files)
    old_hash = self.get_old_hash()
    regenerate = force_refresh or (old_hash is None or old_hash != new_hash)
    if regenerate:
        self.logger.info("Running simulation")
        # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
        if "//" not in self.output_dir and len(self.output_dir) > 30:
            self.logger.debug(f"Cleaning output directory {self.output_dir}")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.logger.debug(f"Copying from {temp_dir} to {self.output_dir}")
            copytree(temp_dir, self.output_dir)
            self.save_new_hash(new_hash)
        else:
            self.logger.error(f"Seems to be an issue with the output dir path: {self.output_dir}")
        chown_dir(self.output_dir)
    else:
        self.logger.info("Hash check passed, not rerunning")
    temp_dir_obj.cleanup()
    return regenerate, new_hash
def _check_completion(self, squeue):
    if os.path.exists(self.done_file) or os.path.exists(self.done_file2):
        self.logger.info("Job complete")
        if os.path.exists(self.done_file):
            with open(self.done_file) as f:
                if "FAILURE" in f.read():
                    return Task.FINISHED_FAILURE
        if os.path.exists(self.done_file2):
            with open(self.done_file2) as f:
                if "FAILURE" in f.read():
                    return Task.FINISHED_FAILURE
        new_pred_file = self.output_dir + "/predictions.csv"
        new_model_file = os.path.join(self.output_dir, "model.pt")
        if not os.path.exists(new_pred_file) or not os.path.exists(new_model_file):
            self.logger.info("Updating model location or generating predictions file")
            model, predictions = self.get_model_and_pred()
            if not os.path.exists(new_model_file):
                if model is not None:
                    shutil.move(model, new_model_file)
                    args_old, args_new = os.path.abspath(os.path.join(os.path.dirname(model), "cli_args.json")), self.output_dir + "/cli_args.json"
                    norm_old, norm_new = os.path.abspath(os.path.join(os.path.dirname(model), "data_norm.json")), self.output_dir + "/data_norm.json"
                    shutil.move(args_old, args_new)
                    shutil.move(norm_old, norm_new)
                    self.logger.info(f"Model file can be found at {new_model_file}")
            if not os.path.exists(new_pred_file):
                with open(predictions, "rb") as f:
                    dataframe = pickle.load(f)
                self.logger.debug(dataframe)
                self.logger.debug(self.variant)
                if self.variant in ["variational", "bayesian"]:
                    final_dataframe = dataframe[["SNID", "all_class0_median", "all_class0_std"]]
                    final_dataframe = final_dataframe.rename(columns={"all_class0_median": self.get_prob_column_name(), "all_class0_std": self.get_prob_column_name() + "_ERR"})
                else:
                    final_dataframe = dataframe[["SNID", "all_class0"]]
                    final_dataframe = final_dataframe.rename(columns={"all_class0": self.get_prob_column_name()})
                final_dataframe.to_csv(new_pred_file, index=False, float_format="%0.4f")
                self.logger.info(f"Predictions file can be found at {new_pred_file}")
            chown_dir(self.output_dir)
        self.output.update({"model_filename": new_model_file, "predictions_filename": new_pred_file})
        return Task.FINISHED_SUCCESS
    else:
        return self.check_for_job(squeue, self.job_base_name)
def write_input(self, force_refresh):
    self.set_property("GENVERSION", self.genversion, assignment=": ", section_end="ENDLIST_GENVERSION")
    for k in self.config.keys():
        if k.upper() != "GLOBAL":
            run_config = self.config[k]
            run_config_keys = list(run_config.keys())
            assert "BASE" in run_config_keys, "You must specify a base file for each option"
            for key in run_config_keys:
                if key.upper() in self.reserved_keywords:
                    continue
                base_file = run_config["BASE"]
                match = base_file.split(".")[0]
                self.set_property(f"GENOPT({match})", f"{key} {run_config[key]}", section_end="ENDLIST_GENVERSION")
    for key in self.config.get("GLOBAL", []):
        if key.upper() == "BASE":
            continue
        self.set_property(key, self.config["GLOBAL"][key])
        if key == "RANSEED_CHANGE":
            self.delete_property("RANSEED_REPEAT")
        elif key == "RANSEED_REPEAT":
            self.delete_property("RANSEED_CHANGE")
    self.set_property("SIMGEN_INFILE_Ia", " ".join(self.base_ia) if self.base_ia else None)
    self.set_property("SIMGEN_INFILE_NONIa", " ".join(self.base_cc) if self.base_cc else None)
    self.set_property("GENPREFIX", self.genversion)
    # Put config in a temp directory
    temp_dir_obj = tempfile.TemporaryDirectory()
    temp_dir = temp_dir_obj.name
    # Copy the base files across
    for f in self.base_ia:
        shutil.copy(self.data_dir + f, temp_dir)
    for f in self.base_cc:
        shutil.copy(self.data_dir + f, temp_dir)
    # Copy the include input file if there is one
    input_copied = []
    fs = self.base_ia + self.base_cc
    for ff in fs:
        if ff not in input_copied:
            input_copied.append(ff)
            with open(self.data_dir + ff, "r") as f:
                for line in f.readlines():
                    line = line.strip()
                    if line.startswith("INPUT_FILE_INCLUDE"):
                        include_file = line.split(":")[-1].strip()
                        self.logger.debug(f"Copying included file {include_file}")
                        shutil.copy(self.data_dir + include_file, temp_dir)
    # Write the primary input file
    main_input_file = f"{temp_dir}/{self.genversion}.input"
    with open(main_input_file, "w") as f:
        f.writelines(map(lambda s: s + "\n", self.base))
    self.logger.info(f"Input file written to {main_input_file}")
    # Remove any duplicates and order the output files
    output_files = [f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))]
    self.logger.debug(f"{len(output_files)} files used to create simulation. Hashing them.")
    # Get current hash
    new_hash = self.get_hash_from_files(output_files)
    old_hash = self.get_old_hash()
    regenerate = force_refresh or (old_hash is None or old_hash != new_hash)
    if regenerate:
        self.logger.info("Running simulation")
        # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
        if "//" not in self.output_dir and len(self.output_dir) > 30:
            self.logger.debug(f"Cleaning output directory {self.output_dir}")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.logger.debug(f"Copying from {temp_dir} to {self.output_dir}")
            copytree(temp_dir, self.output_dir)
            self.save_new_hash(new_hash)
        else:
            self.logger.error(f"Seems to be an issue with the output dir path: {self.output_dir}")
        chown_dir(self.output_dir)
    else:
        self.logger.info("Hash check passed, not rerunning")
    temp_dir_obj.cleanup()
    return regenerate, new_hash
def _check_completion(self, squeue):
    # Check the log for errors and, if found, print the offending line so you don't have to look up the file
    output_error = False
    if self.logging_file is not None and os.path.exists(self.logging_file):
        with open(self.logging_file, "r") as f:
            for line in f.read().splitlines():
                if "ERROR" in line or "***** ABORT *****" in line:
                    self.logger.error(f"Fatal error in simulation. See {self.logging_file} for details.")
                    output_error = True
                if output_error:
                    self.logger.info(f"Excerpt: {line}")
        if output_error:
            self.logger.debug("Removing hash on failure")
            os.remove(self.hash_file)
            chown_dir(self.output_dir)
            return Task.FINISHED_FAILURE
    else:
        self.logger.warning(f"Simulation {self.name} logging file does not exist: {self.logging_file}")
    for file in os.listdir(self.sim_log_dir):
        if not file.startswith("TMP") or not file.endswith(".LOG"):
            continue
        with open(self.sim_log_dir + "/" + file, "r") as f:
            for line in f.read().splitlines():
                if (" ABORT " in line or "FATAL[" in line) and not output_error:
                    output_error = True
                    self.logger.error(f"Fatal error in simulation. See {self.sim_log_dir}/{file} for details.")
                if output_error:
                    self.logger.info(f"Excerpt: {line}")
    if output_error:
        self.logger.debug("Removing hash on failure")
        os.remove(self.hash_file)
        chown_dir(self.output_dir)
        return Task.FINISHED_FAILURE
    # Check to see if the done file exists
    sim_folder_endpoint = f"{self.output_dir}/{self.genversion}"
    if os.path.exists(self.done_file):
        self.logger.info(f"Simulation {self.name} found done file!")
        if os.path.exists(self.total_summary):
            with open(self.total_summary) as f:
                key, count = None, None
                for line in f.readlines():
                    if line.strip().startswith("SUM-"):
                        key = line.strip().split()[0]
                    if line.strip().startswith(self.genversion):
                        count = line.split()[2]
                        self.logger.debug(f"Simulation reports {key} wrote {count} to file")
        else:
            self.logger.debug(f"Cannot find {self.total_summary}")
        if not os.path.exists(sim_folder_endpoint):
            sim_folder = os.path.expandvars(f"{self.global_config['SNANA']['sim_dir']}/{self.genversion}")
            self.logger.info("Done file found, creating symlinks")
            self.logger.debug(f"Linking {sim_folder} -> {sim_folder_endpoint}")
            os.symlink(sim_folder, sim_folder_endpoint, target_is_directory=True)
            chown_dir(self.output_dir)
        self.output = {"photometry_dir": sim_folder_endpoint, "types": self.get_types()}
        return Task.FINISHED_SUCCESS
    return 0  # TODO: Update to num jobs
def _check_completion(self, squeue):
    if os.path.exists(self.done_file):
        self.logger.info(f"Simulation {self.name} found done file!")
        with open(self.done_file) as f:
            if "FAIL" in f.read():
                self.logger.error(f"Done file {self.done_file} reporting failure")
                log_files = [self.logging_file]
                if os.path.exists(self.sim_log_dir):
                    log_files += [os.path.join(self.sim_log_dir, f) for f in os.listdir(self.sim_log_dir) if f.upper().endswith(".LOG")]
                else:
                    self.logger.warning(f"Warning, sim log dir {self.sim_log_dir} does not exist. Something might have gone terribly wrong")
                self.scan_files_for_error(log_files, "FATAL ERROR ABORT", "QOSMaxSubmitJobPerUserLimit", "DUE TO TIME LIMIT")
                return Task.FINISHED_FAILURE
        if os.path.exists(self.total_summary):
            with open(self.total_summary) as f:
                key, count = None, None
                allzero = True
                for line in f.readlines():
                    if line.strip().startswith("SUM-"):
                        key = line.strip().split()[0]
                    if line.strip().startswith(self.genversion):
                        count = line.split()[2]
                        self.logger.debug(f"Simulation reports {key} wrote {count} to file")
                        if int(count.strip()) > 0:
                            allzero = False
                if allzero:
                    self.logger.error(f"Simulation didn't write anything out according to {self.total_summary}")
                    return Task.FINISHED_FAILURE
        else:
            self.logger.warning(f"Cannot find {self.total_summary}")
        self.logger.info("Done file found, creating symlinks")
        s_ends = [os.path.join(self.output_dir, os.path.basename(s)) for s in self.sim_folders]
        for s, s_end in zip(self.sim_folders, s_ends):
            if not os.path.exists(s_end):
                self.logger.debug(f"Linking {s} -> {s_end}")
                os.symlink(s, s_end, target_is_directory=True)
        chown_dir(self.output_dir)
        self.output.update({"photometry_dirs": s_ends})
        return Task.FINISHED_SUCCESS
    return self.check_for_job(squeue, f"{self.genprefix}_0")
def run(self):
    regenerate, new_hash = self.write_nml()
    if not regenerate:
        return new_hash
    logging_file = self.config_path.replace(".nml", ".nml_log")
    with open(logging_file, "w") as f:
        # TODO: Add queue to config and run
        subprocess.run(["split_and_fit.pl", self.config_path, "NOPROMPT"], stdout=f, stderr=subprocess.STDOUT, cwd=self.output_dir)
    self.logger.info(f"Light curve fitting outputting to {logging_file}")
    done_file = f"{self.lc_output_dir}/SPLIT_JOBS_LCFIT.tar.gz"
    secondary_log = f"{self.lc_output_dir}/SPLIT_JOBS_LCFIT/MERGELOGS/MERGE2.LOG"
    log_files = [logging_file, secondary_log]
    while True:
        time.sleep(self.global_config["OUTPUT"].getint("ping_frequency"))
        # Check for errors
        for file in log_files:
            if os.path.exists(file):
                with open(file, "r") as f:
                    output_error = False
                    for line in f.read().splitlines():
                        if ("ERROR" in line or ("ABORT" in line and " 0 " not in line)) and not output_error:
                            self.logger.critical(f"Fatal error in light curve fitting. See {file} for details.")
                            output_error = True
                        if output_error:
                            self.logger.error(f"Excerpt: {line}")
                if output_error:
                    return False
        # Check for existence of SPLIT_JOBS_LCFIT.tar.gz to see if the job is done
        if os.path.exists(done_file):
            self.logger.info("Tarball found, fitting complete, cleaning up the directory")
            try:
                logging_file2 = logging_file.replace("_log", "_log2")
                with open(logging_file2, "w") as f:
                    subprocess.run(["split_and_fit.pl", "CLEANMASK", "4", "NOPROMPT"], stdout=f, stderr=subprocess.STDOUT, cwd=self.output_dir, check=True)
                time.sleep(10)
            except subprocess.CalledProcessError as e:
                self.logger.warning(f"split_and_fit.pl has a return code of {e.returncode}. This may or may not be an issue.")
            chown_dir(self.output_dir)
            self.print_stats()
            return new_hash
def _check_completion(self, squeue):
    if os.path.exists(self.done_file):
        self.logger.info("Job complete")
        new_pred_file = self.output_dir + "/predictions.csv"
        new_model_file = self.output_dir + "/model.pt"
        if not os.path.exists(new_pred_file) or not os.path.exists(new_model_file):
            self.logger.info("Updating model location or generating predictions file")
            model, predictions = self.get_model_and_pred()
            if not os.path.exists(new_model_file):
                if model is not None:
                    shutil.move(model, new_model_file)
                    args_old, args_new = os.path.abspath(os.path.join(os.path.dirname(model), "cli_args.json")), self.output_dir + "/cli_args.json"
                    norm_old, norm_new = os.path.abspath(os.path.join(os.path.dirname(model), "data_norm.json")), self.output_dir + "/data_norm.json"
                    shutil.move(args_old, args_new)
                    shutil.move(norm_old, norm_new)
                    self.logger.info(f"Model file can be found at {new_model_file}")
            if not os.path.exists(new_pred_file):
                with open(predictions, "rb") as f:
                    dataframe = pickle.load(f)
                if self.variant in ["variational", "bayesian"]:
                    final_dataframe = dataframe[["SNID", "all_class0_median", "all_class0_std"]]
                    final_dataframe = final_dataframe.rename(columns={"all_class0_median": self.get_prob_column_name(), "all_class0_std": self.get_prob_column_name() + "_ERR"})
                else:
                    final_dataframe = dataframe[["SNID", "all_class0"]]
                    final_dataframe = final_dataframe.rename(columns={"all_class0": self.get_prob_column_name()})
                final_dataframe.to_csv(new_pred_file, index=False, float_format="%0.4f")
                self.logger.info(f"Predictions file can be found at {new_pred_file}")
            chown_dir(self.output_dir)
        self.output.update({"model_filename": new_model_file, "predictions_filename": new_pred_file})
        return Task.FINISHED_SUCCESS
    else:
        num_jobs = self.num_jobs if squeue is None else len([i for i in squeue if self.job_base_name in i])
        if squeue is not None and num_jobs == 0:
            self.logger.warning("SuperNNova has no done file and has no active jobs. This is not good.")
            if os.path.exists(self.hash_file):
                self.logger.info("Removing hash on failure")
                os.remove(self.hash_file)
            return Task.FINISHED_FAILURE
        return num_jobs
def classify(self):
    new_hash = self.get_hash_from_string(self.name)
    if self._check_regenerate(new_hash):
        shutil.rmtree(self.output_dir, ignore_errors=True)
        mkdirs(self.output_dir)
        try:
            name = self.get_prob_column_name()
            cid = "CID"
            s = self.get_simulation_dependency()
            df = None
            phot_dir = s.output["photometry_dirs"][self.index]
            headers = [os.path.join(phot_dir, a) for a in os.listdir(phot_dir) if "HEAD" in a]
            if len(headers) == 0:
                self.logger.warning(f"No HEAD fits files found in {phot_dir}! Going to do it manually, this may not work.")
                cmd = "grep --exclude-dir=* SNID: * | awk -F ':' '{print $3}'"
                self.logger.debug(f"Running command {cmd}")
                process = subprocess.run(cmd, capture_output=True, cwd=phot_dir, shell=True)
                output = process.stdout.decode("ascii").split("\n")
                output = [x for x in output if x]
                snid = [x.strip() for x in output]
                df = pd.DataFrame({cid: snid, name: np.ones(len(snid))})
                df.drop_duplicates(subset=cid, inplace=True)
            else:
                for h in headers:
                    with fits.open(h) as hdul:
                        data = hdul[1].data
                        snid = np.array(data.field("SNID"))
                        dataframe = pd.DataFrame({cid: snid, name: np.ones(snid.shape)})
                        dataframe[cid] = dataframe[cid].apply(str)
                        dataframe[cid] = dataframe[cid].str.strip()
                        if df is None:
                            df = dataframe
                        else:
                            df = pd.concat([df, dataframe])
                df.drop_duplicates(subset=cid, inplace=True)
            self.logger.info(f"Saving probabilities to {self.output_file}")
            df.to_csv(self.output_file, index=False, float_format="%0.4f")
            chown_dir(self.output_dir)
            with open(self.done_file, "w") as f:
                f.write("SUCCESS")
            self.save_new_hash(new_hash)
        except Exception as e:
            self.logger.exception(e, exc_info=True)
            self.passed = False
            with open(self.done_file, "w") as f:
                f.write("FAILED")
            return False
    else:
        self.should_be_done()
    self.passed = True
    return True
def run(args):
    if args is None:
        return None
    init()
    # Load YAML config file
    yaml_path = os.path.abspath(os.path.expandvars(args.yaml))
    assert os.path.exists(yaml_path), f"File {yaml_path} cannot be found."
    config_raw, config = load_yaml(yaml_path)
    # with open(yaml_path, "r") as f:
    #     config = yaml.safe_load(f)
    overwrites = config.get("GLOBAL")
    if config.get("GLOBALS") is not None:
        logging.warning("Your config file has a GLOBALS section in it. If you're trying to overwrite cfg.yml, rename this to GLOBAL")
    cfg = None
    if config.get("GLOBAL"):
        cfg = config.get("GLOBAL").get("CFG_PATH")
    if cfg is None:
        cfg = args.config
    global_config = get_config(initial_path=cfg, overwrites=overwrites)
    config_filename = os.path.basename(args.yaml).split(".")[0].upper()
    output_dir = get_output_dir()
    logging_folder = os.path.abspath(os.path.join(output_dir, config_filename))
    if not args.check:
        mkdirs(logging_folder)
    if os.path.exists(logging_folder):
        chown_dir(logging_folder, walk=args.permission)
    if args.permission:
        return
    message_store, logging_filename = setup_logging(config_filename, logging_folder, args)
    for i, d in enumerate(global_config["DATA_DIRS"]):
        logging.debug(f"Data directory {i + 1} set as {d}")
        assert d is not None, "Data directory is none, which means it failed to resolve. Check the error message above for why."
    logging.info(f"Running on: {os.environ.get('HOSTNAME', '$HOSTNAME not set')} login node.")
    manager = Manager(config_filename, yaml_path, config_raw, config, message_store)

    # Gracefully handle Ctrl-C
    def handler(signum, frame):
        logging.error("Ctrl-c was pressed.")
        logging.warning("All remaining tasks will be killed and their hash reset")
        manager.kill_remaining_tasks()
        exit(1)

    signal.signal(signal.SIGINT, handler)
    if args.start is not None:
        args.refresh = True
    manager.set_start(args.start)
    manager.set_finish(args.finish)
    manager.set_force_refresh(args.refresh)
    manager.set_force_ignore_stage(args.ignore)
    manager.execute(args.check, args.compress, args.uncompress)
    chown_file(logging_filename)
    return manager