Example #1
    def classify(self):
        new_hash = self.get_hash_from_string(self.name +
                                             f"{self.prob_ia}_{self.prob_cc}")

        if self._check_regenerate(new_hash):
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            try:
                name = self.get_prob_column_name()
                cid = "CID"
                s = self.get_simulation_dependency()
                df = None
                phot_dir = s.output["photometry_dirs"][self.index]
                headers = [
                    os.path.join(phot_dir, a) for a in os.listdir(phot_dir)
                    if "HEAD" in a
                ]
                if not headers:
                    Task.fail_config(
                        f"No HEAD fits files found in {phot_dir}!")
                else:
                    types = self.get_simulation_dependency(
                    ).output["types_dict"]
                    self.logger.debug(f"Input types are {types}")

                    for h in headers:
                        with fits.open(h) as hdul:
                            data = hdul[1].data
                            snid = np.array(data.field("SNID"))
                            sntype = np.array(data.field("SNTYPE")).astype(
                                np.int64)
                            is_ia = np.isin(sntype, types["IA"])
                            prob = (is_ia * self.prob_ia) + (~is_ia *
                                                             self.prob_cc)

                            dataframe = pd.DataFrame({cid: snid, name: prob})
                            dataframe[cid] = dataframe[cid].apply(str)
                            dataframe[cid] = dataframe[cid].str.strip()
                            if df is None:
                                df = dataframe
                            else:
                                df = pd.concat([df, dataframe])
                    df.drop_duplicates(subset=cid, inplace=True)

                self.logger.info(f"Saving probabilities to {self.output_file}")
                df.to_csv(self.output_file, index=False, float_format="%0.4f")
                chown_dir(self.output_dir)
                with open(self.done_file, "w") as f:
                    f.write("SUCCESS")
                self.save_new_hash(new_hash)
            except Exception as e:
                self.logger.exception(e, exc_info=True)
                self.passed = False
                with open(self.done_file, "w") as f:
                    f.write("FAILED")
                return False
        else:
            self.should_be_done()
        self.passed = True
        return True
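
Example #1 shows the hash-guarded regeneration pattern used throughout these examples: fingerprint the task inputs, rebuild the output directory only when the fingerprint changes, and persist the fingerprint once the work succeeds. Below is a minimal standalone sketch of that guard, assuming an md5 fingerprint stored in a hash.txt file; the real get_hash_from_string/_check_regenerate/save_new_hash helpers live on the task base class and may differ.

import hashlib
import os

def get_hash_from_string(s):
    # Stable fingerprint of the task configuration string.
    return hashlib.md5(s.encode("utf-8")).hexdigest()

def check_regenerate(new_hash, hash_file):
    # Regenerate if no previous run exists or the inputs changed.
    if not os.path.exists(hash_file):
        return True
    with open(hash_file) as f:
        return f.read().strip() != new_hash

def save_new_hash(new_hash, hash_file):
    # Persist the fingerprint only after the task succeeded.
    with open(hash_file, "w") as f:
        f.write(new_hash)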
Example #2
 def _check_completion(self, squeue):
     if os.path.exists(self.done_file):
         self.logger.debug(
             f"Merger finished, see combined fitres at {self.suboutput_dir}"
         )
         return Task.FINISHED_SUCCESS
     else:
         output_error = False
         if os.path.exists(self.logfile):
             with open(self.logfile, "r") as f:
                 for line in f.read().splitlines():
                     if "ERROR" in line or "ABORT" in line:
                         self.logger.error(
                             f"Fatal error in combine_fitres. See {self.logfile} for details."
                         )
                         output_error = True
                     if output_error:
                         self.logger.info(f"Excerpt: {line}")
             if output_error:
                 self.logger.debug("Removing hash on failure")
                 os.remove(self.hash_file)
                 chown_dir(self.output_dir)
         else:
             self.logger.error(
                 "Combine task failed with no output log. Please debug")
         return Task.FINISHED_FAILURE
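
The scan in Example #2 (repeated in Examples #7, #10, #16, and #25) flags the first ERROR/ABORT line and then echoes everything after it, so the failure is visible without opening the log. A sketch of how that loop could be factored into one helper; the helper name and marker list are illustrative, not part of the project.

def scan_log_for_errors(logfile, logger, markers=("ERROR", "ABORT")):
    # Returns True if a fatal marker was found; echoes the log from there on.
    found = False
    with open(logfile, "r") as f:
        for line in f.read().splitlines():
            if not found and any(m in line for m in markers):
                logger.error(f"Fatal error found. See {logfile} for details.")
                found = True
            if found:
                logger.info(f"Excerpt: {line}")
    return found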
Example #3
    def classify(self):
        new_hash = self.get_hash_from_string(self.name)
        if self._check_regenerate(new_hash):
            mkdirs(self.output_dir)
            input = self.get_fit_dependency()
            fitres_file = os.path.join(input["fitres_dirs"][self.index],
                                       input["fitopt_map"][self.fitopt])
            self.logger.debug(f"Looking for {fitres_file}")
            if not os.path.exists(fitres_file):
                self.logger.error(
                    f"FITRES file could not be found at {fitres_file}, classifer has nothing to work with"
                )
                self.passed = False
                return False

            df = pd.read_csv(fitres_file, delim_whitespace=True, comment="#")
            df = df[[
                "CID", "FITPROB"
            ]].rename(columns={"FITPROB": self.get_prob_column_name()})

            self.logger.info(f"Saving probabilities to {self.output_file}")
            df.to_csv(self.output_file, index=False, float_format="%0.4f")
            chown_dir(self.output_dir)
            with open(self.done_file, "w") as f:
                f.write("SUCCESS")
            self.save_new_hash(new_hash)
        self.passed = True

        return True
Example #4
File: fitprob.py Project: skuhl99/Pippin
    def classify(self, force_refresh):
        new_hash = self.check_regenerate(force_refresh)
        if new_hash:
            mkdirs(self.output_dir)
            input = self.get_fit_dependency()
            fitres_file = input["fitres_file"]
            self.logger.debug(f"Looking for {fitres_file}")
            if not os.path.exists(fitres_file):
                self.logger.error(
                    f"FITRES file could not be found at {fitres_file}, classifer has nothing to work with"
                )
                self.passed = False
                return False

            df = pd.read_csv(fitres_file,
                             sep=r'\s+',
                             comment="#",
                             compression="infer")
            df = df[[
                "CID", "FITPROB"
            ]].rename(columns={"FITPROB": self.get_prob_column_name()})

            self.logger.info(f"Saving probabilities to {self.output_file}")
            df.to_csv(self.output_file, index=False, float_format="%0.4f")
            chown_dir(self.output_dir)
            with open(self.done_file, "w") as f:
                f.write("SUCCESS")
            self.save_new_hash(new_hash)
        self.passed = True

        return True
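
Examples #3 and #4 read the same whitespace-delimited FITRES table in two ways: delim_whitespace=True and sep='\s+'. Recent pandas releases deprecate delim_whitespace in favour of the regex separator, and a raw string avoids the invalid \s escape warning. A hedged equivalent follows; the file name and probability column are placeholders.

import pandas as pd

# Raw-string regex separator replaces the deprecated delim_whitespace=True;
# compression="infer" handles a .gz extension transparently.
df = pd.read_csv("FITOPT000.FITRES.gz", sep=r"\s+", comment="#", compression="infer")
df = df[["CID", "FITPROB"]].rename(columns={"FITPROB": "PROB_EXAMPLE"})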
Example #5
    def write_nml(self, force_refresh):

        # Parse config, first SNLCINP and then FITINP
        for key, value in self.config.get("SNLCINP", {}).items():
            self.set_snlcinp(key, value)
        for key, value in self.config.get("FITINP", {}).items():
            self.set_fitinp(key, value)
        for key, value in self.options.items():
            self.set_property(key,
                              value,
                              assignment=": ",
                              section_end="&SNLCINP")

        if self.sim_task.output["ranseed_change"]:
            self.set_property("VERSION",
                              self.sim_version + "-0*",
                              assignment=": ",
                              section_end="&SNLCINP")
        else:
            self.set_property("VERSION",
                              self.sim_version,
                              assignment=": ",
                              section_end="&SNLCINP")

        self.set_property("OUTDIR",
                          self.lc_output_dir,
                          assignment=": ",
                          section_end="&SNLCINP")
        self.set_property("DONE_STAMP",
                          "FINISHED.DONE",
                          assignment=": ",
                          section_end="&SNLCINP")

        if isinstance(self.sim_task, DataPrep):
            self.set_snlcinp("PRIVATE_DATA_PATH",
                             f"'{self.sim_task.output['data_path']}'")
            self.set_snlcinp("VERSION_PHOTOMETRY",
                             f"'{self.sim_task.output['genversion']}'")

        # We want to do our hashing check here
        string_to_hash = self.fitopts + self.base
        new_hash = self.get_hash_from_string("".join(string_to_hash))
        old_hash = self.get_old_hash()
        regenerate = force_refresh or (old_hash is None
                                       or old_hash != new_hash)

        if regenerate:
            self.logger.info(f"Running Light curve fit. Removing output_dir")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            # Write main file
            with open(self.config_path, "w") as f:
                f.writelines(map(lambda s: s + "\n", string_to_hash))
            self.logger.info(f"NML file written to {self.config_path}")
            self.save_new_hash(new_hash)
            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")

        return regenerate, new_hash
Example #6
 def _run(self, force_refresh):
     regenerating = self.write_input(force_refresh)
     return False  # TODO: Remove this so the rest runs, once I figure out where the output is located
     if regenerating:
         command = ["SALT2mu_fit.pl", self.config_path]
         for d in self.data:
             command += ["INPDIR+", d]
         command += ["NOPROMPT"]
         with open(self.logging_file, "w") as f:
             subprocess.run(command, stdout=f, stderr=subprocess.STDOUT, cwd=self.output_dir)
         chown_dir(self.output_dir)
     return True
Example #7
File: merge.py Project: skuhl99/Pippin
    def _check_completion(self, squeue):
        if os.path.exists(self.done_file):
            self.logger.debug(
                f"Merger finished, see combined fitres at {self.done_file}")

            # Copy MERGE.LOG and FITOPT.README if they aren't there
            filenames = ["MERGE.LOG", "FITOPT.README"]
            for f in filenames:
                original = os.path.join(self.lc_fit["lc_output_dir"], f)
                moved = os.path.join(self.output_dir, f)
                if not os.path.exists(moved):
                    self.logger.debug("Copying file {f} into output directory")
                    shutil.move(original, moved)

            # Rearrange folders and file names to resemble split_and_fit output for salt2mu
            outdir = os.path.join(self.output_dir, self.lc_fit["genversion"])
            new_output = os.path.join(outdir, "FITOPT000.FITRES")
            if not os.path.exists(outdir):
                os.makedirs(outdir, exist_ok=True)

                original_output = self.done_file
                shutil.move(original_output, new_output)

                # Recreate done file -_-
                with open(self.done_file, "w") as f:
                    f.write("SUCCESS")

            self.output["fitres_file"] = new_output
            self.output["fitres_dir"] = outdir
            return Task.FINISHED_SUCCESS
        else:
            output_error = False
            if os.path.exists(self.logfile):
                with open(self.logfile, "r") as f:
                    for line in f.read().splitlines():
                        if "ERROR" in line or "ABORT" in line:
                            self.logger.error(
                                f"Fatal error in combine_fitres. See {self.logfile} for details."
                            )
                            output_error = True
                        if output_error:
                            self.logger.info(f"Excerpt: {line}")
                if output_error:
                    self.logger.debug("Removing hash on failure")
                    os.remove(self.hash_file)
                    chown_dir(self.output_dir)
                    return Task.FINISHED_FAILURE
            else:
                self.logger.error(
                    "Combine task failed with no output log. Please debug")
                return Task.FINISHED_FAILURE
Example #8
File: snana_fit.py Project: skuhl99/Pippin
    def write_nml(self, force_refresh):
        self.logger.debug(f"Loading fitopts file from {self.fitopts_file}")
        with open(self.fitopts_file, "r") as f:
            self.fitopts = list(f.read().splitlines())
            self.logger.info(
                f"Loaded {len(self.fitopts)} fitopts file from {self.fitopts_file}"
            )

        # Parse config, first SNLCINP and then FITINP
        for key, value in self.config.get("SNLCINP", {}).items():
            self.set_snlcinp(key, value)
        for key, value in self.config.get("FITINP", {}).items():
            self.set_fitinp(key, value)
        self.set_property(
            "VERSION",
            self.sim_version + "*",
            assignment=": ",
            section_end="&SNLCINP")  # TODO FIX THIS, DOUBLE VERSION KEY
        self.set_property("OUTDIR",
                          self.lc_output_dir,
                          assignment=": ",
                          section_end="&SNLCINP")
        if isinstance(self.sim_task, DataPrep):
            self.set_snlcinp("PRIVATE_DATA_PATH",
                             f"'{self.sim_task.output['data_path']}'")
            self.set_snlcinp("VERSION_PHOTOMETRY",
                             f"'{self.sim_task.output['genversion']}'")

        # We want to do our hashing check here
        string_to_hash = self.fitopts + self.base
        # with open(os.path.abspath(inspect.stack()[0][1]), "r") as f:
        #     string_to_hash += f.read()
        new_hash = self.get_hash_from_string("".join(string_to_hash))
        old_hash = self.get_old_hash()
        regenerate = force_refresh or (old_hash is None
                                       or old_hash != new_hash)

        if regenerate:
            self.logger.info(f"Running Light curve fit. Removing output_dir")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            # Write main file
            with open(self.config_path, "w") as f:
                f.writelines(map(lambda s: s + '\n', string_to_hash))
            self.logger.info(f"NML file written to {self.config_path}")
            self.save_new_hash(new_hash)
            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")

        return regenerate, new_hash
Example #9
File: biascor.py Project: skuhl99/Pippin
 def _run(self, force_refresh):
     regenerating = self.write_input(force_refresh)
     if regenerating:
         command = ["SALT2mu_fit.pl", self.config_filename]
         for d in self.data:
             command += ["INPDIR+", d]
         command += ["NOPROMPT"]
         self.logger.debug(f"Will check for done file at {self.done_file}")
         self.logger.debug(f"Will output log at {self.logging_file}")
         self.logger.debug(f"Running command: {' '.join(command)}")
         with open(self.logging_file, "w") as f:
             subprocess.run(command, stdout=f, stderr=subprocess.STDOUT, cwd=self.output_dir)
         chown_dir(self.output_dir)
     return True
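
Both _run variants above launch SALT2mu_fit.pl with stdout and stderr mirrored into a log file, relying on done-files written by the job to detect failure. A small sketch that additionally inspects the return code, which catches jobs that die before writing a done-file; the helper name is illustrative.

import subprocess

def run_logged(command, logging_file, cwd):
    # Mirror all output to the log, then report whether the process exited cleanly.
    with open(logging_file, "w") as f:
        result = subprocess.run(command, stdout=f, stderr=subprocess.STDOUT, cwd=cwd)
    return result.returncode == 0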
Example #10
File: snana_fit.py Project: skuhl99/Pippin
    def _check_completion(self, squeue):
        # Check for errors
        for file in self.log_files:
            if os.path.exists(file):
                with open(file, "r") as f:
                    output_error = False
                    for line in f.read().splitlines():
                        if ("ERROR" in line or
                            ("ABORT" in line
                             and " 0 " not in line)) and not output_error:
                            self.logger.error(
                                f"Fatal error in light curve fitting. See {file} for details."
                            )
                            output_error = True
                        if output_error:
                            self.logger.info(f"Excerpt: {line}")

                if output_error:
                    return Task.FINISHED_FAILURE

        # Check for existence of SPLIT_JOBS_LCFIT.tar.gz to see if job is done
        if os.path.exists(self.done_file):
            self.logger.info("Light curve done file found")
            logging_file2 = self.logging_file.replace("_log", "_log2")
            if not os.path.exists(logging_file2):
                self.logger.info(
                    "Tarball found, fitting complete, cleaning up the directory"
                )
                try:
                    with open(logging_file2, "w") as f:
                        subprocess.run(
                            ["split_and_fit.pl", "CLEANMASK", "4", "NOPROMPT"],
                            stdout=f,
                            stderr=subprocess.STDOUT,
                            cwd=self.output_dir,
                            check=True)
                        time.sleep(2)
                except subprocess.CalledProcessError as e:
                    self.logger.warning(
                        f"split_and_fit.pl has a return code of {e.returncode}. This may or may not be an issue."
                    )
                chown_dir(self.output_dir)
                self.print_stats()

            self.output["fitres_file"] = os.path.abspath(
                os.path.join(self.fitres_dir,
                             "FITOPT000.FITRES.gz"))  # TODO: Ask rick if there
            return Task.FINISHED_SUCCESS
        return 0
Example #11
    def write_nml(self):
        # Parse config, first SNLCINP and then FITINP
        for key, value in self.config.get("SNLCINP", {}).items():
            self.set_snlcinp(key, value)
        for key, value in self.config.get("FITINP", {}).items():
            self.set_fitinp(key, value)
        self.set_property(
            "VERSION",
            self.sim_version + "*",
            assignment=":",
            section_end="&SNLCINP")  # TODO FIX THIS, DOUBLE VERSION KEY
        self.set_property("OUTDIR",
                          self.lc_output_dir,
                          assignment=":",
                          section_end="&SNLCINP")

        # Load old hash
        old_hash = None
        hash_file = f"{self.output_dir}/hash.txt"
        if os.path.exists(hash_file):
            with open(hash_file, "r") as f:
                old_hash = f.read().strip()
                self.logger.debug(f"Previous result found, hash is {old_hash}")

        # We want to do our hashing check here
        total_string = self.fitopts + self.base
        string_to_hash = self.sim_hash + "".join(total_string)
        with open(os.path.abspath(inspect.stack()[0][1]), "r") as f:
            string_to_hash += f.read()
        new_hash = get_hash(string_to_hash)

        self.logger.debug(f"Current hash set to {new_hash}")
        regenerate = old_hash is None or old_hash != new_hash

        if regenerate:
            self.logger.info(f"Running Light curve fit, hash check failed")

            # Write main file
            with open(self.config_path, "w") as f:
                f.writelines(map(lambda s: s + '\n', total_string))
            self.logger.info(f"NML file written to {self.config_path}")

            with open(hash_file, "w") as f:
                f.write(str(new_hash))
                self.logger.debug(f"New hash saved to {hash_file}")
            chown_dir(self.output_dir)

        return regenerate, new_hash
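
Examples #11 and #19 fold the running module's own source into the hash via inspect.stack()[0][1], so editing the task code invalidates cached results even when the inputs are unchanged. A simpler equivalent, assuming the code runs from a regular module file, uses __file__:

import os

string_to_hash = ""  # built from fitopts + base as in the example above

# Append this module's own source so code edits also force regeneration.
with open(os.path.abspath(__file__), "r") as f:
    string_to_hash += f.read()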
Example #12
File: snana_fit.py Project: Samreay/Pippin
    def write_nml(self):

        # Parse config, first SNLCINP and then FITINP
        for key, value in self.config.get("SNLCINP", {}).items():
            self.set_snlcinp(key, value)
        for key, value in self.config.get("FITINP", {}).items():
            self.set_fitinp(key, value)
        for key, value in self.options.items():
            #print(key,value)
            self.yaml["CONFIG"][key] = value

        self.compute_fitopts()

        if self.sim_task.output["ranseed_change"]:
            self.yaml["CONFIG"]["VERSION"] = [self.sim_version + "-0*"]
        else:
            self.yaml["CONFIG"]["VERSION"] = [self.sim_version]

        self.yaml["CONFIG"]["OUTDIR"] = self.lc_output_dir
        # self.yaml["CONFIG"]["DONE_STAMP"] = "ALL.DONE"

        if isinstance(self.sim_task, DataPrep):
            data_path = self.sim_task.output["data_path"]
            if "SNDATA_ROOT/lcmerge" not in data_path:
                self.set_snlcinp("PRIVATE_DATA_PATH", f"'{self.sim_task.output['data_path']}'")
            self.set_snlcinp("VERSION_PHOTOMETRY", f"'{self.sim_task.output['genversion']}'")

        # We want to do our hashing check here
        string_to_hash = self.get_output_string()
        new_hash = self.get_hash_from_string(string_to_hash)
        regenerate = self._check_regenerate(new_hash)
        if regenerate:
            self.logger.info(f"Running Light curve fit. Removing output_dir")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            # Write main file

            # Write the primary input file
            self.write_output_file(self.config_path)
            self.logger.info(f"NML file written to {self.config_path}")
            self.save_new_hash(new_hash)
            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")

        return regenerate, new_hash
Example #13
 def check_task_completion(self, t, blocked_tasks, done_tasks, failed_tasks,
                           running_tasks, squeue):
     result = t.check_completion(squeue)
     # If it's finished, good or bad, juggle tasks
     if result in [Task.FINISHED_SUCCESS, Task.FINISHED_FAILURE]:
         if t.gpu:
             self.num_jobs_queue_gpu -= t.num_jobs
         else:
             self.num_jobs_queue -= t.num_jobs
         if result == Task.FINISHED_SUCCESS:
             running_tasks.remove(t)
             self.logger.notice(
                 f"FINISHED: {t}, total jobs now {self.num_jobs_queue}")
             done_tasks.append(t)
         else:
             self.fail_task(t, running_tasks, failed_tasks, blocked_tasks)
         chown_dir(t.output_dir)
         return True
     return False
Example #14
File: wfit.py Project: Samreay/Pippin
    def _run(self):
        self.yaml["CONFIG"]["WFITOPT"] = self.wfitopts
        self.yaml["CONFIG"]["INPDIR"] = self.create_cov_dirs
        self.yaml["CONFIG"]["OUTDIR"] = os.path.join(self.output_dir, "output")
        # Pass all OPTS keys through to the yaml dictionary
        for k, v in self.options.items():
            # Clobber WFITOPTS to WFITOPT
            if k == "WFITOPTS":
                k = "WFITOPT"
            self.yaml["CONFIG"][k] = v

        final_output_for_hash = self.get_output_string()

        new_hash = self.get_hash_from_string(final_output_for_hash)

        if self._check_regenerate(new_hash):
            self.logger.debug("Regenerating and launching task")
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            self.save_new_hash(new_hash)

            with open(self.input_file, "w") as f:
                f.write(self.get_output_string())

            cmd = ["submit_batch_jobs.sh", os.path.basename(self.input_file)]
            self.logger.debug(
                f"Submitting wfit job: {' '.join(cmd)} in cwd: {self.output_dir}"
            )
            self.logger.debug(f"Logging to {self.logfile}")
            with open(self.logfile, 'w') as f:
                subprocess.run(' '.join(cmd),
                               stdout=f,
                               stderr=subprocess.STDOUT,
                               cwd=self.output_dir,
                               shell=True)
            chown_dir(self.output_dir)

        else:
            self.should_be_done()
            self.logger.info("Has check passed, not rerunning")
        return True
Example #15
    def classify(self):
        mkdirs(self.output_dir)

        fitres = f"{self.fit_dir}/FITOPT000.FITRES.gz"
        self.logger.debug(f"Looking for {fitres}")
        if not os.path.exists(fitres):
            self.logger.error(
                f"FITRES file could not be found at {fitres}, classifer has nothing to work with"
            )
            return False

        data = pd.read_csv(fitres, sep=r'\s+', comment="#", compression="infer")
        ids = data["CID"].values
        probability = np.random.uniform(size=ids.size)
        combined = np.vstack((ids, probability)).T

        output_file = self.output_dir + "/prob.txt"
        self.logger.info(f"Saving probabilities to {output_file}")
        np.savetxt(output_file, combined)
        chown_dir(self.output_dir)
        return True  # change to hash
Example #16
    def run(self):

        regenerate, new_hash = self.write_input()
        if not regenerate:
            return new_hash

        logging_file = self.config_path.replace(".input", ".input_log")
        with open(logging_file, "w") as f:
            subprocess.run(["sim_SNmix.pl", self.config_path], stdout=f, stderr=subprocess.STDOUT, cwd=self.output_dir)
        shutil.chown(logging_file, group=self.global_config["SNANA"]["group"])

        self.logger.info(f"Sim running and logging outputting to {logging_file}")
        sim_log_dir = f"{self.output_dir}/SIMLOGS_{self.genversion}"
        done_file = f"{sim_log_dir}/SIMJOB_ALL.DONE"

        # Monitor for success or failure
        time.sleep(10)
        while True:
            # Check log for errors and, if found, print the rest of the log so you don't have to look up the file
            output_error = False
            if os.path.exists(logging_file):
                with open(logging_file, "r") as f:
                    for line in f.read().splitlines():
                        if "ERROR" in line:
                            self.logger.critical(f"Fatal error in simulation. See {logging_file} for details.")
                            output_error = True
                        if output_error:
                            self.logger.error(f"Excerpt: {line}")
                if output_error:
                    self.logger.debug("Removing hash on failure")
                    os.remove(self.hash_file)
                    chown_dir(self.output_dir)
                    return False
            for file in os.listdir(sim_log_dir):
                if not file.startswith("TMP") or not file.endswith(".LOG"):
                    continue
                with open(sim_log_dir + "/" + file, "r") as f:
                    for line in f.read().splitlines():
                        if (" ABORT " in line or "FATAL[" in line) and not output_error:
                            output_error = True
                            self.logger.critical(f"Fatal error in simulation. See {sim_log_dir}/{file} for details.")
                        if output_error:
                            self.logger.error(f"Excerpt: {line}")
                if output_error:
                    self.logger.debug("Removing hash on failure")
                    os.remove(self.hash_file)
                    chown_dir(self.output_dir)
                    return False

            # Check to see if the done file exists
            if os.path.exists(done_file):
                sim_folder = os.path.expandvars(f"{self.global_config['SNANA']['sim_dir']}/{self.genversion}")
                sim_folder_endpoint = f"{self.output_dir}/{self.genversion}"
                self.logger.info("Done file found, creating symlinks")
                self.logger.debug(f"Linking {sim_folder} -> {sim_folder_endpoint}")
                os.symlink(sim_folder, sim_folder_endpoint, target_is_directory=True)
                chown_dir(self.output_dir)
                return new_hash

            time.sleep(self.global_config["OUTPUT"].getint("ping_frequency"))
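
The monitor loop in Example #16 polls indefinitely; if the batch job is killed without writing either an error line or the done file, the loop never exits. A minimal sketch of the same monitor with an explicit timeout; the helper name and its defaults are illustrative.

import os
import time

def wait_for_done_file(done_file, ping_frequency=60, timeout=24 * 3600):
    # Poll for the done-file, but give up after `timeout` seconds.
    start = time.time()
    while time.time() - start < timeout:
        if os.path.exists(done_file):
            return True
        time.sleep(ping_frequency)
    return False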
Example #17
File: biascor.py Project: Samreay/Pippin
 def _run(self):
     if self.blind:
         self.logger.info("NOTE: This run is being BLINDED")
     regenerating = self.write_input()
     if regenerating:
         command = [
             "submit_batch_jobs.sh",
             os.path.basename(self.config_filename)
         ]
         self.logger.debug(f"Will check for done file at {self.done_file}")
         self.logger.debug(f"Will output log at {self.logging_file}")
         self.logger.debug(f"Running command: {' '.join(command)}")
         with open(self.logging_file, "w") as f:
             subprocess.run([' '.join(command)],
                            stdout=f,
                            stderr=subprocess.STDOUT,
                            cwd=self.output_dir,
                            shell=True)
         chown_dir(self.output_dir)
         self.set_m0dif_dirs()
     else:
         self.should_be_done()
         self.logger.info("Hash check passed, not rerunning")
     return True
Example #18
 def check_task_completion(self, t, squeue):
     result = t.check_completion(squeue)
     # If it's finished, good or bad, juggle tasks
     if result in [Task.FINISHED_SUCCESS, Task.FINISHED_FAILURE]:
         if t.gpu:
             self.num_jobs_queue_gpu -= t.num_jobs
         else:
             self.num_jobs_queue -= t.num_jobs
         if result == Task.FINISHED_SUCCESS:
             self.running.remove(t)
             self.logger.notice(
                 f"FINISHED: {t} with {t.num_jobs} NUM_JOBS. NUM_JOBS now {self.num_jobs_queue}"
             )
             self.done.append(t)
             if self.compress:
                 t.compress()
         else:
             self.fail_task(t)
         if os.path.exists(t.output_dir):
             chown_dir(t.output_dir)
         else:
             chown_file(t.output_dir + ".tar.gz")
         return True
     return False
Example #19
    def write_input(self):
        # Load previous hash here if it exists

        old_hash = None
        hash_file = f"{self.output_dir}/hash.txt"
        if os.path.exists(hash_file):
            with open(hash_file, "r") as f:
                old_hash = f.read().strip()
                self.logger.debug(f"Previous result found, hash is {old_hash}")

        # Put config in a temp directory
        temp_dir_obj = tempfile.TemporaryDirectory()
        temp_dir = temp_dir_obj.name

        # Copy the base files across
        for f in self.base_ia:
            shutil.copy(self.data_dir + f, temp_dir)
        for f in self.base_cc:
            shutil.copy(self.data_dir + f, temp_dir)

        # Copy the include input file if there is one
        input_copied = []
        fs = self.base_ia + self.base_cc
        for ff in fs:
            if ff not in input_copied:
                input_copied.append(ff)
                with open(self.data_dir + ff, "r") as f:
                    for line in f.readlines():
                        line = line.strip()
                        if line.startswith("INPUT_FILE_INCLUDE"):
                            include_file = line.split(":")[-1].strip()
                            self.logger.debug(f"Copying included file {include_file}")
                            shutil.copy(self.data_dir + include_file, temp_dir)

        # Write the primary input file
        main_input_file = f"{temp_dir}/{self.genversion}.input"
        with open(main_input_file, "w") as f:
            f.writelines(map(lambda s: s + '\n', self.base))
        self.logger.info(f"Input file written to {main_input_file}")

        # Remove any duplicates and order the output files
        output_files = [f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))]
        self.logger.debug(f"{len(output_files)} files used to create simulation. Hashing them.")

        # Also add this file to the hash, so if the code changes we also regenerate. Smart.
        output_files.append(os.path.abspath(inspect.stack()[0][1]))

        # Get current hash
        string_to_hash = ""
        for file in output_files:
            with open(file, "r") as f:
                string_to_hash += f.read()
        new_hash = get_hash(string_to_hash)
        self.logger.debug(f"Current hash set to {new_hash}")
        regenerate = old_hash is None or old_hash != new_hash

        if regenerate:
            self.logger.info(f"Running simulation, hash check failed")
            # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
            if "//" not in self.output_dir and "Pippin" in self.output_dir:
                self.logger.debug(f"Cleaning output directory {self.output_dir}")
                shutil.rmtree(self.output_dir, ignore_errors=True)
                mkdirs(self.output_dir)
                self.logger.debug(f"Copying from {temp_dir} to {self.output_dir}")
                copytree(temp_dir, self.output_dir)
            with open(hash_file, "w") as f:
                f.write(str(new_hash))
                self.logger.debug(f"New hash saved to {hash_file}")
                self.hash_file = hash_file
            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")
        temp_dir_obj.cleanup()
        return regenerate, new_hash
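
Example #19 fingerprints the staged input files (plus the module source) by concatenating their contents and hashing the result, so any edit triggers a rerun. A plausible sketch of such a file-set fingerprint, assuming md5; the project's actual get_hash/get_hash_from_files implementations may differ.

import hashlib

def get_hash_from_files(paths):
    # Read files in sorted order so the fingerprint does not depend
    # on directory-listing order.
    md5 = hashlib.md5()
    for path in sorted(paths):
        with open(path, "rb") as f:
            md5.update(f.read())
    return md5.hexdigest()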
Example #20
    def write_input(self):
        # As Pippin only does one GENVERSION at a time, let's extract it first, and also the config
        c = self.yaml["CONFIG"]
        d = self.yaml["GENVERSION_LIST"][0]
        g = self.yaml["GENOPT_GLOBAL"]

        # Ensure g is a dict with a ref we can update
        if g is None:
            g = {}
            self.yaml["GENOPT_GLOBAL"] = g

        # Start setting properties in the right area
        d["GENVERSION"] = self.genversion

        # Logging now goes in the "CONFIG"
        c["LOGDIR"] = os.path.basename(self.sim_log_dir)

        for k in self.config.keys():
            if k.upper() not in self.reserved_top:
                run_config = self.config[k]
                run_config_keys = list(run_config.keys())
                assert "BASE" in run_config_keys, "You must specify a base file for each option"
                for key in run_config_keys:
                    if key.upper() in self.reserved_keywords:
                        continue
                    base_file = run_config["BASE"]
                    match = os.path.basename(base_file).split(".")[0]
                    val = run_config[key]
                    if not isinstance(val, list):
                        val = [val]

                    lookup = f"GENOPT({match})"
                    if lookup not in d:
                        d[lookup] = {}
                    for v in val:
                        d[lookup][key] = v

        if len(self.data_dirs) > 1:
            data_dir = self.data_dirs[0]
            c["PATH_USER_INPUT"] = data_dir

        for key in self.config.get("GLOBAL", []):
            if key.upper() == "BASE":
                continue
            direct_set = [
                "FORMAT_MASK", "RANSEED_REPEAT", "RANSEED_CHANGE",
                "BATCH_INFO", "BATCH_MEM", "NGEN_UNIT", "RESET_CIDOFF"
            ]
            if key in direct_set:
                c[key] = self.config["GLOBAL"][key]
            else:
                g[key] = self.config["GLOBAL"][key]

            if self.derived_batch_info:
                c["BATCH_INFO"] = self.derived_batch_info

            if key == "RANSEED_CHANGE" and c.get("RANSEED_REPEAT") is not None:
                del c["RANSEED_REPEAT"]
            elif key == "RANSEED_REPEAT" and c.get(
                    "RANSEED_CHANGE") is not None:
                del c["RANSEED_CHANGE"]

        if self.base_ia:
            c["SIMGEN_INFILE_Ia"] = [os.path.basename(f) for f in self.base_ia]
        else:
            del c["SIMGEN_INFILE_Ia"]

        if self.base_cc:
            c["SIMGEN_INFILE_NONIa"] = [
                os.path.basename(f) for f in self.base_cc
            ]
        else:
            del c["SIMGEN_INFILE_NONIa"]

        c["GENPREFIX"] = self.genprefix

        # Put config in a temp directory
        temp_dir_obj = tempfile.TemporaryDirectory()
        temp_dir = temp_dir_obj.name

        # Copy the base files across
        input_paths = []
        for f in self.base_ia + self.base_cc:
            resolved = get_data_loc(f)
            shutil.copy(resolved, temp_dir)
            input_paths.append(os.path.join(temp_dir, os.path.basename(f)))
            self.logger.debug(f"Copying input file {resolved} to {temp_dir}")

        # Copy the include input file if there is one
        input_copied = []
        fs = self.base_ia + self.base_cc
        for ff in fs:
            if ff not in input_copied:
                input_copied.append(ff)
                path = get_data_loc(ff)
                copied_path = os.path.join(temp_dir, os.path.basename(path))
                with open(path, "r") as f:
                    for line in f.readlines():
                        line = line.strip()
                        if line.startswith("INPUT_FILE_INCLUDE"):
                            include_file = line.split(":")[-1].strip()
                            include_file_path = get_data_loc(include_file)
                            self.logger.debug(
                                f"Copying INPUT_FILE_INCLUDE file {include_file_path} to {temp_dir}"
                            )

                            include_file_basename = os.path.basename(
                                include_file_path)
                            include_file_output = os.path.join(
                                temp_dir, include_file_basename)

                            if include_file_output not in input_copied:

                                # Copy include file into the temp dir
                                shutil.copy(include_file_path, temp_dir)

                                # Then SED the file to replace the full path with just the basename
                                if include_file != include_file_basename:
                                    sed_command = f"sed -i -e 's|{include_file}|{include_file_basename}|g' {copied_path}"
                                    self.logger.debug(
                                        f"Running sed command: {sed_command}")
                                    subprocess.run(sed_command,
                                                   stderr=subprocess.STDOUT,
                                                   cwd=temp_dir,
                                                   shell=True)

                                # And make sure we don't do this file again
                                fs.append(include_file_output)

        # Write the primary input file
        main_input_file = f"{temp_dir}/{self.genversion}.input"
        self.write_output_file(main_input_file)

        # Remove any duplicates and order the output files
        output_files = [
            f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
        ]
        self.logger.debug(
            f"{len(output_files)} files used to create simulation. Hashing them."
        )

        # Get current hash
        new_hash = self.get_hash_from_files(output_files)
        regenerate = self._check_regenerate(new_hash)

        if regenerate:
            self.logger.info(f"Running simulation")
            # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
            if "//" not in self.output_dir and len(self.output_dir) > 30:
                self.logger.debug(
                    f"Cleaning output directory {self.output_dir}")
                shutil.rmtree(self.output_dir, ignore_errors=True)
                mkdirs(self.output_dir)
                self.logger.debug(
                    f"Copying from {temp_dir} to {self.output_dir}")
                copytree(temp_dir, self.output_dir)
                self.save_new_hash(new_hash)
            else:
                self.logger.error(
                    f"Seems to be an issue with the output dir path: {self.output_dir}"
                )

            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")
        temp_dir_obj.cleanup()
        return regenerate, new_hash
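
Examples #20 and #22 shell out to sed -i to rewrite a full include path to its basename inside the staged copy. The same edit can be done in pure Python, avoiding the shell and its quoting pitfalls; a sketch under the same assumptions, with all three arguments coming from the surrounding loop.

def rewrite_include_path(copied_path, include_file, include_file_basename):
    # Same effect as the sed call above, without shelling out:
    # replace the full include path with its basename in the staged file.
    with open(copied_path, "r") as f:
        contents = f.read()
    with open(copied_path, "w") as f:
        f.write(contents.replace(include_file, include_file_basename))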
Example #21
    def _check_completion(self, squeue):

        if os.path.exists(
                self.done_file) or not os.path.exists(self.total_summary):

            if os.path.exists(self.done_file):
                self.logger.info(f"Simulation {self.name} found done file!")
                with open(self.done_file) as f:
                    if "FAIL" in f.read():
                        self.logger.error(
                            f"Done file {self.done_file} reporting failure")
                        return self.check_issues()
            else:
                self.logger.error(
                    "MERGE.LOG was not created, job died on submission")
                return self.check_issues()

            if os.path.exists(self.total_summary):
                y = read_yaml(self.total_summary)
                if "MERGE" in y.keys():
                    for i, row in enumerate(y["MERGE"]):
                        if len(row) == 6:  # Old version for backward compatibility (before 15/01/2021)
                            state, iver, version, ngen, nwrite, cpu = row
                        else:  # New MERGE.LOG syntax (after 15/01/2021)
                            state, iver, version, ngen, nwrite, nspec, cpu = row
                        if cpu < 60:
                            units = "minutes"
                        else:
                            cpu = cpu / 60
                            units = "hours"
                        self.logger.info(
                            f"Simulation {i + 1} generated {ngen} events and wrote {nwrite} to file, taking {cpu:0.1f} CPU {units}"
                        )
                else:
                    self.logger.error(
                        f"File {self.total_summary} does not have a MERGE section - did it die?"
                    )
                    return self.kill_and_fail()
                if "SURVEY" in y.keys():
                    self.output["SURVEY"] = y["SURVEY"]
                    self.output["SURVEY_ID"] = y["IDSURVEY"]
                else:
                    self.output["SURVEY"] = "UNKNOWN"
                    self.output["SURVEY_ID"] = 0

            else:
                self.logger.warning(f"Cannot find {self.total_summary}")

            self.logger.info("Done file found, creating symlinks")
            s_ends = [
                os.path.join(self.output_dir, os.path.basename(s))
                for s in self.sim_folders
            ]
            for s, s_end in zip(self.sim_folders, s_ends):
                if not os.path.exists(s_end):
                    # Check to make sure there isn't a broken symlink at s_end
                    # os.path.exists will return false for broken symlinks, even if one exists
                    if os.path.islink(s_end):
                        self.logger.error(
                            f"Symlink {s_end} exists and is pointing to a broken or missing directory"
                        )
                        return Task.FINISHED_FAILURE
                    else:
                        self.logger.debug(f"Linking {s} -> {s_end}")
                        os.symlink(s, s_end, target_is_directory=True)
                chown_dir(self.output_dir)
            self.output.update({"photometry_dirs": s_ends})
            return Task.FINISHED_SUCCESS

        return self.check_for_job(squeue, f"{self.genversion}.input-CPU")
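
Example #21 branches on the row width to support both MERGE.LOG layouts. One way to isolate that compatibility shim, so the reporting loop only ever sees the new seven-field shape; the helper is illustrative.

def parse_merge_row(row):
    # Old layout (before 15/01/2021): 6 fields, no NSPEC column.
    if len(row) == 6:
        state, iver, version, ngen, nwrite, cpu = row
        nspec = None
    else:  # New layout: NSPEC inserted before CPU.
        state, iver, version, ngen, nwrite, nspec, cpu = row
    return state, iver, version, ngen, nwrite, nspec, cpu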
Example #22
    def write_input(self, force_refresh):
        self.set_property("GENVERSION",
                          self.genversion,
                          assignment=": ",
                          section_end="ENDLIST_GENVERSION")
        self.set_property("LOGDIR",
                          os.path.basename(self.sim_log_dir),
                          assignment=": ",
                          section_end="ENDLIST_GENVERSION")
        for k in self.config.keys():
            if k.upper() != "GLOBAL":
                run_config = self.config[k]
                run_config_keys = list(run_config.keys())
                assert "BASE" in run_config_keys, "You must specify a base file for each option"
                for key in run_config_keys:
                    if key.upper() in self.reserved_keywords:
                        continue
                    base_file = run_config["BASE"]
                    match = os.path.basename(base_file).split(".")[0]
                    val = run_config[key]
                    if not isinstance(val, list):
                        val = [val]
                    for v in val:
                        self.set_property(f"GENOPT({match})",
                                          f"{key} {v}",
                                          section_end="ENDLIST_GENVERSION",
                                          only_add=True)

        if len(self.data_dirs) > 1:
            data_dir = self.data_dirs[0]
            self.set_property("PATH_USER_INPUT", data_dir, assignment=": ")

        for key in self.config.get("GLOBAL", []):
            if key.upper() == "BASE":
                continue
            direct_set = [
                "FORMAT_MASK", "RANSEED_REPEAT", "RANSEED_CHANGE",
                "BATCH_INFO", "BATCH_MEM", "NGEN_UNIT", "RESET_CIDOFF"
            ]
            if key in direct_set:
                self.set_property(key,
                                  self.config["GLOBAL"][key],
                                  assignment=": ")
            else:
                self.set_property(f"GENOPT_GLOBAL: {key}",
                                  self.config["GLOBAL"][key],
                                  assignment=" ")

            if self.derived_batch_info:
                self.set_property("BATCH_INFO",
                                  self.derived_batch_info,
                                  assignment=": ")

            if key == "RANSEED_CHANGE":
                self.delete_property("RANSEED_REPEAT")
            elif key == "RANSEED_REPEAT":
                self.delete_property("RANSEED_CHANGE")

        self.set_property(
            "SIMGEN_INFILE_Ia",
            " ".join([os.path.basename(f)
                      for f in self.base_ia]) if self.base_ia else None)
        self.set_property(
            "SIMGEN_INFILE_NONIa",
            " ".join([os.path.basename(f)
                      for f in self.base_cc]) if self.base_cc else None)
        self.set_property("GENPREFIX", self.genprefix)

        # Put config in a temp directory
        temp_dir_obj = tempfile.TemporaryDirectory()
        temp_dir = temp_dir_obj.name

        # Copy the base files across
        input_paths = []
        for f in self.base_ia + self.base_cc:
            resolved = get_data_loc(f)
            shutil.copy(resolved, temp_dir)
            input_paths.append(os.path.join(temp_dir, os.path.basename(f)))
            self.logger.debug(f"Copying input file {resolved} to {temp_dir}")

        # Copy the include input file if there is one
        input_copied = []
        fs = self.base_ia + self.base_cc
        for ff in fs:
            if ff not in input_copied:
                input_copied.append(ff)
                path = get_data_loc(ff)
                copied_path = os.path.join(temp_dir, os.path.basename(path))
                with open(path, "r") as f:
                    for line in f.readlines():
                        line = line.strip()
                        if line.startswith("INPUT_FILE_INCLUDE"):
                            include_file = line.split(":")[-1].strip()
                            include_file_path = get_data_loc(include_file)
                            self.logger.debug(
                                f"Copying INPUT_FILE_INCLUDE file {include_file_path} to {temp_dir}"
                            )

                            include_file_basename = os.path.basename(
                                include_file_path)
                            include_file_output = os.path.join(
                                temp_dir, include_file_basename)

                            if include_file_output not in input_copied:

                                # Copy include file into the temp dir
                                shutil.copy(include_file_path, temp_dir)

                                # Then SED the file to replace the full path with just the basename
                                if include_file != include_file_basename:
                                    sed_command = f"sed -i -e 's|{include_file}|{include_file_basename}|g' {copied_path}"
                                    self.logger.debug(
                                        f"Running sed command: {sed_command}")
                                    subprocess.run(sed_command,
                                                   stderr=subprocess.STDOUT,
                                                   cwd=temp_dir,
                                                   shell=True)

                                # And make sure we don't do this file again
                                fs.append(include_file_output)

        # Write the primary input file
        main_input_file = f"{temp_dir}/{self.genversion}.input"
        with open(main_input_file, "w") as f:
            f.writelines(map(lambda s: s + "\n", self.base))
        self.logger.info(f"Input file written to {main_input_file}")

        # Remove any duplicates and order the output files
        output_files = [
            f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
        ]
        self.logger.debug(
            f"{len(output_files)} files used to create simulation. Hashing them."
        )

        # Get current hash
        new_hash = self.get_hash_from_files(output_files)
        old_hash = self.get_old_hash()
        regenerate = force_refresh or (old_hash is None
                                       or old_hash != new_hash)

        if regenerate:
            self.logger.info(f"Running simulation")
            # Clean output dir. God I feel dangerous doing this, so hopefully unnecessary check
            if "//" not in self.output_dir and len(self.output_dir) > 30:
                self.logger.debug(
                    f"Cleaning output directory {self.output_dir}")
                shutil.rmtree(self.output_dir, ignore_errors=True)
                mkdirs(self.output_dir)
                self.logger.debug(
                    f"Copying from {temp_dir} to {self.output_dir}")
                copytree(temp_dir, self.output_dir)
                self.save_new_hash(new_hash)
            else:
                self.logger.error(
                    f"Seems to be an issue with the output dir path: {self.output_dir}"
                )

            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")
        temp_dir_obj.cleanup()
        return regenerate, new_hash
Example #23
    def _check_completion(self, squeue):
        if os.path.exists(self.done_file) or os.path.exists(self.done_file2):
            self.logger.info("Job complete")
            if os.path.exists(self.done_file):
                with open(self.done_file) as f:
                    if "FAILURE" in f.read():
                        return Task.FINISHED_FAILURE
            if os.path.exists(self.done_file2):
                with open(self.done_file2) as f:
                    if "FAILURE" in f.read():
                        return Task.FINISHED_FAILURE

            new_pred_file = self.output_dir + "/predictions.csv"
            new_model_file = os.path.join(self.output_dir, f"model.pt")

            if not os.path.exists(new_pred_file) or not os.path.exists(
                    new_model_file):
                self.logger.info(
                    "Updating model location or generating predictions file")
                model, predictions = self.get_model_and_pred()

                if not os.path.exists(new_model_file):
                    if model is not None:
                        shutil.move(model, new_model_file)
                        args_old, args_new = os.path.abspath(
                            os.path.join(os.path.dirname(model),
                                         "cli_args.json")
                        ), self.output_dir + "/cli_args.json"
                        norm_old, norm_new = os.path.abspath(
                            os.path.join(os.path.dirname(model),
                                         "data_norm.json")
                        ), self.output_dir + "/data_norm.json"
                        shutil.move(args_old, args_new)
                        shutil.move(norm_old, norm_new)
                        self.logger.info(
                            f"Model file can be found at {new_model_file}")
                if not os.path.exists(new_pred_file):
                    with open(predictions, "rb") as f:
                        dataframe = pickle.load(f)
                        self.logger.debug(dataframe)
                        self.logger.debug(self.variant)
                        if self.variant in ["variational", "bayesian"]:
                            final_dataframe = dataframe[[
                                "SNID", "all_class0_median", "all_class0_std"
                            ]]
                            final_dataframe = final_dataframe.rename(
                                columns={
                                    "all_class0_median":
                                    self.get_prob_column_name(),
                                    "all_class0_std":
                                    self.get_prob_column_name() + "_ERR"
                                })
                        else:
                            final_dataframe = dataframe[["SNID", "all_class0"]]
                            final_dataframe = final_dataframe.rename(
                                columns={
                                    "all_class0": self.get_prob_column_name()
                                })
                        final_dataframe.to_csv(new_pred_file,
                                               index=False,
                                               float_format="%0.4f")
                        self.logger.info(
                            f"Predictions file can be found at {new_pred_file}"
                        )
                chown_dir(self.output_dir)

            self.output.update({
                "model_filename": new_model_file,
                "predictions_filename": new_pred_file
            })
            return Task.FINISHED_SUCCESS
        else:
            return self.check_for_job(squeue, self.job_base_name)
Example #24
File: snana_sim.py Project: skuhl99/Pippin
    def write_input(self, force_refresh):
        self.set_property("GENVERSION",
                          self.genversion,
                          assignment=": ",
                          section_end="ENDLIST_GENVERSION")
        for k in self.config.keys():
            if k.upper() != "GLOBAL":
                run_config = self.config[k]
                run_config_keys = list(run_config.keys())
                assert "BASE" in run_config_keys, "You must specify a base file for each option"
                for key in run_config_keys:
                    if key.upper() in self.reserved_keywords:
                        continue
                    base_file = run_config["BASE"]
                    match = base_file.split(".")[0]
                    self.set_property(f"GENOPT({match})",
                                      f"{key} {run_config[key]}",
                                      section_end="ENDLIST_GENVERSION")

        for key in self.config.get("GLOBAL", []):
            if key.upper() == "BASE":
                continue
            self.set_property(key, self.config['GLOBAL'][key])
            if key == "RANSEED_CHANGE":
                self.delete_property("RANSEED_REPEAT")
            elif key == "RANSEED_REPEAT":
                self.delete_property("RANSEED_CHANGE")

        self.set_property("SIMGEN_INFILE_Ia",
                          " ".join(self.base_ia) if self.base_ia else None)
        self.set_property("SIMGEN_INFILE_NONIa",
                          " ".join(self.base_cc) if self.base_cc else None)
        self.set_property("GENPREFIX", self.genversion)

        # Put config in a temp directory
        temp_dir_obj = tempfile.TemporaryDirectory()
        temp_dir = temp_dir_obj.name

        # Copy the base files across
        for f in self.base_ia:
            shutil.copy(self.data_dir + f, temp_dir)
        for f in self.base_cc:
            shutil.copy(self.data_dir + f, temp_dir)

        # Copy the include input file if there is one
        input_copied = []
        fs = self.base_ia + self.base_cc
        for ff in fs:
            if ff not in input_copied:
                input_copied.append(ff)
                with open(self.data_dir + ff, "r") as f:
                    for line in f.readlines():
                        line = line.strip()
                        if line.startswith("INPUT_FILE_INCLUDE"):
                            include_file = line.split(":")[-1].strip()
                            self.logger.debug(
                                f"Copying included file {include_file}")
                            shutil.copy(self.data_dir + include_file, temp_dir)

        # Write the primary input file
        main_input_file = f"{temp_dir}/{self.genversion}.input"
        with open(main_input_file, "w") as f:
            f.writelines(map(lambda s: s + '\n', self.base))
        self.logger.info(f"Input file written to {main_input_file}")

        # Remove any duplicates and order the output files
        output_files = [
            f"{temp_dir}/{a}" for a in sorted(os.listdir(temp_dir))
        ]
        self.logger.debug(
            f"{len(output_files)} files used to create simulation. Hashing them."
        )

        # Get current hash
        new_hash = self.get_hash_from_files(output_files)
        old_hash = self.get_old_hash()
        regenerate = force_refresh or (old_hash is None
                                       or old_hash != new_hash)

        if regenerate:
            self.logger.info("Running simulation")
            # Clean the output dir. rmtree on the wrong path would be destructive, so sanity-check the path first
            if "//" not in self.output_dir and len(self.output_dir) > 30:
                self.logger.debug(
                    f"Cleaning output directory {self.output_dir}")
                shutil.rmtree(self.output_dir, ignore_errors=True)
                mkdirs(self.output_dir)
                self.logger.debug(
                    f"Copying from {temp_dir} to {self.output_dir}")
                copytree(temp_dir, self.output_dir)
                self.save_new_hash(new_hash)
            else:
                self.logger.error(
                    f"Seems to be an issue with the output dir path: {self.output_dir}"
                )

            chown_dir(self.output_dir)
        else:
            self.logger.info("Hash check passed, not rerunning")
        temp_dir_obj.cleanup()
        return regenerate, new_hash
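
write_input only reruns a simulation when the hashed inputs change. A minimal sketch of that regeneration check, assuming only that Pippin's get_hash_from_files digests file contents (the helper names below are hypothetical):

import hashlib
import os

def hash_files(paths):
    # Digest the concatenated file contents so any edit changes the hash.
    m = hashlib.md5()
    for p in sorted(paths):
        with open(p, "rb") as f:
            m.update(f.read())
    return m.hexdigest()

def needs_regeneration(paths, hash_file, force_refresh=False):
    # Compare the fresh digest against the one saved on the last run.
    new_hash = hash_files(paths)
    old_hash = None
    if os.path.exists(hash_file):
        with open(hash_file) as f:
            old_hash = f.read().strip()
    return force_refresh or old_hash != new_hash, new_hash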
Example #25
File: snana_sim.py Project: skuhl99/Pippin
    def _check_completion(self, squeue):
        # Check the log for errors; if any are found, print the rest of the log so you don't have to open the file
        output_error = False
        if self.logging_file is not None and os.path.exists(self.logging_file):
            with open(self.logging_file, "r") as f:
                for line in f.read().splitlines():
                    if "ERROR" in line or "***** ABORT *****" in line:
                        self.logger.error(
                            f"Fatal error in simulation. See {self.logging_file} for details."
                        )
                        output_error = True
                    if output_error:
                        self.logger.info(f"Excerpt: {line}")
            if output_error:
                self.logger.debug("Removing hash on failure")
                os.remove(self.hash_file)
                chown_dir(self.output_dir)
                return Task.FINISHED_FAILURE
        else:
            self.logger.warning(
                f"Simulation {self.name} logging file does not exist: {self.logging_file}"
            )
        for file in os.listdir(self.sim_log_dir):
            if not file.startswith("TMP") or not file.endswith(".LOG"):
                continue
            with open(self.sim_log_dir + "/" + file, "r") as f:
                for line in f.read().splitlines():
                    if (" ABORT " in line
                            or "FATAL[" in line) and not output_error:
                        output_error = True
                        self.logger.error(
                            f"Fatal error in simulation. See {self.sim_log_dir}/{file} for details."
                        )
                    if output_error:
                        self.logger.info(f"Excerpt: {line}")
            if output_error:
                self.logger.debug("Removing hash on failure")
                os.remove(self.hash_file)
                chown_dir(self.output_dir)
                return Task.FINISHED_FAILURE

        # Check to see if the done file exists
        sim_folder_endpoint = f"{self.output_dir}/{self.genversion}"
        if os.path.exists(self.done_file):
            self.logger.info(f"Simulation {self.name} found done file!")
            if os.path.exists(self.total_summary):
                with open(self.total_summary) as f:
                    key, count = None, None
                    for line in f.readlines():
                        if line.strip().startswith("SUM-"):
                            key = line.strip().split()[0]
                        if line.strip().startswith(self.genversion):
                            count = line.split()[2]
                            self.logger.debug(
                                f"Simulation reports {key} wrote {count} to file"
                            )
            else:
                self.logger.debug(f"Cannot find {self.total_summary}")
            if not os.path.exists(sim_folder_endpoint):
                sim_folder = os.path.expandvars(
                    f"{self.global_config['SNANA']['sim_dir']}/{self.genversion}"
                )
                self.logger.info("Done file found, creating symlinks")
                self.logger.debug(
                    f"Linking {sim_folder} -> {sim_folder_endpoint}")
                os.symlink(sim_folder,
                           sim_folder_endpoint,
                           target_is_directory=True)
                chown_dir(self.output_dir)
            self.output = {
                "photometry_dir": sim_folder_endpoint,
                "types": self.get_types(),
            }
            return Task.FINISHED_SUCCESS
        return 0  # TODO: Update to num jobs
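
The log scan above uses a one-way flag: once output_error flips to True, every remaining line is echoed, so the excerpt runs from the first fatal line to the end of the file. The same pattern as a standalone sketch:

def scan_log_for_error(path, markers=(" ABORT ", "FATAL[")):
    # Return the excerpt from the first fatal line to the end, or None if clean.
    excerpt = []
    seen_error = False
    with open(path) as f:
        for line in f.read().splitlines():
            if not seen_error and any(m in line for m in markers):
                seen_error = True
            if seen_error:
                excerpt.append(line)
    return excerpt if seen_error else None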
Example #26
    def _check_completion(self, squeue):

        if os.path.exists(self.done_file):
            self.logger.info(f"Simulation {self.name} found done file!")

            with open(self.done_file) as f:
                if "FAIL" in f.read():
                    self.logger.error(
                        f"Done file {self.done_file} reporting failure")

                    log_files = [self.logging_file]
                    if os.path.exists(self.sim_log_dir):
                        log_files += [
                            os.path.join(self.sim_log_dir, f)
                            for f in os.listdir(self.sim_log_dir)
                            if f.upper().endswith(".LOG")
                        ]
                    else:
                        self.logger.warning(
                            f"Warning, sim log dir {self.sim_log_dir} does not exist. Something might have gone terribly wrong"
                        )
                    self.scan_files_for_error(log_files, "FATAL ERROR ABORT",
                                              "QOSMaxSubmitJobPerUserLimit",
                                              "DUE TO TIME LIMIT")
                    return Task.FINISHED_FAILURE

            if os.path.exists(self.total_summary):
                with open(self.total_summary) as f:
                    key, count = None, None
                    allzero = True
                    for line in f.readlines():
                        if line.strip().startswith("SUM-"):
                            key = line.strip().split()[0]
                        if line.strip().startswith(self.genversion):
                            count = line.split()[2]
                            self.logger.debug(
                                f"Simulation reports {key} wrote {count} to file"
                            )
                            if int(count.strip()) > 0:
                                allzero = False
                    if allzero:
                        self.logger.error(
                            f"Simulation didn't write anything out according to {self.total_summary}"
                        )
                        return Task.FINISHED_FAILURE
            else:
                self.logger.warning(f"Cannot find {self.total_summary}")

            self.logger.info("Done file found, creating symlinks")
            s_ends = [
                os.path.join(self.output_dir, os.path.basename(s))
                for s in self.sim_folders
            ]
            for s, s_end in zip(self.sim_folders, s_ends):
                if not os.path.exists(s_end):
                    self.logger.debug(f"Linking {s} -> {s_end}")
                    os.symlink(s, s_end, target_is_directory=True)
                chown_dir(self.output_dir)
            self.output.update({"photometry_dirs": s_ends})
            return Task.FINISHED_SUCCESS

        return self.check_for_job(squeue, f"{self.genprefix}_0")
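
Example #26 exposes each simulation folder inside the task's output directory through symlinks rather than copies. A minimal sketch of that linking step (the function name is illustrative):

import os

def link_sim_folders(sim_folders, output_dir):
    # Create output_dir/<basename> -> sim_folder symlinks, skipping any that exist.
    endpoints = []
    for s in sim_folders:
        s_end = os.path.join(output_dir, os.path.basename(s))
        if not os.path.exists(s_end):
            os.symlink(s, s_end, target_is_directory=True)
        endpoints.append(s_end)
    return endpoints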
Example #27
    def run(self):
        regenerate, new_hash = self.write_nml()
        if not regenerate:
            return new_hash

        logging_file = self.config_path.replace(".nml", ".nml_log")
        with open(logging_file, "w") as f:
            # TODO: Add queue to config and run
            subprocess.run(["split_and_fit.pl", self.config_path, "NOPROMPT"],
                           stdout=f,
                           stderr=subprocess.STDOUT,
                           cwd=self.output_dir)
        self.logger.info(f"Light curve fitting outputting to {logging_file}")
        done_file = f"{self.lc_output_dir}/SPLIT_JOBS_LCFIT.tar.gz"
        secondary_log = f"{self.lc_output_dir}/SPLIT_JOBS_LCFIT/MERGELOGS/MERGE2.LOG"

        log_files = [logging_file, secondary_log]
        while True:
            time.sleep(self.global_config["OUTPUT"].getint("ping_frequency"))

            # Check for errors
            for file in log_files:
                if os.path.exists(file):
                    with open(file, "r") as f:
                        output_error = False
                        for line in f.read().splitlines():
                            if ("ERROR" in line or
                                ("ABORT" in line
                                 and " 0 " not in line)) and not output_error:
                                self.logger.critical(
                                    f"Fatal error in light curve fitting. See {file} for details."
                                )
                                output_error = True
                            if output_error:
                                self.logger.error(f"Excerpt: {line}")

                    if output_error:
                        return False

            # Check for existence of SPLIT_JOBS_LCFIT.tar.gz to see if job is done
            if os.path.exists(done_file):
                self.logger.info(
                    "Tarball found, fitting complete, cleaning up the directory"
                )
                try:
                    logging_file2 = logging_file.replace("_log", "_log2")
                    with open(logging_file2, "w") as f:
                        subprocess.run(
                            ["split_and_fit.pl", "CLEANMASK", "4", "NOPROMPT"],
                            stdout=f,
                            stderr=subprocess.STDOUT,
                            cwd=self.output_dir,
                            check=True)
                        time.sleep(10)
                except subprocess.CalledProcessError as e:
                    self.logger.warning(
                        f"split_and_fit.pl has a return code of {e.returncode}. This may or may not be an issue."
                    )
                chown_dir(self.output_dir)
                self.print_stats()
                return new_hash
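
run blocks in a polling loop: sleep for the configured ping frequency, scan the logs for fatal lines, then test for the done tarball. A stripped-down sketch of the same control flow, with the log check injected as a callable:

import os
import time

def wait_for_done(done_file, log_files, is_failed, ping_seconds=60):
    # Poll forever: sleep, scan logs for fatal lines, then test the done file.
    while True:
        time.sleep(ping_seconds)
        for log in log_files:
            if os.path.exists(log) and is_failed(log):
                return False  # a log reported a fatal error
        if os.path.exists(done_file):
            return True  # done marker present, fitting complete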
Example #28
    def _check_completion(self, squeue):
        if os.path.exists(self.done_file):
            self.logger.info("Job complete")

            new_pred_file = self.output_dir + "/predictions.csv"
            new_model_file = self.output_dir + "/model.pt"

            if not os.path.exists(new_pred_file) or not os.path.exists(
                    new_model_file):
                self.logger.info(
                    "Updating model location or generating predictions file")
                model, predictions = self.get_model_and_pred()

                if not os.path.exists(new_model_file):
                    if model is not None:
                        shutil.move(model, new_model_file)
                        args_old, args_new = os.path.abspath(
                            os.path.join(os.path.dirname(model),
                                         "cli_args.json")
                        ), self.output_dir + "/cli_args.json"
                        norm_old, norm_new = os.path.abspath(
                            os.path.join(os.path.dirname(model),
                                         "data_norm.json")
                        ), self.output_dir + "/data_norm.json"
                        shutil.move(args_old, args_new)
                        shutil.move(norm_old, norm_new)
                        self.logger.info(
                            f"Model file can be found at {new_model_file}")
                if not os.path.exists(new_pred_file):
                    with open(predictions, "rb") as f:
                        dataframe = pickle.load(f)
                        if self.variant in ["variational", "bayesian"]:
                            final_dataframe = dataframe[[
                                "SNID", "all_class0_median", "all_class0_std"
                            ]]
                            final_dataframe = final_dataframe.rename(
                                columns={
                                    "all_class0_median":
                                    self.get_prob_column_name(),
                                    "all_class0_std":
                                    self.get_prob_column_name() + "_ERR",
                                })
                        else:
                            final_dataframe = dataframe[["SNID", "all_class0"]]
                            final_dataframe = final_dataframe.rename(
                                columns={
                                    "all_class0": self.get_prob_column_name()
                                })
                        final_dataframe.to_csv(new_pred_file,
                                               index=False,
                                               float_format="%0.4f")
                        self.logger.info(
                            f"Predictions file can be found at {new_pred_file}"
                        )
                chown_dir(self.output_dir)

            self.output.update({
                "model_filename": new_model_file,
                "predictions_filename": new_pred_file
            })
            return Task.FINISHED_SUCCESS
        else:
            num_jobs = self.num_jobs if squeue is None else len(
                [i for i in squeue if self.job_base_name in i])
            if squeue is not None and num_jobs == 0:
                self.logger.warning(
                    "SuperNNova has no done file and has no active jobs. This is not good."
                )
                if os.path.exists(self.hash_file):
                    self.logger.info("Removing hash on failure")
                    os.remove(self.hash_file)
                return Task.FINISHED_FAILURE
            return num_jobs
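
The else branch infers failure from scheduler state: no done file plus zero matching squeue entries means the jobs died without reporting. A sketch of that decision, assuming squeue is a list of job-name strings as in the snippet:

def poll_status(done_exists, squeue, job_base_name, configured_num_jobs):
    # Mirrors the else branch above: with no done file, infer state from squeue.
    if done_exists:
        return "FINISHED_SUCCESS"
    if squeue is None:
        # Scheduler not queried this tick; report the configured job count.
        return configured_num_jobs
    running = len([j for j in squeue if job_base_name in j])
    if running == 0:
        # No done file and no live jobs: the run died silently.
        return "FINISHED_FAILURE"
    return running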
Example #29
    def classify(self):
        new_hash = self.get_hash_from_string(self.name)
        if self._check_regenerate(new_hash):
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            try:
                name = self.get_prob_column_name()
                cid = "CID"
                s = self.get_simulation_dependency()
                df = None
                phot_dir = s.output["photometry_dirs"][self.index]
                headers = [
                    os.path.join(phot_dir, a) for a in os.listdir(phot_dir)
                    if "HEAD" in a
                ]
                if len(headers) == 0:
                    self.logger.warning(
                        f"No HEAD fits files found in {phot_dir}! Going to do it manually, this may not work."
                    )

                    cmd = "grep --exclude-dir=* SNID: * | awk -F ':' '{print $3}'"
                    self.logger.debug(f"Running command   {cmd}")
                    process = subprocess.run(cmd,
                                             capture_output=True,
                                             cwd=phot_dir,
                                             shell=True)
                    output = process.stdout.decode("ascii").split("\n")
                    output = [x for x in output if x]

                    snid = [x.strip() for x in output]
                    df = pd.DataFrame({cid: snid, name: np.ones(len(snid))})
                    df.drop_duplicates(subset=cid, inplace=True)

                else:
                    for h in headers:
                        with fits.open(h) as hdul:
                            data = hdul[1].data
                            snid = np.array(data.field("SNID"))
                            dataframe = pd.DataFrame({
                                cid: snid,
                                name: np.ones(snid.shape)
                            })
                            dataframe[cid] = dataframe[cid].apply(str)
                            dataframe[cid] = dataframe[cid].str.strip()
                            if df is None:
                                df = dataframe
                            else:
                                df = pd.concat([df, dataframe])
                    df.drop_duplicates(subset=cid, inplace=True)

                self.logger.info(f"Saving probabilities to {self.output_file}")
                df.to_csv(self.output_file, index=False, float_format="%0.4f")
                chown_dir(self.output_dir)
                with open(self.done_file, "w") as f:
                    f.write("SUCCESS")
                self.save_new_hash(new_hash)
            except Exception as e:
                self.logger.exception(e, exc_info=True)
                self.passed = False
                with open(self.done_file, "w") as f:
                    f.write("FAILED")
                return False
        else:
            self.should_be_done()
        self.passed = True

        return True
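
The FITS branch builds one CID row per object from each HEAD file. A minimal sketch of that read with astropy, assuming the standard SNANA layout (a binary table in HDU 1 with an SNID column); the probability column name here is a placeholder:

import numpy as np
import pandas as pd
from astropy.io import fits

def cids_from_head_file(path, prob_column="PROB_EXAMPLE"):
    # HDU 1 of an SNANA HEAD file is a binary table with an SNID column.
    with fits.open(path) as hdul:
        snid = np.array(hdul[1].data.field("SNID"))
    # Assign probability 1.0 to every object, as the snippet above does.
    df = pd.DataFrame({"CID": snid, prob_column: np.ones(snid.shape)})
    df["CID"] = df["CID"].astype(str).str.strip()
    return df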
Example #30
def run(args):

    if args is None:
        return None

    init()

    # Load YAML config file
    yaml_path = os.path.abspath(os.path.expandvars(args.yaml))
    assert os.path.exists(yaml_path), f"File {yaml_path} cannot be found."
    config_raw, config = load_yaml(yaml_path)

    overwrites = config.get("GLOBAL")
    if config.get("GLOBALS") is not None:
        logging.warning(
            "Your config file has a GLOBALS section in it. If you're trying to overwrite cfg.yml, rename this to GLOBAL"
        )

    cfg = None
    if config.get("GLOBAL"):
        cfg = config.get("GLOBAL").get("CFG_PATH")
    if cfg is None:
        cfg = args.config

    global_config = get_config(initial_path=cfg, overwrites=overwrites)

    config_filename = os.path.basename(args.yaml).split(".")[0].upper()
    output_dir = get_output_dir()
    logging_folder = os.path.abspath(os.path.join(output_dir, config_filename))

    if not args.check:
        mkdirs(logging_folder)
    if os.path.exists(logging_folder):
        chown_dir(logging_folder, walk=args.permission)

    if args.permission:
        return

    message_store, logging_filename = setup_logging(config_filename,
                                                    logging_folder, args)

    for i, d in enumerate(global_config["DATA_DIRS"]):
        logging.debug(f"Data directory {i + 1} set as {d}")
        assert d is not None, "Data directory is none, which means it failed to resolve. Check the error message above for why."

    logging.info(
        f"Running on: {os.environ.get('HOSTNAME', '$HOSTNAME not set')} login node."
    )

    manager = Manager(config_filename, yaml_path, config_raw, config,
                      message_store)

    # Gracefully handle Ctrl-c
    def handler(signum, frame):
        logging.error("Ctrl-c was pressed.")
        logging.warning(
            "All remaining tasks will be killed and their hash reset")
        manager.kill_remaining_tasks()
        exit(1)

    signal.signal(signal.SIGINT, handler)

    if args.start is not None:
        args.refresh = True
    manager.set_start(args.start)
    manager.set_finish(args.finish)
    manager.set_force_refresh(args.refresh)
    manager.set_force_ignore_stage(args.ignore)
    manager.execute(args.check, args.compress, args.uncompress)
    chown_file(logging_filename)
    return manager
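
The Ctrl-c handling near the end is a reusable pattern: register a closure over the manager so an interrupt triggers cleanup before exiting. A minimal sketch with the cleanup callback passed in:

import logging
import signal
import sys

def install_interrupt_handler(kill_remaining_tasks):
    # On SIGINT, log the interrupt, run the cleanup callback, then exit non-zero.
    def handler(signum, frame):
        logging.error("Ctrl-c was pressed.")
        logging.warning("All remaining tasks will be killed and their hash reset")
        kill_remaining_tasks()
        sys.exit(1)
    signal.signal(signal.SIGINT, handler)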