def predict(self, force_refresh):
    """Assemble the prediction command line and hand it to ``self.classify``.

    The ``MODEL`` option is used as a file location when
    ``get_output_loc(model)`` exists on disk; otherwise it is compared with
    the names of ``self.dependencies`` and replaced by the matching
    dependency's ``model_filename`` output.

    Returns ``False`` (after logging an error) when ``MODEL`` is not set,
    otherwise the result of ``self.classify(force_refresh, command)``.
    """
    model = self.options.get("MODEL")
    if model is None:
        self.logger.error(
            "If you are in predict model, please specify a MODEL in OPTS. Either a file location or a training task name."
        )
        return False

    if os.path.exists(get_output_loc(model)):
        model = get_output_loc(model)
    else:
        # Not a file on disk, so look for a dependency carrying that name.
        for dep in self.dependencies:
            if dep.name != model:
                continue
            self.logger.debug(
                f"Found task dependency {dep.name} with model file {dep.output['model_filename']}"
            )
            model = dep.output["model_filename"]

    ia_types = self.get_simulation_dependency().output["types_dict"]["IA"]
    # An empty type list collapses to "", in which case "1" is used instead.
    types = " ".join(str(entry) for entry in ia_types) or "1"

    arguments = [
        "-p",
        f"--features {self.features}",
        f"--done_file {self.done_file}",
        f"--model {model}",
        f"--types {types}",
        f"--name {self.get_prob_column_name()}",
        f"--output {self.predictions_filename}",
        f"{self.fitres_file}",
    ]
    return self.classify(force_refresh, " ".join(arguments))
Exemple #2
0
 def predict(self, force_refresh):
     """Assemble the restore/test command line and hand it to ``self.classify``.

     ``MODEL`` is first tried as a file location (through ``get_output_loc``);
     if nothing exists there, it is matched against the names of
     ``self.dependencies`` and replaced by that dependency's
     ``model_filename`` output.

     Returns ``False`` (after logging an error) when ``MODEL`` is not set,
     otherwise the result of ``self.classify(force_refresh, command)``.
     """
     model = self.options.get("MODEL")
     if model is None:
         self.logger.error(
             "If you are in predict model, please specify a MODEL in OPTS. Either a file location or a training task name."
         )
         return False

     candidate = get_output_loc(model)
     if not os.path.exists(candidate):
         if "/" in model:
             self.logger.warning(
                 f"Your model {model} looks like a path, but I couldn't find a model at {candidate}"
             )
         # Nothing on disk, so the value has to name one of our dependencies.
         for dep in self.dependencies:
             if dep.name != model:
                 continue
             self.logger.debug(
                 f"Found task dependency {dep.name} with model file {dep.output['model_filename']}"
             )
             model = dep.output["model_filename"]
     else:
         self.logger.debug(f"Found existing model file at {candidate}")
         model = candidate

     arguments = [
         "--nc 4",
         "--nclass 2",
         f"--ft {self.features}",
         "--restore",
         f"--pklfile {model}",
         # get_rf_conf() is spliced in directly in front of --test, with no
         # separator added here.
         f"--pklformat FITRES {self.get_rf_conf()}--test {self.fitres_file}",
         f"--filedir {self.output_dir}",
         f"--done_file {self.done_file}",
         # The command string ends with a trailing space.
         "--use_filenames ",
     ]
     return self.classify(force_refresh, " ".join(arguments))
Exemple #3
0
    def __init__(self, name, output_dir, dependencies, mode, options):
        super().__init__(name, output_dir, dependencies, mode, options)
        self.global_config = get_config()
        self.dump_dir = output_dir + "/dump"
        self.job_base_name = os.path.basename(output_dir)

        self.tmp_output = None
        self.done_file = os.path.join(self.output_dir, "done_task.txt")
        self.variant = options.get("VARIANT", "vanilla").lower()
        assert self.variant in ["vanilla", "variational", "bayesian"], \
            f"Variant {self.variant} is not vanilla, variational or bayesian"
        self.slurm = """#!/bin/bash

#SBATCH --job-name={job_name}
#SBATCH --time=15:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --partition=gpu2
#SBATCH --gres=gpu:1
#SBATCH --output=output.log
#SBATCH --account=pi-rkessler
#SBATCH --mem=64GB

source activate {conda_env}
module load cuda
echo `which python`
cd {path_to_classifier}
python run.py --data --sntypes '{sntypes}' --dump_dir {dump_dir} --raw_dir {photometry_dir} {fit_dir} {phot} {clump} {test_or_train}
python run.py --use_cuda {cyclic} --sntypes '{sntypes}' --done_file {done_file} --dump_dir {dump_dir} {cyclic} {variant} {model} {phot} {command}
        """
        self.conda_env = self.global_config["SuperNNova"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SuperNNova"]["location"])
Exemple #4
0
    def __init__(self, name, output_dir, dependencies, options, recal_aggtask):
        """Collect classifier dependencies and derive the output file layout.

        Classifier instances are picked out of ``dependencies``, their fit
        dependencies are queried for names, and one merged CSV, one set of key
        files and one calibration CSV path are prepared per simulation
        version. The plotting script is resolved last; a missing script is
        reported through ``Task.fail_config``.
        """
        super().__init__(name, output_dir, dependencies=dependencies)
        self.passed = False
        self.classifiers = [
            dep for dep in dependencies if isinstance(dep, Classifier)
        ]
        self.lcfit_deps = [
            clf.get_fit_dependency(output=False) for clf in self.classifiers
        ]
        unique_fit_names = {
            fit.output["name"] for fit in self.lcfit_deps if fit is not None
        }
        self.lcfit_names = list(unique_fit_names)
        # The published output keeps the (possibly empty) list; the dummy
        # entry below only replaces the attribute.
        self.output["lcfit_names"] = self.lcfit_names
        if not self.lcfit_names:
            self.logger.debug(
                "No jobs depend on the LCFIT, so adding a dummy one")
            self.lcfit_names = [""]

        self.sim_task = self.get_underlying_sim_task()
        self.output["sim_name"] = self.sim_task.name
        self.recal_aggtask = recal_aggtask
        self.num_versions = len(self.sim_task.output["sim_folders"])

        versions = range(self.num_versions)
        self.output_dfs = [
            os.path.join(self.output_dir, f"merged_{version}.csv")
            for version in versions
        ]
        self.output_dfs_key = [
            [
                os.path.join(self.output_dir, f"merged_{fit_name}_{version}.key")
                for fit_name in self.lcfit_names
            ]
            for version in versions
        ]
        self.output_cals = [
            os.path.join(self.output_dir, f"calibration_{version}.csv")
            for version in versions
        ]

        self.id = "CID"
        self.type_name = "SNTYPE"
        self.options = options
        self.include_type = bool(options.get("INCLUDE_TYPE", False))
        self.plot = options.get("PLOT", True)
        self.plot_all = options.get("PLOT_ALL", False)
        self.output["classifiers"] = self.classifiers
        self.output["calibration_files"] = self.output_cals

        # A boolean PLOT selects the bundled script next to this source file;
        # any other value is used as the script location itself.
        if isinstance(self.plot, bool):
            plot_script = os.path.dirname(
                inspect.stack()[0][1]) + "/external/aggregator_plot.py"
        else:
            plot_script = self.plot
        self.python_file = get_output_loc(plot_script)

        if not os.path.exists(self.python_file):
            Task.fail_config(
                f"Attempting to find python file {self.python_file} but it's not there!"
            )
Exemple #5
0
    def __init__(self, name, output_dir, dependencies, mode, options):
        # Run the parent constructor, then read the "ArgonneClassifier"
        # section of the global config for the conda environment and the
        # classifier location.
        super().__init__(name, output_dir, dependencies, mode, options)
        self.global_config = get_config()
        self.num_jobs = 4

        self.conda_env = self.global_config["ArgonneClassifier"]["conda_env"]
        self.path_to_classifier = get_output_loc(self.global_config["ArgonneClassifier"]["location"])
        self.job_base_name = os.path.basename(output_dir)
        # FEATURES falls back to a fixed space-separated list of names.
        self.features = options.get("FEATURES", "x1 c zHD x1ERR cERR PKMJDERR")
        # The model pickle is placed directly inside the task's output directory.
        self.model_pk_file = "modelpkl.pkl"
        self.output_pk_file = os.path.join(self.output_dir,  self.model_pk_file)

        # NOTE(review): the batch-script template below is cut off in this
        # view; its remaining lines are not visible here.
        self.slurm = """#!/bin/bash
Exemple #6
0
    def predict(self, force_refresh):
        """Run ``nearnbr_apply.exe`` with the configured model on the fit output.

        ``MODEL`` may name a dependency (its ``model_filename`` output is then
        used) or point at a model file; either way the value is passed through
        ``get_output_loc`` and must exist on disk.

        The executable is only launched when ``force_refresh`` is set or the
        hash of the task name plus model path differs from the stored one. In
        that case the output directory is wiped and recreated, the process
        output goes to ``self.logging_file`` and ``SUCCESS``/``FAILURE`` is
        written to ``self.done_file`` according to the return code.

        Returns ``False`` when the model file cannot be found, ``True``
        otherwise (including when the external command itself fails).

        Raises ``AssertionError`` when ``MODEL`` is not set.
        """
        train_info = self.get_fit_dependency()

        model = self.options.get("MODEL")
        assert model is not None, "If TRAIN is not specified, you have to point to a model to use"
        for t in self.dependencies:
            if model == t.name:
                self.logger.debug(
                    f"Found task dependency {t.name} with model file {t.output['model_filename']}"
                )
                model = t.output["model_filename"]
                # Stop here: ``model`` now holds a filename and must not be
                # compared against the remaining dependency names.
                break

        model_path = get_output_loc(model)
        self.logger.debug(f"Looking for model in {model_path}")
        if not os.path.exists(model_path):
            self.logger.error(f"Cannot find {model_path}")
            return False

        old_hash = self.get_old_hash()
        new_hash = self.get_hash_from_string(self.name + model_path)

        if not (force_refresh or new_hash != old_hash):
            self.logger.debug("Not regenerating")
            return True

        self.logger.debug("Regenerating")

        if os.path.exists(self.output_dir):
            shutil.rmtree(self.output_dir, ignore_errors=True)
        mkdirs(self.output_dir)
        self.save_new_hash(new_hash)

        # Build the argument vector directly rather than splitting a formatted
        # string on spaces, so that paths containing whitespace stay intact.
        command = [
            "nearnbr_apply.exe",
            "-inFile_data", str(train_info["fitres_file"]),
            "-inFile_MLpar", str(model_path),
            "-outFile", str(self.outfile_predict),
            "-varName_prob", str(self.get_prob_column_name()),
        ]
        self.logger.debug(f"Executing command {' '.join(command)}")
        with open(self.logging_file, "w") as log_handle:
            result = subprocess.run(command,
                                    stdout=log_handle,
                                    stderr=subprocess.STDOUT,
                                    cwd=self.output_dir)
        # The done file records the outcome once the log has been closed.
        with open(self.done_file, "w") as done_handle:
            done_handle.write("SUCCESS" if result.returncode == 0 else "FAILURE")
        return True
Exemple #7
0
    def __init__(self, name, output_dir, options, dependencies=None):
        # Run the parent constructor, then derive names and paths from the
        # RAW_DIR option and publish them in self.output.
        super().__init__(name, output_dir, dependencies=dependencies)
        self.options = options
        self.global_config = get_config()

        self.logfile = os.path.join(self.output_dir, "output.log")
        self.conda_env = self.global_config["DataSkimmer"]["conda_env"]
        self.path_to_task = output_dir

        # NOTE(review): RAW_DIR has no default, so a missing option leaves
        # raw_dir as None and the os.path calls below would raise - confirm
        # that the option is validated by the caller.
        self.raw_dir = self.options.get("RAW_DIR")
        # The last path component is used as the genversion, the rest as the data path.
        self.genversion = os.path.basename(self.raw_dir)
        self.data_path = os.path.dirname(self.raw_dir)
        self.job_name = f"DATAPREP_{self.name}"

        self.output["genversion"] = self.genversion
        self.output["data_path"] = self.data_path
        self.output["photometry_dir"] = get_output_loc(self.raw_dir)
        self.output["raw_dir"] = self.raw_dir
        self.clump_file = os.path.join(self.output_dir,
                                       self.genversion + ".SNANA.TEXT")
        self.output["clump_file"] = self.clump_file

        # Batch script template: runs snana.exe on clump.nml inside
        # {path_to_task} and writes SUCCESS or FAILURE to {done_file}
        # depending on the exit status.
        self.slurm = """#!/bin/bash
#SBATCH --job-name={job_name}
#SBATCH --time=0:20:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --partition=broadwl
#SBATCH --output={log_file}
#SBATCH --account=pi-rkessler
#SBATCH --mem=2GB

cd {path_to_task}
snana.exe clump.nml
if [ $? -eq 0 ]; then
    echo SUCCESS > {done_file}
else
    echo FAILURE > {done_file}
fi
"""
        # NOTE(review): the template below is cut off in this view.
        self.clump_command = """#
Exemple #8
0
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        # All arguments are forwarded unchanged to the parent constructor.
        super().__init__(name,
                         output_dir,
                         config,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.num_jobs = 4

        # Conda environment and classifier location come from the "SNIRF"
        # section of the global config.
        self.conda_env = self.global_config["SNIRF"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SNIRF"]["location"])
        # Job name: basename of the directory two levels above output_dir,
        # then "__", then the basename of output_dir itself.
        self.job_base_name = os.path.basename(
            Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
        self.features = options.get("FEATURES", "x1 c zHD x1ERR cERR PKMJDERR")
        self.validate_model()

        self.model_pk_file = "model.pkl"
        self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
        self.fitopt = options.get("FITOPT", "DEFAULT")
        # The fitres locations are left unset by this constructor.
        self.fitres_filename = None
        self.fitres_file = None

        # An optional BATCH_FILE is resolved through get_data_loc;
        # BATCH_REPLACE defaults to an empty mapping.
        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        # NOTE(review): the template below is cut off in this view.
        self.slurm = """{sbatch_header}
Exemple #9
0
    def __init__(self,
                 name,
                 output_dir,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        # All arguments are forwarded unchanged to the parent constructor.
        super().__init__(name,
                         output_dir,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.num_jobs = 4

        # Conda environment and classifier location come from the "SNIRF"
        # section of the global config.
        self.conda_env = self.global_config["SNIRF"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SNIRF"]["location"])
        # Job name: basename of the directory two levels above output_dir,
        # then "__", then the basename of output_dir itself.
        self.job_base_name = os.path.basename(
            Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
        self.features = options.get("FEATURES", "x1 c zHD x1ERR cERR PKMJDERR")
        self.validate_model()

        self.model_pk_file = "model.pkl"
        self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
        self.fitopt = options.get("FITOPT", "DEFAULT")
        # The fit dependency maps the chosen FITOPT to a fitres filename and
        # lists one fitres directory per index.
        # NOTE(review): an unknown FITOPT raises KeyError here - confirm that
        # this is the intended failure mode.
        lcfit = self.get_fit_dependency()
        self.fitres_filename = lcfit["fitopt_map"][self.fitopt]
        self.fitres_file = os.path.abspath(
            os.path.join(lcfit["fitres_dirs"][self.index],
                         self.fitres_filename))

        # NOTE(review): the template below is cut off in this view.
        self.slurm = """#!/bin/bash
Exemple #10
0
 def predict(self, force_refresh):
     """Assemble the restore/test command line and hand it to ``self.classify``.

     When ``get_output_loc(model)`` does not exist on disk, ``MODEL`` is
     matched against the names of ``self.dependencies`` and replaced by that
     dependency's ``model_filename`` output; otherwise the option value is
     used as given.

     Returns ``False`` (after logging an error) when ``MODEL`` is not set,
     otherwise the result of ``self.classify(force_refresh, command)``.
     """
     model = self.options.get("MODEL")
     if model is None:
         self.logger.error("If you are in predict model, please specify a MODEL in OPTS. Either a file location or a training task name.")
         return False

     model_on_disk = os.path.exists(get_output_loc(model))
     if not model_on_disk:
         # Nothing on disk, so the value has to name one of our dependencies.
         for dep in self.dependencies:
             if dep.name != model:
                 continue
             self.logger.debug(f"Found task dependency {dep.name} with model file {dep.output['model_filename']}")
             model = dep.output["model_filename"]

     # The command string ends with a trailing space.
     template = (
         "--nc 4 --nclass 2 --ft {features} --restore --pklfile {model} "
         "--pklformat FITRES --test {fits_file} --filedir {output_dir} "
         "--done_file {done_file} --use_filenames "
     )
     command = template.format(
         features=self.features,
         model=model,
         fits_file=self.get_fits_file(),
         output_dir=self.output_dir,
         done_file=self.done_file,
     )
     return self.classify(force_refresh, command)
Exemple #11
0
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        """Initialise the task from its options, config and optional yml inputs.

        After the parent constructor runs, hyper-parameters are read from
        ``options`` (``VARIANT``, ``REDSHIFT``, ``NORM``, ``CYCLIC``, ``SEED``,
        ``BATCH_SIZE``, ``NUM_LAYERS``, ``HIDDEN_DIM``) and run settings from
        ``config`` (``GPU``, ``CLEAN``).

        ``DATA_YML`` and ``CLASSIFICATION_YML`` must be given together. When
        both are present their contents are read and the variant is taken from
        the classification yml. When only one is present an error is logged
        and the task continues without yml inputs.

        Raises ``AssertionError`` when ``NORM`` or the variant is not one of
        the accepted values.
        """
        super().__init__(name,
                         output_dir,
                         config,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.dump_dir = output_dir + "/dump"
        self.job_base_name = os.path.basename(output_dir)
        self.gpu = config.get("GPU", True)
        self.tmp_output = None
        self.done_file = os.path.join(self.output_dir, "done_task.txt")
        self.done_file2 = os.path.join(self.output_dir, "done_task2.txt")
        self.variant = options.get("VARIANT", "vanilla").lower()
        self.redshift = "zspe" if options.get("REDSHIFT", True) else "none"
        self.norm = options.get("NORM", "cosmo")
        self.cyclic = options.get("CYCLIC", True)
        self.seed = options.get("SEED", 0)
        self.clean = config.get("CLEAN", True)
        self.batch_size = options.get("BATCH_SIZE", 128)
        self.num_layers = options.get("NUM_LAYERS", 2)
        self.hidden_dim = options.get("HIDDEN_DIM", 32)

        # Setup yml files
        self.data_yml_file = options.get("DATA_YML", None)
        self.output_data_yml = os.path.join(self.output_dir, "data.yml")
        self.classification_yml_file = options.get("CLASSIFICATION_YML", None)
        self.output_classification_yml = os.path.join(self.output_dir,
                                                      "classification.yml")

        # Start from the "no yml" state so these attributes exist on every
        # path, including the error branch below.
        self.data_yml = None
        self.classification_yml = None
        self.has_yml = False

        # XOR - only runs if either but not both yml's are None
        if (self.data_yml_file is None) ^ (self.classification_yml_file is
                                           None):
            # Report the option values themselves; the file contents have not
            # been loaded at this point.
            self.logger.error(
                f"If using yml inputs, both 'DATA_YML' (currently {self.data_yml_file}) and 'CLASSIFICATION_YML' (currently {self.classification_yml_file}) must be provided"
            )
        elif self.data_yml_file is not None:
            with open(self.data_yml_file, 'r') as f:
                self.data_yml = f.read()
            with open(self.classification_yml_file, 'r') as f:
                self.classification_yml = f.read()
            self.has_yml = True
            # The classification yml overrides the VARIANT option.
            self.variant = self.get_variant_from_yml(self.classification_yml)

        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.validate_model()

        assert self.norm in [
            "global",
            "cosmo",
            "perfilter",
            "cosmo_quantile",
            "none",
        ], f"Norm option is set to {self.norm}, needs to be one of 'global', 'cosmo', 'perfilter', 'cosmo_quantile', 'none'"
        assert self.variant in [
            "vanilla", "variational", "bayesian"
        ], f"Variant {self.variant} is not vanilla, variational or bayesian"
        # Batch script template; the two placeholders are filled in elsewhere.
        self.slurm = """{sbatch_header}
        {task_setup}

        """
        self.conda_env = self.global_config["SuperNNova"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SuperNNova"]["location"])
Exemple #12
0
    def classify(self, training):
        """Render the SuperNNova batch script and submit it when needed.

        When ``training`` is false the ``MODEL`` option must resolve (directly
        or through a dependency's ``model_filename`` output) to an existing
        file. The type mapping, command-line flags and batch header are then
        assembled into the final script text. The script is only written and
        submitted with ``sbatch`` when ``self._check_regenerate`` accepts the
        hash of that text; otherwise ``self.should_be_done()`` is called.

        Returns ``True`` in both cases.

        Raises ``AssertionError`` when predicting without a usable model.
        """
        model = self.options.get("MODEL")
        model_path = ""
        if not training:
            assert model is not None, "If TRAIN is not specified, you have to point to a model to use"
            # A value that is not an existing file is looked up among the
            # dependency names instead.
            if not os.path.exists(get_output_loc(model)):
                for t in self.dependencies:
                    if model == t.name:
                        self.logger.debug(
                            f"Found task dependency {t.name} with model file {t.output['model_filename']}"
                        )
                        model = t.output["model_filename"]
            model_path = get_output_loc(model)
            self.logger.debug(f"Looking for model in {model_path}")
            assert os.path.exists(model_path), f"Cannot find {model_path}"

        types = self.get_types()
        if types is None:
            # No types supplied: fall back to this built-in code-to-name mapping.
            types = OrderedDict({
                "1": "Ia",
                "0": "unknown",
                "2": "SNIax",
                "3": "SNIa-pec",
                "20": "SNIIP",
                "21": "SNIIL",
                "22": "SNIIn",
                "29": "SNII",
                "32": "SNIb",
                "33": "SNIc",
                "39": "SNIbc",
                "41": "SLSN-I",
                "42": "SLSN-II",
                "43": "SLSN-R",
                "80": "AGN",
                "81": "galaxy",
                "98": "None",
                "99": "pending",
                "101": "Ia",
                "120": "SNII",
                "130": "SNIbc",
            })
        else:
            # Make sure the supplied mapping has an "IA" entry and at least
            # one "II"/"IBC" entry (compared case-insensitively).
            has_ia = False
            has_cc = False
            self.logger.debug(f"Input types set to {types}")
            for key, value in types.items():
                if value.upper() == "IA":
                    has_ia = True
                elif value.upper() in ["II", "IBC"]:
                    has_cc = True
            if not has_ia:
                self.logger.debug("No Ia type found, injecting type")
                types[1] = "Ia"
                # Re-sort so that key 1 comes first.
                # NOTE(review): the sort key mixes -1 with the raw keys, so
                # non-integer keys would fail to compare - confirm what
                # get_types() returns.
                types = dict(
                    sorted(types.items(),
                           key=lambda x: -1 if x[0] == 1 else x[0]))
                self.logger.debug(f"Inject types with Ias are {types}")
            if not has_cc:
                self.logger.debug("No cc type found, injecting type")
                types[29] = "II"
        str_types = json.dumps(types)
        self.logger.debug(f"Types set to {str_types}")

        # Photometry comes from the simulation dependency; the fit directory
        # flag is only added when a fit dependency exists.
        sim_dep = self.get_simulation_dependency()
        light_curve_dir = sim_dep.output["photometry_dirs"][self.index]
        self.raw_dir = light_curve_dir
        fit = self.get_fit_dependency()
        fit_dir = f"" if fit is None else f"--fits_dir {fit['fitres_dirs'][self.index]}"
        # --cyclic is only passed for the vanilla and variational variants.
        cyclic = "--cyclic" if self.variant in ["vanilla", "variational"
                                                ] and self.cyclic else ""
        batch_size = f"--batch_size {self.batch_size}"
        num_layers = f"--num_layers {self.num_layers}"
        hidden_dim = f"--hidden_dim {self.hidden_dim}"
        variant = f"--model {self.variant}"
        if self.variant == "bayesian":
            variant += " --num_inference_samples 20"

        clump = sim_dep.output.get("clump_file")
        if clump is None:
            clump_txt = ""
        else:
            clump_txt = f"--photo_window_files {clump}"

        # Without a user batch file, pick the built-in GPU or CPU header;
        # otherwise read the file and pass it through clean_header.
        if self.batch_file is None:
            if self.gpu:
                self.sbatch_header = self.sbatch_gpu_header
            else:
                self.sbatch_header = self.sbatch_cpu_header
        else:
            with open(self.batch_file, 'r') as f:
                self.sbatch_header = f.read()
            self.sbatch_header = self.clean_header(self.sbatch_header)

        if self.has_yml:
            self.update_yml()
            setup_file = "supernnova_yml"
        else:
            setup_file = "supernnova"

        # Header values, merged with the user's BATCH_REPLACE entries.
        header_dict = {
            "REPLACE_NAME": self.job_base_name,
            "REPLACE_WALLTIME": "23:00:00",
            "REPLACE_LOGFILE": "output.log",
            "REPLACE_MEM": "32GB",
            "APPEND": ["#SBATCH --ntasks=1", "#SBATCH --cpus-per-task=1"]
        }
        header_dict = merge_dict(header_dict, self.batch_replace)
        self.update_header(header_dict)

        # Values handed to update_setup together with the chosen task_setup entry.
        setup_dict = {
            "conda_env":
            self.conda_env,
            "dump_dir":
            self.dump_dir,
            "photometry_dir":
            light_curve_dir,
            "fit_dir":
            fit_dir,
            "path_to_classifier":
            self.path_to_classifier,
            "job_name":
            self.job_base_name,
            "command":
            "--train_rnn" if training else "--validate_rnn",
            "sntypes":
            str_types,
            "variant":
            variant,
            "cyclic":
            cyclic,
            "model":
            "" if training else f"--model_files {model_path}",
            "phot":
            "",
            "test_or_train":
            "" if training else "--data_testing",
            "redshift":
            "--redshift " + self.redshift,
            "norm":
            "--norm " + self.norm,
            "done_file":
            self.done_file,
            "clump":
            clump_txt,
            "done_file2":
            self.done_file2,
            "partition":
            "gpu2" if self.gpu else "broadwl",
            "gres":
            "#SBATCH --gres=gpu:1" if self.gpu else "",
            "cuda":
            "--use_cuda" if self.gpu else "",
            "clean_command":
            f"rm -rf {self.dump_dir}/processed" if self.clean else "",
            "seed":
            f"--seed {self.seed}" if self.seed else "",
            "batch_size":
            batch_size,
            "num_layers":
            num_layers,
            "hidden_dim":
            hidden_dim,
            "data_yml":
            self.output_data_yml,
            "classification_yml":
            self.output_classification_yml,
            "classification_command":
            "train_rnn" if training else "validate_rnn"
        }

        format_dict = {
            "sbatch_header":
            self.sbatch_header,
            "task_setup":
            self.update_setup(setup_dict, self.task_setup[setup_file])
        }

        slurm_output_file = self.output_dir + "/job.slurm"
        self.logger.info(
            f"Running SuperNNova, slurm job outputting to {slurm_output_file}")
        slurm_text = self.slurm.format(**format_dict)

        # The hash of the rendered script decides whether the job is rerun.
        new_hash = self.get_hash_from_string(slurm_text)

        if not self._check_regenerate(new_hash):
            self.should_be_done()
        else:
            self.logger.info("Rerunning. Cleaning output_dir")
            # The directory is wiped first, so the yml files, hash and script
            # all have to be written after this point.
            shutil.rmtree(self.output_dir, ignore_errors=True)
            mkdirs(self.output_dir)
            if self.has_yml:
                with open(self.output_data_yml, 'w') as f:
                    f.write(self.data_yml)
                with open(self.output_classification_yml, 'w') as f:
                    f.write(self.classification_yml)

            self.save_new_hash(new_hash)

            with open(slurm_output_file, "w") as f:
                f.write(slurm_text)

            self.logger.info(
                f"Submitting batch job to {'train' if training else 'predict using'} SuperNNova"
            )
            subprocess.run(["sbatch", slurm_output_file], cwd=self.output_dir)

        return True
Exemple #13
0
    def __init__(self,
                 name,
                 output_dir,
                 options,
                 global_config,
                 dependencies=None):
        # Run the parent constructor, then derive ini files, chain paths and
        # labels from the options and from an optional CreateCov dependency.
        super().__init__(name, output_dir, dependencies=dependencies)
        self.options = options
        self.global_config = global_config

        # Job name: basename of the directory two levels above output_dir,
        # then "_COSMOMC_", then the task name.
        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_COSMOMC_" + name
        self.logfile = os.path.join(self.output_dir, "output.log")

        self.path_to_cosmomc = get_output_loc(
            self.global_config["CosmoMC"]["location"])

        # The blind flag comes from the CreateCov dependency when there is
        # one, otherwise from the BLIND option (default False).
        self.create_cov_dep = self.get_dep(CreateCov)
        self.blind = self.create_cov_dep.output[
            "blind"] if self.create_cov_dep is not None else self.options.get(
                "BLIND", False)
        assert isinstance(
            self.blind,
            (bool, np.bool_)), "Blind should be set to a boolan value!"
        # NOTE(review): INI has no default; a missing option leaves ini_prefix
        # as None and the string operations further down would raise -
        # confirm that it is validated elsewhere.
        self.ini_prefix = options.get("INI")
        self.static = self.ini_prefix in ["cmb_omw", "cmb_omol"]
        self.static_path = "cosmomc_static_chains/"

        if self.create_cov_dep is None:
            # No covariance dependency: a single ini file and a single "ALL" covopt.
            self.ini_files = [f"{self.ini_prefix}.ini"]
            self.num_walkers = 4
            self.covopts = ["ALL"]
            self.covopts_numbers = [0]
            self.labels = [self.name]
            self.num_jobs = 1
        else:
            # One ini file and one job per selected covopt; COVOPTS defaults
            # to every covopt the dependency provides.
            self.num_walkers = options.get("NUM_WALKERS", 8)
            avail_cov_opts = self.create_cov_dep.output["covopts"]
            self.covopts = options.get("COVOPTS") or list(
                avail_cov_opts.keys())
            self.covopts_numbers = [avail_cov_opts[k] for k in self.covopts]

            self.ini_files = [
                f"{self.ini_prefix}_{num}.ini" for num in self.covopts_numbers
            ]

            self.output["hubble_plot"] = self.create_cov_dep.output[
                "hubble_plot"]
            self.output["bcor_name"] = self.create_cov_dep.output["bcor_name"]
            self.labels = [self.name + "_" + c for c in self.covopts]
            self.num_jobs = len(self.covopts)

        self.chain_dir = os.path.join(self.output_dir, "chains/")
        # Map each covopt to the .paramnames file next to its chains.
        self.param_dict = {
            l: os.path.join(self.chain_dir, i.replace(".ini", ".paramnames"))
            for l, i in zip(self.covopts, self.ini_files)
        }

        self.done_files = [f"done_{num}.txt" for num in self.covopts_numbers]
        # NOTE(review): the key is the covopt only, so every walker overwrites
        # the same entry and just the last walker's chain file remains -
        # confirm this is intended.
        self.chain_dict = {
            l: os.path.join(self.chain_dir, i.replace(".ini", f"_{n + 1}.txt"))
            for l, i in zip(self.covopts, self.ini_files)
            for n in range(self.num_walkers)
        }
        # The value does not depend on n, so the walker loop only repeats the
        # same assignment.
        self.base_dict = {
            l: os.path.join(self.chain_dir, i.replace(".ini", ""))
            for l, i in zip(self.covopts, self.ini_files)
            for n in range(self.num_walkers)
        }
        self.output["chain_dir"] = self.chain_dir
        self.output["param_dict"] = self.param_dict
        self.output["chain_dict"] = self.chain_dict
        self.output["base_dict"] = self.base_dict
        self.output["covopts"] = self.covopts
        self.output["blind"] = self.blind

        # Default label: the upper-cased ini prefix parts except the last,
        # joined with " + " in parentheses, followed by a space and the
        # CreateCov dependency's name (or nothing).
        self.output["label"] = (self.options.get(
            "LABEL",
            f"({' + '.join(self.ini_prefix.upper().split('_')[:-1])})") + " " +
                                (self.create_cov_dep.output["name"]
                                 if self.create_cov_dep is not None else ""))
        # TODO: Better logic here please
        # The last "_"-separated part of the ini prefix selects the parameter
        # pair; any suffix missing from this table raises KeyError.
        final = self.ini_prefix.split("_")[-1]
        ps = {
            "omw": ["omegam", "w"],
            "omol": ["omegam", "omegal"],
            "wnu": ["w", "nu"],
            "wwa": ["w", "wa"]
        }
        self.output["cosmology_params"] = ps[final]

        # NOTE(review): the template below is cut off in this view.
        self.slurm = """#!/bin/bash
Exemple #14
0
    def __init__(self,
                 name,
                 output_dir,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        super().__init__(name,
                         output_dir,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.dump_dir = output_dir + "/dump"
        self.job_base_name = os.path.basename(output_dir)
        self.gpu = True
        self.tmp_output = None
        self.done_file = os.path.join(self.output_dir, "done_task.txt")
        self.done_file2 = os.path.join(self.output_dir, "done_task2.txt")
        self.variant = options.get("VARIANT", "vanilla").lower()
        self.redshift = "zspe" if options.get("REDSHIFT", True) else "none"
        self.norm = options.get("NORM", "cosmo")
        self.validate_model()

        assert self.norm in [
            "global", "cosmo", "perfilter"
        ], f"Norm option is set to {self.norm}, needs to be one of 'global', 'cosmo', 'perfilter'"
        assert self.variant in [
            "vanilla", "variational", "bayesian"
        ], f"Variant {self.variant} is not vanilla, variational or bayesian"
        self.slurm = """#!/bin/bash

#SBATCH --job-name={job_name}
#SBATCH --time=23:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --partition=gpu2
#SBATCH --gres=gpu:1
#SBATCH --output=output.log
#SBATCH --account=pi-rkessler
#SBATCH --mem=64GB

source activate {conda_env}
module load cuda
echo `which python`
cd {path_to_classifier}
echo "#################TIMING  Starting here:   `date`"
python run.py --data --sntypes '{sntypes}' --dump_dir {dump_dir} --raw_dir {photometry_dir} {fit_dir} {phot} {clump} {norm} {test_or_train}
if [ $? -ne 0 ]; then
    echo FAILURE > {done_file2}
else
    echo "#################TIMING  Database done now, starting classifier:   `date`"
    python run.py --use_cuda {cyclic} --sntypes '{sntypes}' --done_file {done_file} --batch_size 20 --dump_dir {dump_dir} {cyclic} {variant} {model} {phot} {redshift} {norm} {command}
    if [ $? -eq 0 ]; then
        rm -rf {dump_dir}/processed
        echo SUCCESS > {done_file2}
    else
        echo FAILURE > {done_file2}
    fi
fi
echo "#################TIMING  Classifier finished:   `date`"
        """
        self.conda_env = self.global_config["SuperNNova"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SuperNNova"]["location"])
Exemple #15
0
    def classify(self, training, force_refresh):
        """Render the slurm script for this job and submit it when it has changed.

        Args:
            training: When truthy the job trains (``--train_rnn``); otherwise
                it validates with an existing model (``--validate_rnn``), in
                which case the ``MODEL`` option must be set.
            force_refresh: When truthy the job is resubmitted even if the
                rendered script hashes to the stored value.

        Returns:
            bool: Always ``True``.

        Raises:
            AssertionError: in predict mode, if ``MODEL`` is missing or the
                resolved model path does not exist.
        """
        model = self.options.get("MODEL")
        model_path = ""
        if not training:
            assert model is not None, "If TRAIN is not specified, you have to point to a model to use"
            if not os.path.exists(get_output_loc(model)):
                # Not an existing file, so look for a dependency task carrying this name
                for dep in self.dependencies:
                    if model == dep.name:
                        self.logger.debug(
                            f"Found task dependency {dep.name} with model file {dep.output['model_filename']}"
                        )
                        model = dep.output["model_filename"]
            model_path = get_output_loc(model)
            self.logger.debug(f"Looking for model in {model_path}")
            assert os.path.exists(model_path), f"Cannot find {model_path}"

        types = self.get_types()
        if types is not None:
            self.logger.debug(f"Input types set to {types}")
            # Snapshot the labels first so the injections below cannot affect the checks
            labels = [label.upper() for label in types.values()]
            has_ia = "IA" in labels
            has_cc = any(label in ("II", "IBC") for label in labels)
            if not has_ia:
                self.logger.debug("No Ia type found, injecting type")
                types["1"] = "Ia"
                types.move_to_end("1", last=False)
            if not has_cc:
                self.logger.debug("No cc type found, injecting type")
                types["29"] = "II"
        else:
            # Fallback type table used when get_types() returns nothing
            types = OrderedDict({
                "1": "Ia",
                "0": "unknown",
                "2": "SNIax",
                "3": "SNIa-pec",
                "20": "SNIIP",
                "21": "SNIIL",
                "22": "SNIIn",
                "29": "SNII",
                "32": "SNIb",
                "33": "SNIc",
                "39": "SNIbc",
                "41": "SLSN-I",
                "42": "SLSN-II",
                "43": "SLSN-R",
                "80": "AGN",
                "81": "galaxy",
                "98": "None",
                "99": "pending",
                "101": "Ia",
                "120": "SNII",
                "130": "SNIbc",
            })
        str_types = json.dumps(types)
        self.logger.debug(f"Types set to {str_types}")

        sim_dep = self.get_simulation_dependency()
        light_curve_dir = sim_dep.output["photometry_dirs"][self.index]
        fit = self.get_fit_dependency()
        if fit is None:
            fit_dir = ""
        else:
            fit_dir = f"--fits_dir {fit['fitres_dirs'][self.index]}"

        if self.variant in ["vanilla", "variational"]:
            cyclic = "--cyclic"
        else:
            cyclic = ""
        variant = f"--model {self.variant}"
        if self.variant == "bayesian":
            variant += " --num_inference_samples 20"

        clump = sim_dep.output.get("clump_file")
        clump_txt = "" if clump is None else f"--photo_window_files {clump}"

        # Values substituted into the slurm template held in self.slurm
        format_dict = dict(
            conda_env=self.conda_env,
            dump_dir=self.dump_dir,
            photometry_dir=light_curve_dir,
            fit_dir=fit_dir,
            path_to_classifier=self.path_to_classifier,
            job_name=self.job_base_name,
            command="--train_rnn" if training else "--validate_rnn",
            sntypes=str_types,
            variant=variant,
            cyclic=cyclic,
            model="" if training else f"--model_files {model_path}",
            phot="",
            test_or_train="" if training else "--data_testing",
            redshift="--redshift " + self.redshift,
            norm="--norm " + self.norm,
            done_file=self.done_file,
            clump=clump_txt,
            done_file2=self.done_file2,
        )

        slurm_output_file = self.output_dir + "/job.slurm"
        self.logger.info(
            f"Running SuperNNova, slurm job outputting to {slurm_output_file}")
        slurm_text = self.slurm.format(**format_dict)

        old_hash = self.get_old_hash()
        new_hash = self.get_hash_from_string(slurm_text)

        if not force_refresh and new_hash == old_hash:
            # Identical script and no forced refresh: nothing to resubmit
            self.logger.info("Hash check passed, not rerunning")
            self.should_be_done()
            return True

        self.logger.info("Rerunning. Cleaning output_dir")
        shutil.rmtree(self.output_dir, ignore_errors=True)
        mkdirs(self.output_dir)
        self.save_new_hash(new_hash)

        with open(slurm_output_file, "w") as script:
            script.write(slurm_text)

        action = "train" if training else "predict using"
        self.logger.info(f"Submitting batch job to {action} SuperNNova")
        subprocess.run(["sbatch", slurm_output_file], cwd=self.output_dir)

        return True
Exemple #16
0
    def __init__(self, name, output_dir, config, dependencies, options, recal_aggtask):
        """Collect classifier dependencies and work out output files and merge columns.

        Args:
            name: Task name, forwarded to the parent constructor.
            output_dir: Task output directory, forwarded to the parent constructor.
            config: Task configuration; ``MERGE_CLASSIFIERS`` is read from it
                via ``self.config``.
            dependencies: Upstream tasks; those that are ``Classifier``
                instances are the ones aggregated.
            options: Task options; ``INCLUDE_TYPE``, ``PLOT`` and ``PLOT_ALL``
                are read here.
            recal_aggtask: Stored as-is on the instance.
        """
        super().__init__(name, output_dir, config=config, dependencies=dependencies)
        self.passed = False
        self.classifiers = [dep for dep in dependencies if isinstance(dep, Classifier)]
        self.lcfit_deps = [clas.get_fit_dependency(output=False) for clas in self.classifiers]
        self.lcfit_names = list(set(fit.output["name"] for fit in self.lcfit_deps if fit is not None))
        self.output["lcfit_names"] = self.lcfit_names
        if not self.lcfit_names:
            # Keep one (empty) name so the per-fit key files below are still generated
            self.logger.debug("No jobs depend on the LCFIT, so adding a dummy one")
            self.lcfit_names = [""]

        self.sim_task = self.get_underlying_sim_task()
        self.output["sim_name"] = self.sim_task.name
        self.recal_aggtask = recal_aggtask
        self.num_versions = len(self.sim_task.output["sim_folders"])

        # One merged csv, one calibration csv and one key file per fit name, for every version
        self.output_dfs = []
        self.output_dfs_key = []
        self.output_cals = []
        for version in range(self.num_versions):
            self.output_dfs.append(os.path.join(self.output_dir, f"merged_{version}.csv"))
            self.output_dfs_key.append(
                [os.path.join(self.output_dir, f"merged_{fit_name}_{version}.key") for fit_name in self.lcfit_names])
            self.output_cals.append(os.path.join(self.output_dir, f"calibration_{version}.csv"))

        self.id = "CID"
        self.type_name = "SNTYPE"
        self.options = options
        self.include_type = bool(options.get("INCLUDE_TYPE", False))
        self.plot = options.get("PLOT", False)
        self.plot_all = options.get("PLOT_ALL", False)
        self.output["classifier_names"] = [clas.name for clas in self.classifiers]
        self.output["classifier_indexes"] = [clas.index for clas in self.classifiers]
        self.output["calibration_files"] = self.output_cals
        self.output["empty_agg"] = False

        # A boolean PLOT selects the bundled plotting script; any other value is used as the script path
        if isinstance(self.plot, bool):
            plot_script = os.path.dirname(inspect.stack()[0][1]) + "/external/aggregator_plot.py"
        else:
            plot_script = self.plot
        self.python_file = get_output_loc(plot_script)

        if not os.path.exists(self.python_file):
            Task.fail_config(f"Attempting to find python file {self.python_file} but it's not there!")

        merge_classifiers = self.config.get("MERGE_CLASSIFIERS")
        # Default mapping: every classifier keeps its own probability column
        self.classifier_merge = {clas.output['name']: clas.get_prob_column_name() for clas in self.classifiers}
        if merge_classifiers is not None:
            self.classifier_merge = dict()
            for clas in self.classifiers:
                clas_name = clas.output['name']
                matched_columns = []
                for column in merge_classifiers.keys():
                    masks = ensure_list(merge_classifiers[column])
                    # A key applies when any of its masks is a substring of the classifier name
                    if any(mask in clas_name for mask in masks):
                        if column[:5] != "PROB_":
                            column = "PROB_" + column
                        matched_columns.append(column)
                if not matched_columns:
                    self.classifier_merge[clas_name] = clas.get_prob_column_name()
                elif len(matched_columns) == 1:
                    self.classifier_merge[clas_name] = matched_columns[0]
                else:
                    Task.fail_config(f"Classifier task {clas_name} matched multiple MERGE_CLASSIFIERS keys: {matched_columns}. Please provide more specific keys")
        self.logger.debug(f"Classifier merge = {self.classifier_merge}")
        self.output["classifier_merge"] = self.classifier_merge
Exemple #17
0
    def get_tasks(c, prior_tasks, base_output_dir, stage_number, prefix,
                  global_config):
        """Create the classifier tasks described by the ``CLASSIFICATION`` config section.

        For each entry the classifier class is obtained from
        ``ClassifierFactory``, paired with every simulation / light-curve-fit
        task from ``prior_tasks`` that passes the entry's masks, and
        instantiated once per version reported by the sim (or fit) task.

        Args:
            c: Parsed configuration; only ``c["CLASSIFICATION"]`` is read.
            prior_tasks: Previously created tasks, searched for sim, data-prep
                and light-curve-fit tasks.
            base_output_dir: Root directory under which
                ``<stage_number>_CLAS/...`` output directories are named.
            stage_number: Stage prefix used in the output directory name.
            prefix: Not used by this function.
            global_config: Not used by this function.

        Returns:
            list: The classifier tasks created, in creation order.

        Raises:
            AssertionError: if ``MODE`` is not train/predict, if train mode is
                requested for a classifier that cannot train, if a training
                sim/fit has more than one version, or if a ``MODEL`` task is of
                a different class than the classifier using it.
            ValueError: if a run has neither a sim task nor a fit task.
        """
        from pippin.classifiers.factory import ClassifierFactory

        def _get_clas_output_dir(base_output_dir,
                                 stage_number,
                                 sim_name,
                                 fit_name,
                                 clas_name,
                                 index=None,
                                 extra=None):
            # Every component is optional; the sim name is dropped whenever a fit name is present
            sim_name = "" if sim_name is None or fit_name is not None else "_" + sim_name
            fit_name = "" if fit_name is None else "_" + fit_name
            extra_name = "" if extra is None else "_" + extra
            index = "" if index is None else f"_{index}"
            return f"{base_output_dir}/{stage_number}_CLAS/{clas_name}{index}{sim_name}{fit_name}{extra_name}"

        def get_num_ranseed(sim_task, lcfit_task):
            # Number of versions to classify: sim folders first, else fitres directories
            if sim_task is not None:
                return len(sim_task.output["sim_folders"])
            if lcfit_task is not None:
                return len(lcfit_task.output["fitres_dirs"])
            raise ValueError(
                "Classifier dependency has no sim_task or lcfit_task?")

        def _make_tasks(cls, clas_name, name, config, mode, options,
                        sim_task, lcfit_task, deps, extra=None, **cls_kwargs):
            # Build one classifier task per version of the given sim/fit pair.
            # Extra keyword arguments are passed to the classifier constructor.
            sim_name = sim_task.name if sim_task is not None else None
            fit_name = lcfit_task.name if lcfit_task is not None else None
            indexes = get_num_ranseed(sim_task, lcfit_task)
            created = []
            for i in range(indexes):
                # Only suffix the directory with a number when there are several versions
                num = i + 1 if indexes > 1 else None
                clas_output_dir = _get_clas_output_dir(base_output_dir,
                                                       stage_number,
                                                       sim_name,
                                                       fit_name,
                                                       clas_name,
                                                       index=num,
                                                       extra=extra)
                cc = cls(clas_name,
                         clas_output_dir,
                         config,
                         list(deps),
                         mode,
                         options,
                         index=i,
                         **cls_kwargs)
                Task.logger.info(
                    f"Creating classification task {name} with {cc.num_jobs} jobs, for LC fit {fit_name} on simulation {sim_name} and index {i}"
                )
                created.append(cc)
            return created

        tasks = []
        lcfit_tasks = Task.get_task_of_type(prior_tasks, SNANALightCurveFit)
        sim_tasks = Task.get_task_of_type(prior_tasks, DataPrep,
                                          SNANASimulation)
        for clas_name in c.get("CLASSIFICATION", []):
            config = c["CLASSIFICATION"][clas_name]
            name = config["CLASSIFIER"]
            cls = ClassifierFactory.get(name)
            options = config.get("OPTS", {})
            if "MODE" not in config:
                Task.fail_config(
                    f"Classifier task {clas_name} needs to specify MODE as train or predict"
                )
            mode = config["MODE"].lower()
            assert mode in ["train", "predict"
                            ], "MODE should be either train or predict"
            mode = Classifier.TRAIN if mode == "train" else Classifier.PREDICT

            # Validate that train is not used on certain classifiers
            if mode == Classifier.TRAIN:
                assert name not in [
                    "PerfectClassifier", "UnityClassifier", "FitProbClassifier"
                ], f"Can not use train mode with {name}"

            needs_sim, needs_lc = cls.get_requirements(options)

            # Anything needing light-curve fits runs once per fit task (its first
            # dependency is used as the sim); sim-only classifiers run per sim task.
            runs = []
            if needs_lc:
                runs = [(l.dependencies[0], l) for l in lcfit_tasks]
            elif needs_sim:
                runs = [(s, None) for s in sim_tasks]
            else:
                Task.logger.warning(
                    f"Classifier {name} does not need sims or fits. Wat.")

            num_gen = 0
            mask = config.get("MASK", "")
            mask_sim = config.get("MASK_SIM", "")
            mask_fit = config.get("MASK_FIT", "")
            common = (cls, clas_name, name, config, mode, options)
            for s, l in runs:

                sim_name = s.name if s is not None else None
                fit_name = l.name if l is not None else None
                # NOTE(review): MASK is only compared against the sim name, as the
                # original code effectively did - confirm whether it should also
                # be applied to the fit name.
                if mask and mask not in sim_name:
                    continue
                if mask_sim and mask_sim not in sim_name:
                    continue
                # MASK_FIT filters on the fit name. It used to be tested as
                # `mask_sim in sim_name`, which never looked at the fit at all.
                # Runs without a fit task have nothing to mask and are kept.
                if mask_fit and fit_name is not None and mask_fit not in fit_name:
                    continue
                deps = []
                if s is not None:
                    deps.append(s)
                if l is not None:
                    deps.append(l)

                model = options.get("MODEL")

                # Validate to make sure training samples only have one sim.
                if mode == Classifier.TRAIN:
                    if s is not None:
                        folders = s.output["sim_folders"]
                        assert (
                            len(folders) == 1
                        ), f"Training requires one version of the sim, you have {len(folders)} for sim task {s}. Make sure your training sim doesn't set RANSEED_CHANGE"
                    if l is not None:
                        folders = l.output["fitres_dirs"]
                        assert (
                            len(folders) == 1
                        ), f"Training requires one version of the lcfits, you have {len(folders)} for lcfit task {l}. Make sure your training sim doesn't set RANSEED_CHANGE"

                new_tasks = []
                if model is None:
                    new_tasks = _make_tasks(*common, s, l, deps)
                elif "/" in model or "." in model:
                    # MODEL looks like a file path: it must exist, and its parent
                    # directory name labels the output directory and the model
                    potential_path = get_output_loc(model)
                    if os.path.exists(potential_path):
                        extra = os.path.basename(
                            os.path.dirname(potential_path))
                        new_tasks = _make_tasks(*common, s, l, deps,
                                                extra=extra,
                                                model_name=extra)
                    else:
                        Task.fail_config(
                            f"Your model {model} looks like a path, but I couldn't find a model at {potential_path}"
                        )
                else:
                    # MODEL names an earlier classifier task. Iterate over a
                    # snapshot: the original appended to `tasks` while looping over it.
                    for t in list(tasks):
                        if model == t.name:
                            assert t.__class__ == cls, f"Model {clas_name} with class {cls} has model {model} with class {t.__class__}, they should match!"
                            new_tasks.extend(
                                _make_tasks(*common, s, l, deps + [t],
                                            extra=t.get_unique_name()))

                num_gen += len(new_tasks)
                tasks.extend(new_tasks)

            if num_gen == 0:
                Task.fail_config(
                    f"Classifier {clas_name} with masks |{mask}|{mask_sim}|{mask_fit}| matched no combination of sims and fits"
                )
        return tasks
Exemple #18
0
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 options,
                 global_config,
                 dependencies=None):
        """Resolve the raw data location and publish data-prep outputs.

        Options read: ``RAW_DIR``, ``OPT_SETPKMJD`` (default 16),
        ``PHOTFLAG_MSKREJ`` (default 1016), ``SIM`` (default False),
        ``BLIND`` (default True), ``TYPES``, ``BATCH_FILE`` and
        ``BATCH_REPLACE`` (default ``{}``).

        Args:
            name: Task name, forwarded to the parent constructor.
            output_dir: Task output directory, forwarded to the parent constructor.
            config: Task configuration, forwarded to the parent constructor.
            options: Task options (see above).
            global_config: Accepted but not used here; the configuration is
                obtained from ``get_config()`` instead.
            dependencies: Upstream tasks, forwarded to the parent constructor.
        """
        super().__init__(name, output_dir, config=config, dependencies=dependencies)
        self.options = options
        self.global_config = get_config()

        self.logfile = os.path.join(self.output_dir, "output.log")
        self.conda_env = self.global_config["DataSkimmer"]["conda_env"]
        self.path_to_task = output_dir

        raw_option = self.options.get("RAW_DIR")
        self.unparsed_raw = raw_option
        self.raw_dir = get_data_loc(raw_option)
        if self.raw_dir is None:
            Task.fail_config(f"Unable to find {self.options.get('RAW_DIR')}")

        self.genversion = os.path.basename(self.raw_dir)
        uses_default_sim_area = (self.unparsed_raw == "$SCRATCH_SIMDIR"
                                 or "SNDATA_ROOT/SIM" in self.raw_dir)
        if uses_default_sim_area:
            self.logger.debug("Removing PRIVATE_DATA_PATH from NML file")
            self.data_path = ""
        else:
            self.data_path = os.path.dirname(self.raw_dir)

        # Job name is built from the directory two levels above output_dir
        run_dir_name = os.path.basename(Path(output_dir).parents[1])
        self.job_name = run_dir_name + "_DATAPREP_" + self.name

        self.output_info = os.path.join(self.output_dir, f"{self.genversion}.YAML")
        self.output["genversion"] = self.genversion
        self.opt_setpkmjd = options.get("OPT_SETPKMJD", 16)
        self.photflag_mskrej = options.get("PHOTFLAG_MSKREJ", 1016)
        self.output["data_path"] = self.data_path
        # The raw directory is published both as the photometry dir and as the only sim folder
        self.output["photometry_dirs"] = [get_output_loc(self.raw_dir)]
        self.output["sim_folders"] = [get_output_loc(self.raw_dir)]
        self.output["raw_dir"] = self.raw_dir
        self.clump_file = os.path.join(self.output_dir, self.genversion + ".SNANA.TEXT")
        self.output["clump_file"] = self.clump_file
        self.output["ranseed_change"] = False
        is_sim = options.get("SIM", False)
        self.output["is_sim"] = is_sim
        self.output["blind"] = options.get("BLIND", True)

        requested_types = options.get("TYPES")
        if requested_types is None:
            self.types_dict = {
                "IA": [1],
                "NONIA": [
                    2, 20, 21, 22, 29, 30, 31, 32, 33, 39, 40, 41, 42, 43, 80,
                    81
                ]
            }
        else:
            # Normalise user-supplied type codes to ints (updates the options dict in place)
            self.types_dict = requested_types
            for key, codes in self.types_dict.items():
                self.types_dict[key] = [int(code) for code in codes]

        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.logger.debug(f"\tIA types are {self.types_dict['IA']}")
        self.logger.debug(f"\tNONIA types are {self.types_dict['NONIA']}")
        self.output["types_dict"] = self.types_dict
        # Ordered code -> label mapping: IA codes first, then NONIA codes
        self.types = OrderedDict()
        for code in self.types_dict["IA"]:
            self.types[code] = "Ia"
        for code in self.types_dict["NONIA"]:
            self.types[code] = "II"
        self.output["types"] = self.types

        self.slurm = """{sbatch_header}
        {task_setup}"""

        self.clump_command = """#
Exemple #19
0
    def __init__(self,
                 name,
                 output_dir,
                 options,
                 global_config,
                 dependencies=None):
        super().__init__(name, output_dir, dependencies=dependencies)
        self.options = options
        self.global_config = get_config()

        self.logfile = os.path.join(self.output_dir, "output.log")
        self.conda_env = self.global_config["DataSkimmer"]["conda_env"]
        self.path_to_task = output_dir

        self.unparsed_raw = self.options.get("RAW_DIR")
        self.raw_dir = get_data_loc(self.unparsed_raw)
        if self.raw_dir is None:
            Task.fail_config(f"Unable to find {self.options.get('RAW_DIR')}")

        self.genversion = os.path.basename(self.raw_dir)
        self.data_path = os.path.dirname(self.raw_dir)
        if self.unparsed_raw == "$SCRATCH_SIMDIR" or "SNDATA_ROOT/SIM" in self.raw_dir:
            self.logger.debug("Removing PRIVATE_DATA_PATH from NML file")
            self.data_path = ""
        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_DATAPREP_" + self.name

        self.output["genversion"] = self.genversion
        self.output["data_path"] = self.data_path
        self.output["photometry_dirs"] = [get_output_loc(self.raw_dir)]
        self.output["sim_folders"] = [get_output_loc(self.raw_dir)]
        self.output["raw_dir"] = self.raw_dir
        self.clump_file = os.path.join(self.output_dir,
                                       self.genversion + ".SNANA.TEXT")
        self.output["clump_file"] = self.clump_file
        self.output["ranseed_change"] = False
        is_sim = options.get("SIM", False)
        self.output["is_sim"] = is_sim
        self.output["blind"] = options.get("BLIND", not is_sim)

        self.types_dict = options.get("TYPES")
        if self.types_dict is None:
            self.types_dict = {
                "IA": [1],
                "NONIA": [
                    2, 20, 21, 22, 29, 30, 31, 32, 33, 39, 40, 41, 42, 42, 43,
                    80, 81
                ]
            }
        else:
            for key in self.types_dict.keys():
                self.types_dict[key] = [int(c) for c in self.types_dict[key]]

        self.logger.debug(f"\tIA types are {self.types_dict['IA']}")
        self.logger.debug(f"\tNONIA types are {self.types_dict['NONIA']}")
        self.output["types_dict"] = self.types_dict
        self.types = OrderedDict()
        for n in self.types_dict["IA"]:
            self.types.update({n: "Ia"})
        for n in self.types_dict["NONIA"]:
            self.types.update({n: "II"})
        self.output["types"] = self.types

        self.slurm = """#!/bin/bash
#SBATCH --job-name={job_name}
#SBATCH --time=0:20:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --partition=broadwl
#SBATCH --output={log_file}
#SBATCH --account=pi-rkessler
#SBATCH --mem=2GB

cd {path_to_task}
snana.exe clump.nml
if [ $? -eq 0 ]; then
    echo SUCCESS > {done_file}
else
    echo FAILURE > {done_file}
fi
"""
        self.clump_command = """#
Exemple #20
0
    def classify(self, training, force_refresh):
        """Render the slurm script for this job and submit it when it has changed.

        Args:
            training: When truthy the job trains (``--train_rnn``); otherwise
                it validates with an existing model (``--validate_rnn``), in
                which case the ``MODEL`` option must be set.
            force_refresh: When truthy the job is resubmitted even if the
                rendered script hashes to the stored value.

        Returns:
            bool: Always ``True``.

        Raises:
            AssertionError: in predict mode, if ``MODEL`` is missing or the
                resolved model path does not exist.
        """
        use_photometry = self.options.get("USE_PHOTOMETRY", False)
        model = self.options.get("MODEL")
        model_path = ""
        if not training:
            assert model is not None, "If TRAIN is not specified, you have to point to a model to use"
            # MODEL may name a dependency task; if so, swap in that task's model file
            for dep in self.dependencies:
                if model == dep.name:
                    self.logger.debug(
                        f"Found task dependency {dep.name} with model file {dep.output['model_filename']}"
                    )
                    model = dep.output["model_filename"]

            model_path = get_output_loc(model)
            self.logger.debug(f"Looking for model in {model_path}")
            assert os.path.exists(model_path), f"Cannot find {model_path}"

        types = self.get_types()
        if types is None:
            # Fallback type table used when get_types() returns nothing
            types = OrderedDict({
                "1": "Ia",
                "0": "unknown",
                "2": "SNIax",
                "3": "SNIa-pec",
                "20": "SNIIP",
                "21": "SNIIL",
                "22": "SNIIn",
                "29": "SNII",
                "32": "SNIb",
                "33": "SNIc",
                "39": "SNIbc",
                "41": "SLSN-I",
                "42": "SLSN-II",
                "43": "SLSN-R",
                "80": "AGN",
                "81": "galaxy",
                "98": "None",
                "99": "pending"
            })
        str_types = json.dumps(types)

        sim_dep = self.get_simulation_dependency()
        light_curve_dir = sim_dep.output["photometry_dir"]
        fit = self.get_fit_dependency()
        if fit is None:
            fit_dir = ""
        else:
            fit_dir = f"--fits_dir {fit['fitres_dir']}"

        if self.variant in ["vanilla", "variational"]:
            cyclic = "--cyclic"
        else:
            cyclic = ""
        variant = f"--model {self.variant}"

        clump = sim_dep.output.get("clump_file")
        clump_txt = "" if clump is None else f"--photo_window_files {clump}"

        # Values substituted into the slurm template held in self.slurm
        format_dict = dict(
            conda_env=self.conda_env,
            dump_dir=self.dump_dir,
            photometry_dir=light_curve_dir,
            fit_dir=fit_dir,
            path_to_classifier=self.path_to_classifier,
            job_name=self.job_base_name,
            command="--train_rnn" if training else "--validate_rnn",
            sntypes=str_types,
            variant=variant,
            cyclic=cyclic,
            model="" if training else f"--model_files {model_path}",
            phot="" if not use_photometry else "--source_data photometry",
            test_or_train="" if training else "--data_testing",
            done_file=self.done_file,
            clump=clump_txt,
        )

        slurm_output_file = self.output_dir + "/job.slurm"
        self.logger.info(
            f"Running SuperNNova, slurm job outputting to {slurm_output_file}")
        slurm_text = self.slurm.format(**format_dict)

        old_hash = self.get_old_hash()
        new_hash = self.get_hash_from_string(slurm_text)

        if not force_refresh and new_hash == old_hash:
            # Identical script and no forced refresh: nothing to resubmit
            self.logger.info("Hash check passed, not rerunning")
            return True

        self.logger.info("Rerunning. Cleaning output_dir")
        shutil.rmtree(self.output_dir, ignore_errors=True)
        mkdirs(self.output_dir)
        self.save_new_hash(new_hash)

        with open(slurm_output_file, "w") as script:
            script.write(slurm_text)

        action = "train" if training else "predict using"
        self.logger.info(f"Submitting batch job to {action} SuperNNova")
        subprocess.run(["sbatch", slurm_output_file], cwd=self.output_dir)

        return True
Exemple #21
0
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 options,
                 global_config,
                 dependencies=None):
        super().__init__(name,
                         output_dir,
                         config=config,
                         dependencies=dependencies)
        self.options = options
        self.global_config = global_config

        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_COSMOMC_" + name
        self.logfile = os.path.join(self.output_dir, "output.log")

        self.path_to_cosmomc = get_output_loc(
            self.global_config["CosmoMC"]["location"])

        self.create_cov_dep = self.get_dep(CreateCov)
        self.blind = self.create_cov_dep.output[
            "blind"] if self.create_cov_dep is not None else self.options.get(
                "BLIND", False)
        assert isinstance(
            self.blind,
            (bool, np.bool_)), "Blind should be set to a boolan value!"
        self.ini_prefix = options.get("INI").replace(".ini", "")
        self.static = self.ini_prefix.replace(".ini",
                                              "") in ["cmb_omw", "cmb_omol"]
        self.static_path = "cosmomc_static_chains/"

        if self.create_cov_dep is None:
            self.ini_files = [f"{self.ini_prefix}.ini"]
            self.num_walkers = 4
            self.covopts = ["ALL"]
            self.covopts_numbers = [0]
            self.labels = [self.name]
            self.num_jobs = 1
        else:
            self.num_walkers = options.get("NUM_WALKERS", 8)
            avail_cov_opts = self.create_cov_dep.output["covopts"]
            self.covopts = options.get("COVOPTS") or list(
                avail_cov_opts.keys())
            self.covopts_numbers = [avail_cov_opts[k] for k in self.covopts]

            self.ini_files = [
                f"{self.ini_prefix}_{num}.ini" for num in self.covopts_numbers
            ]

            self.output["hubble_plot"] = self.create_cov_dep.output[
                "hubble_plot"]
            self.output["bcor_name"] = self.create_cov_dep.output["bcor_name"]
            self.labels = [self.name + "_" + c for c in self.covopts]
            self.num_jobs = len(self.covopts)

        self.ntasks = 10
        self.logger.debug(f"Num Walkers: {self.num_walkers}")
        self.chain_dir = os.path.join(self.output_dir, "chains/")
        self.param_dict = {
            l: os.path.join(self.chain_dir, i.replace(".ini", ".paramnames"))
            for l, i in zip(self.covopts, self.ini_files)
        }

        self.done_files = [f"done_{num}.txt" for num in self.covopts_numbers]
        self.chain_dict = {
            l: os.path.join(self.chain_dir, i.replace(".ini", f"_{n + 1}.txt"))
            for l, i in zip(self.covopts, self.ini_files)
            for n in range(self.ntasks)
        }
        self.base_dict = {
            l: os.path.join(self.chain_dir, i.replace(".ini", ""))
            for l, i in zip(self.covopts, self.ini_files)
            for n in range(self.ntasks)
        }
        self.output["chain_dir"] = self.chain_dir
        self.output["param_dict"] = self.param_dict
        self.output["chain_dict"] = self.chain_dict
        self.output["base_dict"] = self.base_dict
        self.output["covopts"] = self.covopts
        self.output["blind"] = self.blind

        self.output["label"] = (self.options.get(
            "LABEL",
            f"({' + '.join(self.ini_prefix.upper().split('_')[:-1])})") + " " +
                                (self.create_cov_dep.output["name"]
                                 if self.create_cov_dep is not None else ""))
        # TODO: Better logic here please
        final = self.ini_prefix.split("_")[-1]
        ps = {
            "omw": ["omegam", "w"],
            "flatomol": ["omegam"],
            "omol": ["omegam", "omegal"],
            "wnu": ["w", "nu"],
            "wwa": ["w", "wa"]
        }
        if final not in ps.keys():
            self.fail_config(
                f"The filename passed in ({self.ini_prefix}) needs to have format 'components_cosmology.ini', where the cosmology is omw, omol, wnu or wwa. Is this a custom file?"
            )
        self.output["cosmology_params"] = ps[final]

        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.slurm = """{sbatch_header}