Example #1
0
    def __init__(self, name, output_dir, config, dependencies, mode, options, index=0, model_name=None):
        """Set up paths, conda environment and batch options for this classifier task."""
        super().__init__(name, output_dir, config, dependencies, mode, options, index=index, model_name=model_name)
        self.global_config = get_config()
        self.options = options

        # Pick the conda environment matching the requested hardware (GPU by default).
        self.gpu = self.options.get("GPU", True)
        scone_cfg = self.global_config["SCONE"]
        self.conda_env = scone_cfg["conda_env_gpu"] if self.gpu else scone_cfg["conda_env_cpu"]
        self.path_to_classifier = scone_cfg["location"]

        # Job name combines the grandparent directory name with this task's directory name.
        parent_name = os.path.basename(Path(output_dir).parents[1])
        self.job_base_name = parent_name + "__" + os.path.basename(output_dir)

        # Optional user-supplied batch file, resolved to a real location if given.
        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.config_path = os.path.join(self.output_dir, "model_config.yml")
        self.heatmaps_path = os.path.join(self.output_dir, "heatmaps")
        self.csvs_path = os.path.join(self.output_dir, "sim_csvs")
        self.slurm = """{sbatch_header}
        {task_setup}"""

        self.logfile = os.path.join(self.output_dir, "output.log")

        # Keep previously generated heatmaps unless the user asks for a remake.
        self.keep_heatmaps = not self.options.get("REMAKE_HEATMAPS", False)
Example #2
0
    def __init__(self, name, output_dir, dependencies, options, merged_data,
                 merged_iasim, merged_ccsim, classifier):
        """Configure the BBC fit task: input template, merged data/sim deps and output locations."""
        # data_files lives alongside this module's source file.
        self.data_dir = os.path.dirname(inspect.stack()[0][1]) + "/data_files/"
        super().__init__(name,
                         output_dir,
                         os.path.join(self.data_dir, "bbc.input"),
                         "=",
                         dependencies=dependencies)

        self.options = options
        self.logging_file = os.path.join(self.output_dir, "output.log")
        self.global_config = get_config()

        self.merged_data = merged_data
        self.merged_iasim = merged_iasim
        self.merged_ccsim = merged_ccsim

        # Populated later, once dependency outputs are available.
        self.bias_cor_fits = None
        self.cc_prior_fits = None
        self.data = None
        # Genversion combines every merged data task's sim name with the classifier name.
        self.genversion = "_".join(
            [m.get_lcfit_dep()["sim_name"]
             for m in merged_data]) + "_" + classifier.name

        self.config_filename = f"{self.genversion}.input"  # Make sure this syncs with the tmp file name
        self.config_path = os.path.join(self.output_dir, self.config_filename)
        self.fit_output_dir = os.path.join(self.output_dir, "output")
        # Fixed: was an f-string with no placeholders (ruff F541).
        self.done_file = os.path.join(self.fit_output_dir, "FITJOBS/ALL.DONE")
        self.probability_column_name = classifier.output["prob_column_name"]

        self.output["fit_output_dir"] = self.fit_output_dir
Example #3
0
    def __init__(self, filename, config_path, config, message_store):
        """Store run configuration and derive queue limits and the output directory."""
        self.logger = get_logger()
        # Position of each task type within the pipeline ordering.
        self.task_index = {task: idx for idx, task in enumerate(self.task_order)}
        self.message_store = message_store
        self.filename = filename
        self.filename_path = config_path
        self.run_config = config
        self.global_config = get_config()

        # Queue limits all come from the QUEUE section of the global config.
        queue_cfg = self.global_config["QUEUE"]
        self.prefix = queue_cfg["prefix"] + "_" + filename
        self.max_jobs = int(queue_cfg["max_jobs"])
        self.max_jobs_gpu = int(queue_cfg["max_gpu_jobs"])
        self.max_jobs_in_queue = int(queue_cfg["max_jobs_in_queue"])
        self.max_jobs_in_queue_gpu = int(queue_cfg["max_gpu_jobs_in_queue"])

        self.output_dir = os.path.join(get_output_dir(), self.filename)
        self.tasks = None
        self.num_jobs_queue = 0
        self.num_jobs_queue_gpu = 0

        self.start = None
        self.finish = None
        self.force_refresh = False
Example #4
0
    def __init__(self, name, output_dir, dependencies, mode, options):
        """Set up a SuperNNova classification task and its SLURM submission script template."""
        super().__init__(name, output_dir, dependencies, mode, options)
        self.global_config = get_config()
        self.dump_dir = output_dir + "/dump"
        self.job_base_name = os.path.basename(output_dir)

        self.tmp_output = None
        self.done_file = os.path.join(self.output_dir, "done_task.txt")
        # Model variant; restricted to the three SuperNNova modes below.
        self.variant = options.get("VARIANT", "vanilla").lower()
        assert self.variant in ["vanilla", "variational", "bayesian"], \
            f"Variant {self.variant} is not vanilla, variational or bayesian"
        # SLURM template; placeholders are filled via str.format at submission time.
        # NOTE(review): {cyclic} appears twice in the second python command —
        # presumably intentional (same flag substituted twice), but worth confirming.
        self.slurm = """#!/bin/bash

#SBATCH --job-name={job_name}
#SBATCH --time=15:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --partition=gpu2
#SBATCH --gres=gpu:1
#SBATCH --output=output.log
#SBATCH --account=pi-rkessler
#SBATCH --mem=64GB

source activate {conda_env}
module load cuda
echo `which python`
cd {path_to_classifier}
python run.py --data --sntypes '{sntypes}' --dump_dir {dump_dir} --raw_dir {photometry_dir} {fit_dir} {phot} {clump} {test_or_train}
python run.py --use_cuda {cyclic} --sntypes '{sntypes}' --done_file {done_file} --dump_dir {dump_dir} {cyclic} {variant} {model} {phot} {command}
        """
        self.conda_env = self.global_config["SuperNNova"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SuperNNova"]["location"])
Example #5
0
    def __init__(self, filename, config):
        """Capture the input filename and configuration for this pipeline run."""
        self.logger = get_logger()
        self.global_config = get_config()
        self.run_config = config
        self.filename = filename

        # Job prefix used to namespace submissions belonging to this run.
        self.prefix = f"PIP_{filename}"
        # Resolved later, once the run layout is known.
        self.output_dir = None
Example #6
0
    def __init__(self, name, output_dir, dependencies, mode, options):
        """Configure the Argonne classifier task: environment, features and model pickle paths."""
        super().__init__(name, output_dir, dependencies, mode, options)
        self.global_config = get_config()
        self.num_jobs = 4

        self.conda_env = self.global_config["ArgonneClassifier"]["conda_env"]
        self.path_to_classifier = get_output_loc(self.global_config["ArgonneClassifier"]["location"])
        self.job_base_name = os.path.basename(output_dir)
        # Space-separated list of light-curve fit features used for classification.
        self.features = options.get("FEATURES", "x1 c zHD x1ERR cERR PKMJDERR")
        self.model_pk_file = "modelpkl.pkl"
        self.output_pk_file = os.path.join(self.output_dir,  self.model_pk_file)

        self.slurm = """#!/bin/bash
Example #7
0
    def __init__(self, filename, config_path, config_raw, config,
                 message_store):
        """Load queue limits, sbatch header templates and task setup for a manager run."""
        self.logger = get_logger()
        # Position of each task type within the pipeline ordering.
        self.task_index = {task: idx for idx, task in enumerate(self.task_order)}
        self.message_store = message_store
        self.filename = filename
        self.filename_path = config_path
        self.file_raw = config_raw
        self.run_config = config
        self.global_config = get_config()

        # Queue limits all come from the QUEUE section of the global config.
        queue_cfg = self.global_config["QUEUE"]
        self.prefix = queue_cfg["prefix"] + "_" + filename
        self.max_jobs = int(queue_cfg["max_jobs"])
        self.max_jobs_gpu = int(queue_cfg["max_gpu_jobs"])
        self.max_jobs_in_queue = int(queue_cfg["max_jobs_in_queue"])
        self.max_jobs_in_queue_gpu = int(queue_cfg["max_gpu_jobs_in_queue"])

        self.logger.debug(self.global_config.keys())

        # Read and normalise the CPU and GPU sbatch header templates.
        sbatch_cfg = self.global_config["SBATCH"]
        self.sbatch_cpu_path = get_data_loc(sbatch_cfg["cpu_location"])
        with open(self.sbatch_cpu_path, 'r') as f:
            self.sbatch_cpu_header = f.read()
        self.sbatch_gpu_path = get_data_loc(sbatch_cfg["gpu_location"])
        with open(self.sbatch_gpu_path, 'r') as f:
            self.sbatch_gpu_header = f.read()
        self.sbatch_cpu_header = self.clean_header(self.sbatch_cpu_header)
        self.sbatch_gpu_header = self.clean_header(self.sbatch_gpu_header)
        self.setup_task_location = self.global_config["SETUP"]["location"]
        self.load_task_setup()

        self.output_dir = os.path.join(get_output_dir(), self.filename)
        self.tasks = None
        self.num_jobs_queue = 0
        self.num_jobs_queue_gpu = 0

        self.start = None
        self.finish = None
        self.force_refresh = False
        self.force_ignore_stage = None

        # Task bookkeeping buckets, updated as the pipeline executes.
        self.running, self.done, self.failed, self.blocked = [], [], [], []
Example #8
0
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        """Configure a SNIRF classifier task: environment, features and prediction/model outputs."""
        super().__init__(name,
                         output_dir,
                         config,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.num_jobs = 1

        self.conda_env = self.global_config["SNIRF"]["conda_env"]

        # The classifier code lives alongside this module's source file.
        self.path_to_classifier = os.path.dirname(inspect.stack()[0][1])
        self.job_base_name = os.path.basename(
            Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
        # Space-separated list of light-curve fit features used for training.
        self.features = options.get(
            "FEATURES",
            "zHD x1 c cERR x1ERR COV_x1_c COV_x1_x0 COV_c_x0 PKMJDERR")
        # self.model_pk_file = self.get_unique_name() + ".pkl"
        self.model_pk_file = "model.pkl"

        self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
        self.predictions_filename = os.path.join(self.output_dir,
                                                 "predictions.csv")

        self.fitopt = options.get("FITOPT", "DEFAULT")

        # Optional user-supplied batch file, resolved to a real location if given.
        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.output["predictions_filename"] = self.predictions_filename
        self.output["model_filename"] = self.output_pk_file
        self.validate_model()

        self.slurm = """{sbatch_header}
Example #9
0
    def __init__(self, filename, config, message_store):
        """Store run configuration and compute job limits and the output directory."""
        self.logger = get_logger()
        self.message_store = message_store
        self.filename = filename
        self.run_config = config
        self.global_config = get_config()

        # Job limits come from the GLOBAL section of the global config.
        global_cfg = self.global_config["GLOBAL"]
        self.prefix = global_cfg["prefix"] + "_" + filename
        self.max_jobs = int(global_cfg["max_jobs"])
        self.max_jobs_in_queue = int(global_cfg["max_jobs_in_queue"])

        # Output dir sits next to this module's parent, under the configured output root.
        module_dir = os.path.dirname(inspect.stack()[0][1])
        relative_out = module_dir + "/../" + self.global_config['OUTPUT']['output_dir'] + "/" + self.filename
        self.output_dir = os.path.abspath(relative_out)
        self.tasks = None

        self.start = None
        self.finish = None
        self.force_refresh = False
    def __init__(self,
                 name,
                 output_dir,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        """Configure a SNIRF classifier task and resolve the FITRES file for the chosen FITOPT."""
        super().__init__(name,
                         output_dir,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.num_jobs = 1

        self.conda_env = self.global_config["SNIRF"]["conda_env"]

        # The classifier code lives alongside this module's source file.
        self.path_to_classifier = os.path.dirname(inspect.stack()[0][1])
        self.job_base_name = os.path.basename(
            Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
        # Space-separated list of light-curve fit features used for training.
        self.features = options.get(
            "FEATURES",
            "zHD x1 c cERR x1ERR COV_x1_c COV_x1_x0 COV_c_x0 PKMJDERR")
        # self.model_pk_file = self.get_unique_name() + ".pkl"
        self.model_pk_file = "model.pkl"

        self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
        self.predictions_filename = os.path.join(self.output_dir,
                                                 "predictions.csv")

        self.fitopt = options.get("FITOPT", "DEFAULT")
        # Resolve the FITRES file produced by the light-curve fit dependency.
        # NOTE(review): a FITOPT missing from fitopt_map raises KeyError here —
        # confirm it is validated upstream.
        lcfit = self.get_fit_dependency()
        self.fitres_filename = lcfit["fitopt_map"][self.fitopt]
        self.fitres_file = os.path.abspath(
            os.path.join(lcfit["fitres_dirs"][self.index],
                         self.fitres_filename))

        self.output["predictions_filename"] = self.predictions_filename
        self.output["model_filename"] = self.output_pk_file
        self.validate_model()

        self.slurm = """#!/bin/bash
Example #11
0
    def __init__(self,
                 name,
                 output_dir,
                 options,
                 global_config,
                 dependencies=None,
                 index=0):
        """Configure a create-covariance task driven by a BiasCor dependency's output."""
        base_file = get_data_loc("create_cov/input_file.txt")
        super().__init__(name,
                         output_dir,
                         base_file,
                         default_assignment=": ",
                         dependencies=dependencies)

        self.options = options
        self.global_config = get_config()
        self.index = index
        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_CREATE_COV_" + name
        # External helper scripts live alongside this module's source file.
        self.path_to_code = os.path.abspath(
            os.path.dirname(inspect.stack()[0][1]) + "/external")

        self.logfile = os.path.join(self.output_dir, "output.log")
        self.sys_file_in = get_data_loc(
            options.get("SYS_SCALE", "surveys/des/bbc/scale_5yr.list"))
        self.sys_file_out = os.path.join(self.output_dir, "sys_scale.LIST")
        self.chain_dir = os.path.join(self.output_dir, "chains/")
        self.config_dir = os.path.join(self.output_dir, "output")

        # A BiasCor dependency is mandatory; fail the config if missing.
        self.biascor_dep = self.get_dep(BiasCor, fail=True)
        self.output["blind"] = self.biascor_dep.output["blind"]
        self.input_file = os.path.join(
            self.output_dir,
            self.biascor_dep.output["subdirs"][index] + ".input")
        self.output["hubble_plot"] = self.biascor_dep.output["hubble_plot"]

        self.output["ini_dir"] = self.config_dir
        # Map each COVOPT label (the "[NAME]" part) to a 1-based index; 0 is ALL.
        covopts_map = {"ALL": 0}
        for i, covopt in enumerate(self.options.get("COVOPTS", [])):
            covopts_map[covopt.split("]")[0][1:]] = i + 1
        self.output["covopts"] = covopts_map
        self.output["index"] = index
        self.output["bcor_name"] = self.biascor_dep.name
        self.slurm = """#!/bin/bash
Example #12
0
    def __init__(self, name, output_dir, options, dependencies=None):
        """Configure a data-preparation task that clumps raw photometry via snana.exe."""
        super().__init__(name, output_dir, dependencies=dependencies)
        self.options = options
        self.global_config = get_config()

        self.logfile = os.path.join(self.output_dir, "output.log")
        self.conda_env = self.global_config["DataSkimmer"]["conda_env"]
        self.path_to_task = output_dir

        # NOTE(review): RAW_DIR is assumed present — basename(None) would raise
        # TypeError; confirm callers always supply it.
        self.raw_dir = self.options.get("RAW_DIR")
        self.genversion = os.path.basename(self.raw_dir)
        self.data_path = os.path.dirname(self.raw_dir)
        self.job_name = f"DATAPREP_{self.name}"

        self.output["genversion"] = self.genversion
        self.output["data_path"] = self.data_path
        self.output["photometry_dir"] = get_output_loc(self.raw_dir)
        self.output["raw_dir"] = self.raw_dir
        self.clump_file = os.path.join(self.output_dir,
                                       self.genversion + ".SNANA.TEXT")
        self.output["clump_file"] = self.clump_file

        # SLURM script template; placeholders are filled via str.format at submission time.
        self.slurm = """#!/bin/bash
#SBATCH --job-name={job_name}
#SBATCH --time=0:20:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --partition=broadwl
#SBATCH --output={log_file}
#SBATCH --account=pi-rkessler
#SBATCH --mem=2GB

cd {path_to_task}
snana.exe clump.nml
if [ $? -eq 0 ]; then
    echo SUCCESS > {done_file}
else
    echo FAILURE > {done_file}
fi
"""
        self.clump_command = """#
Example #13
0
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        """Configure a SNIRF classifier task; the FITRES file is resolved later (fields start as None)."""
        super().__init__(name,
                         output_dir,
                         config,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.num_jobs = 4

        self.conda_env = self.global_config["SNIRF"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SNIRF"]["location"])
        self.job_base_name = os.path.basename(
            Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
        # Space-separated list of light-curve fit features used for training.
        self.features = options.get("FEATURES", "x1 c zHD x1ERR cERR PKMJDERR")
        self.validate_model()

        self.model_pk_file = "model.pkl"
        self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
        self.fitopt = options.get("FITOPT", "DEFAULT")
        # Filled in later, once the fit dependency's outputs are known.
        self.fitres_filename = None
        self.fitres_file = None

        # Optional user-supplied batch file, resolved to a real location if given.
        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.slurm = """{sbatch_header}
Example #14
0
def run(args):
    """Load the pipeline YAML config, build a Manager and execute (or just check) the run."""
    # Resolve and load the YAML config file.
    yaml_path = os.path.abspath(os.path.expandvars(args.yaml))
    assert os.path.exists(yaml_path), f"File {yaml_path} cannot be found."
    with open(yaml_path, "r") as f:
        config = yaml.safe_load(f)

    # GLOBAL (not GLOBALS) is the section that overrides cfg.yml.
    overwrites = config.get("GLOBAL")
    if config.get("GLOBALS") is not None:
        logging.warning(
            "Your config file has a GLOBALS section in it. If you're trying to overwrite cfg.yml, rename this to GLOBAL"
        )

    global_config = get_config(initial_path=args.config, overwrites=overwrites)

    config_filename = os.path.basename(args.yaml).split(".")[0].upper()
    logging_folder = os.path.abspath(
        os.path.join(get_output_dir(), config_filename))
    if not args.check:
        mkdirs(logging_folder)

    message_store, logging_filename = setup_logging(config_filename,
                                                    logging_folder, args)

    # Every data directory must have resolved; fail loudly otherwise.
    for index, data_dir in enumerate(global_config["DATA_DIRS"]):
        logging.debug(f"Data directory {index + 1} set as {data_dir}")
        assert data_dir is not None, "Data directory is none, which means it failed to resolve. Check the error message above for why."

    # Specifying a start stage implies refreshing everything downstream of it.
    if args.start is not None:
        args.refresh = True

    manager = Manager(config_filename, yaml_path, config, message_store)
    manager.set_start(args.start)
    manager.set_finish(args.finish)
    manager.set_force_refresh(args.refresh)
    manager.execute(args.check)
    chown_file(logging_filename)
    return manager
Example #15
0
    def __init__(self,
                 name,
                 output_dir,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        """Configure a SNIRF classifier task and resolve the FITRES file for the chosen FITOPT."""
        super().__init__(name,
                         output_dir,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.num_jobs = 4

        self.conda_env = self.global_config["SNIRF"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SNIRF"]["location"])
        self.job_base_name = os.path.basename(
            Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
        # Space-separated list of light-curve fit features used for training.
        self.features = options.get("FEATURES", "x1 c zHD x1ERR cERR PKMJDERR")
        self.validate_model()

        self.model_pk_file = "model.pkl"
        self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
        self.fitopt = options.get("FITOPT", "DEFAULT")
        # Resolve the FITRES file produced by the light-curve fit dependency.
        # NOTE(review): a FITOPT missing from fitopt_map raises KeyError here —
        # confirm it is validated upstream.
        lcfit = self.get_fit_dependency()
        self.fitres_filename = lcfit["fitopt_map"][self.fitopt]
        self.fitres_file = os.path.abspath(
            os.path.join(lcfit["fitres_dirs"][self.index],
                         self.fitres_filename))

        self.slurm = """#!/bin/bash
Example #16
0
    def __init__(self,
                 name,
                 output_dir,
                 options,
                 global_config,
                 dependencies=None):
        """Configure a data-preparation task: resolve raw photometry and build the type maps."""
        super().__init__(name, output_dir, dependencies=dependencies)
        self.options = options
        self.global_config = get_config()

        self.logfile = os.path.join(self.output_dir, "output.log")
        self.conda_env = self.global_config["DataSkimmer"]["conda_env"]
        self.path_to_task = output_dir

        self.unparsed_raw = self.options.get("RAW_DIR")
        self.raw_dir = get_data_loc(self.unparsed_raw)
        if self.raw_dir is None:
            Task.fail_config(f"Unable to find {self.options.get('RAW_DIR')}")

        self.genversion = os.path.basename(self.raw_dir)
        self.data_path = os.path.dirname(self.raw_dir)
        # Standard SNANA sim locations do not need a PRIVATE_DATA_PATH override.
        if self.unparsed_raw == "$SCRATCH_SIMDIR" or "SNDATA_ROOT/SIM" in self.raw_dir:
            self.logger.debug("Removing PRIVATE_DATA_PATH from NML file")
            self.data_path = ""
        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_DATAPREP_" + self.name

        self.output["genversion"] = self.genversion
        self.output["data_path"] = self.data_path
        self.output["photometry_dirs"] = [get_output_loc(self.raw_dir)]
        self.output["sim_folders"] = [get_output_loc(self.raw_dir)]
        self.output["raw_dir"] = self.raw_dir
        self.clump_file = os.path.join(self.output_dir,
                                       self.genversion + ".SNANA.TEXT")
        self.output["clump_file"] = self.clump_file
        self.output["ranseed_change"] = False
        is_sim = options.get("SIM", False)
        self.output["is_sim"] = is_sim
        # Real data is blinded by default; sims are not.
        self.output["blind"] = options.get("BLIND", not is_sim)

        # Default SN type codes if the user supplies none.
        # NOTE(review): 42 appears twice in the NONIA list — harmless (duplicate
        # dict key below), but looks like a typo; confirm intended codes.
        self.types_dict = options.get("TYPES")
        if self.types_dict is None:
            self.types_dict = {
                "IA": [1],
                "NONIA": [
                    2, 20, 21, 22, 29, 30, 31, 32, 33, 39, 40, 41, 42, 42, 43,
                    80, 81
                ]
            }
        else:
            # Coerce user-supplied type codes to ints.
            for key in self.types_dict.keys():
                self.types_dict[key] = [int(c) for c in self.types_dict[key]]

        self.logger.debug(f"\tIA types are {self.types_dict['IA']}")
        self.logger.debug(f"\tNONIA types are {self.types_dict['NONIA']}")
        self.output["types_dict"] = self.types_dict
        # Flat code -> label map; note every non-Ia code is labelled "II" here.
        self.types = OrderedDict()
        for n in self.types_dict["IA"]:
            self.types.update({n: "Ia"})
        for n in self.types_dict["NONIA"]:
            self.types.update({n: "II"})
        self.output["types"] = self.types

        # SLURM script template; placeholders are filled via str.format at submission time.
        self.slurm = """#!/bin/bash
#SBATCH --job-name={job_name}
#SBATCH --time=0:20:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --partition=broadwl
#SBATCH --output={log_file}
#SBATCH --account=pi-rkessler
#SBATCH --mem=2GB

cd {path_to_task}
snana.exe clump.nml
if [ $? -eq 0 ]; then
    echo SUCCESS > {done_file}
else
    echo FAILURE > {done_file}
fi
"""
        self.clump_command = """#
Example #17
0
    def __init__(self, name, output_dir, config, options, dependencies=None):
        """Configure the Analyse task: collect CosmoMC/BiasCor/LCFit outputs and plotting scripts."""
        super().__init__(name,
                         output_dir,
                         config=config,
                         dependencies=dependencies)
        self.options = options
        self.global_config = get_config()

        self.logfile = os.path.join(self.output_dir, "output.log")

        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_ANALYSE_" + os.path.basename(
                output_dir)

        self.path_to_codes = []
        self.done_files = []

        # Plotting scripts live in the "external" folder next to this module.
        self.plot_code_dir = os.path.join(
            os.path.dirname(inspect.stack()[0][1]), "external")

        # Normalise COVOPTS to a list (a single string is allowed in the config).
        self.covopts = options.get("COVOPTS")
        self.singular_blind = options.get("SINGULAR_BLIND", False)
        if isinstance(self.covopts, str):
            self.covopts = [self.covopts]

        self.cosmomc_input_files = []
        self.cosmomc_output_files = []
        self.cosmomc_covopts = []
        self.names = []
        self.params = []

        # Assuming all deps are cosmomc tasks
        self.cosmomc_deps = self.get_deps(CosmoMC)
        # Blind the whole analysis if any CosmoMC dependency is blinded.
        self.blind = np.any([c.output["blind"] for c in self.cosmomc_deps])
        if self.blind:
            self.blind_params = ["w", "om", "ol", "omegam", "omegal"]
        else:
            if options.get("BLIND", False):
                self.blind_params = options.get("BLIND")
            else:
                self.blind_params = []
        self.biascor_deps = self.get_deps(BiasCor)
        self.lcfit_deps = self.get_deps(SNANALightCurveFit)

        # Register the plotting scripts relevant to the dependencies we have.
        if self.cosmomc_deps:
            self.add_plot_script_to_run("parse_cosmomc.py")
            self.add_plot_script_to_run("plot_cosmomc.py")
            self.add_plot_script_to_run("plot_errbudget.py")
        if self.biascor_deps:
            self.add_plot_script_to_run("parse_biascor.py")
            self.add_plot_script_to_run("plot_biascor.py")
        if self.lcfit_deps:
            self.add_plot_script_to_run("parse_lcfit.py")
            self.add_plot_script_to_run("plot_histogram.py")
            self.add_plot_script_to_run("plot_efficiency.py")

        if self.options.get("ADDITIONAL_SCRIPTS") is not None:
            vals = ensure_list(self.options.get("ADDITIONAL_SCRIPTS"))
            for v in vals:
                self.add_plot_script_to_run(v)

        # NOTE(review): assumes at least one script was registered above — an
        # Analyse task with no deps and no ADDITIONAL_SCRIPTS would raise
        # IndexError here; confirm that cannot happen.
        self.done_file = self.done_files[-1]

        # Gather CosmoMC chain inputs/outputs per covopt, and the union of params.
        for c in self.cosmomc_deps:
            for covopt in c.output["covopts"]:
                self.cosmomc_input_files.append(c.output["base_dict"][covopt])
                self.cosmomc_output_files.append(c.output["label"] + "_" +
                                                 covopt + ".csv.gz")
                self.cosmomc_covopts.append(covopt)
                self.names.append(c.output["label"].replace("_", " ") + " " +
                                  covopt)
                for p in c.output["cosmology_params"]:
                    if p not in self.params:
                        self.params.append(p)
            # NOTE(review): this debug call is inside the dependency loop, so it
            # fires once per CosmoMC dep — possibly intended to run once after.
            self.logger.debug(
                f"Analyse task will create CosmoMC plots with {len(self.cosmomc_input_files)} covopts/plots"
            )

        self.wsummary_files = [
            b.output["w_summary"] for b in self.biascor_deps
        ]

        # Get the fitres and m0diff files we'd want to parse for Hubble diagram plotting
        self.biascor_fitres_input_files = [
            os.path.join(m, "FITOPT000_MUOPT000.FITRES.gz")
            for b in self.biascor_deps for m in b.output["m0dif_dirs"]
        ]
        self.biascor_prob_col_names = [
            b.output["prob_column_name"] for b in self.biascor_deps
            for m in b.output["m0dif_dirs"]
        ]
        self.biascor_fitres_output_files = [
            b.name + "__" + os.path.basename(m).replace("OUTPUT_BBCFIT", "1") +
            "__FITOPT0_MUOPT0.fitres.gz" for b in self.biascor_deps
            for m in b.output["m0dif_dirs"]
        ]

        self.biascor_m0diffs = []
        self.biascor_m0diff_output = "all_biascor_m0diffs.csv"
        self.biascor_fitres_combined = "all_biascor_fitres.csv.gz"

        # Optional user-supplied batch file, resolved to a real location if given.
        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.slurm = """{sbatch_header}
Example #18
0
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        """Configure a SuperNNova classification task.

        Reads training options and optional yml template files, resolves batch
        submission settings, then validates the requested norm and variant.
        """
        super().__init__(name,
                         output_dir,
                         config,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.dump_dir = output_dir + "/dump"
        self.job_base_name = os.path.basename(output_dir)
        self.gpu = config.get("GPU", True)
        self.tmp_output = None
        self.done_file = os.path.join(self.output_dir, "done_task.txt")
        self.done_file2 = os.path.join(self.output_dir, "done_task2.txt")
        self.variant = options.get("VARIANT", "vanilla").lower()
        # "zspe" feeds spectroscopic redshift as a feature; "none" disables it.
        self.redshift = "zspe" if options.get("REDSHIFT", True) else "none"
        self.norm = options.get("NORM", "cosmo")
        self.cyclic = options.get("CYCLIC", True)
        self.seed = options.get("SEED", 0)
        self.clean = config.get("CLEAN", True)
        self.batch_size = options.get("BATCH_SIZE", 128)
        self.num_layers = options.get("NUM_LAYERS", 2)
        self.hidden_dim = options.get("HIDDEN_DIM", 32)

        # Setup yml files
        self.data_yml_file = options.get("DATA_YML", None)
        self.output_data_yml = os.path.join(self.output_dir, "data.yml")
        self.classification_yml_file = options.get("CLASSIFICATION_YML", None)
        self.output_classification_yml = os.path.join(self.output_dir,
                                                      "classification.yml")
        # XOR - only errors if exactly one of the two yml's is None
        if (self.data_yml_file is None) ^ (self.classification_yml_file is
                                           None):
            # Fixed: this message previously formatted self.data_yml and
            # self.classification_yml, which are not assigned in this branch
            # (AttributeError), and was missing a closing parenthesis.
            self.logger.error(
                f"If using yml inputs, both 'DATA_YML' (currently {self.data_yml_file}) and 'CLASSIFICATION_YML' (currently {self.classification_yml_file}) must be provided"
            )
        elif self.data_yml_file is not None:
            with open(self.data_yml_file, 'r') as f:
                self.data_yml = f.read()
            with open(self.classification_yml_file, 'r') as f:
                self.classification_yml = f.read()
            self.has_yml = True
            # The yml file, not the VARIANT option, determines the variant here.
            self.variant = self.get_variant_from_yml(self.classification_yml)
        else:
            self.data_yml = None
            self.classification_yml = None
            self.has_yml = False

        # Optional user-supplied batch file, resolved to a real location if given.
        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.validate_model()

        assert self.norm in [
            "global",
            "cosmo",
            "perfilter",
            "cosmo_quantile",
            "none",
        ], f"Norm option is set to {self.norm}, needs to be one of 'global', 'cosmo', 'perfilter', 'cosmo_quantile'"
        assert self.variant in [
            "vanilla", "variational", "bayesian"
        ], f"Variant {self.variant} is not vanilla, variational or bayesian"
        self.slurm = """{sbatch_header}
        {task_setup}

        """
        self.conda_env = self.global_config["SuperNNova"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SuperNNova"]["location"])
Example #19
0
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 options,
                 global_config,
                 dependencies=None,
                 index=0):
        """Set up a CREATE_COV task for one BiasCor realisation.

        Resolves the base input file, reads batch/binning options, and
        wires the output dict (covopts map, blind flag, hubble plot) from
        the upstream BiasCor dependency so downstream cosmology tasks can
        consume it.

        Args:
            name: Task name, used in job naming.
            output_dir: Directory all task output is written into.
            config: Task config block from the pipeline YAML.
            options: OPTS sub-dictionary (may be None; treated as empty).
            global_config: NOTE(review): appears unused here, get_config()
                is called instead — confirm.
            dependencies: Upstream tasks; a BiasCor task is required.
            index: Index of the BiasCor output subdirectory to use.
        """

        base_file = get_data_loc("create_cov/input_file.txt")
        super().__init__(name,
                         output_dir,
                         config,
                         base_file,
                         default_assignment=": ",
                         dependencies=dependencies)

        # Normalise options so the .get() calls below are always safe.
        if options is None:
            options = {}
        self.options = options
        self.templates_dir = self.options.get("INI_DIR", "cosmomc_templates")
        self.global_config = get_config()
        self.index = index
        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_CREATE_COV_" + name
        #self.path_to_code = os.path.abspath(os.path.dirname(inspect.stack()[0][1]) + "/external/")
        self.path_to_code = '$SNANA_DIR/util/'  #Now maintained by SNANA

        self.batch_mem = options.get("BATCH_MEM", "4GB")

        # Key file locations produced by this task.
        self.logfile = os.path.join(self.output_dir, "output.log")
        self.sys_file_out = os.path.join(self.output_dir, "sys_scale.yml")
        self.chain_dir = os.path.join(self.output_dir, "chains/")
        self.config_dir = os.path.join(self.output_dir, "output")
        self.subtract_vpec = options.get("SUBTRACT_VPEC", False)
        self.unbinned_covmat_addin = options.get("UNBINNED_COVMAT_ADDIN", [])

        # Optional custom sbatch template and placeholder replacements.
        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        # Binning defaults to on unless vpec subtraction is requested.
        self.binned = options.get("BINNED", not self.subtract_vpec)
        self.rebinned_x1 = options.get("REBINNED_X1", "")
        if self.rebinned_x1 != "":
            self.rebinned_x1 = f"--nbin_x1 {self.rebinned_x1}"
        self.rebinned_c = options.get("REBINNED_C", "")
        if self.rebinned_c != "":
            self.rebinned_c = f"--nbin_c {self.rebinned_c}"

        # Pull everything we need from the upstream BiasCor task; fail
        # hard if it is missing from the dependencies.
        self.biascor_dep = self.get_dep(BiasCor, fail=True)
        self.sys_file_in = self.get_sys_file_in()
        self.output["blind"] = self.biascor_dep.output["blind"]
        self.input_file = os.path.join(
            self.output_dir,
            self.biascor_dep.output["subdirs"][index] + ".input")
        self.calibration_set = options.get("CALIBRATORS", [])
        self.output["hubble_plot"] = self.biascor_dep.output["hubble_plot"]

        # Only prepare CosmoMC inputs when requested (default: yes).
        if self.config.get("COSMOMC", True):
            self.logger.info("Generating cosmomc output")
            self.output["ini_dir"] = os.path.join(self.config_dir, "cosmomc")
            self.prepare_cosmomc = True
        else:
            self.logger.info("Not generating cosmomc output")
            self.prepare_cosmomc = False
        # Map covariance option labels (format "[LABEL] ...") to indices;
        # index 0 is reserved for ALL.
        covopts_map = {"ALL": 0}
        for i, covopt in enumerate(self.options.get("COVOPTS", [])):
            covopts_map[covopt.split("]")[0][1:]] = i + 1
        self.output["covopts"] = covopts_map
        self.output["index"] = index
        self.output["bcor_name"] = self.biascor_dep.name
        self.slurm = """{sbatch_header}
Example #20
0
    def __init__(self,
                 name,
                 output_dir,
                 dependencies,
                 mode,
                 options,
                 index=0,
                 model_name=None):
        super().__init__(name,
                         output_dir,
                         dependencies,
                         mode,
                         options,
                         index=index,
                         model_name=model_name)
        self.global_config = get_config()
        self.dump_dir = output_dir + "/dump"
        self.job_base_name = os.path.basename(output_dir)
        self.gpu = True
        self.tmp_output = None
        self.done_file = os.path.join(self.output_dir, "done_task.txt")
        self.done_file2 = os.path.join(self.output_dir, "done_task2.txt")
        self.variant = options.get("VARIANT", "vanilla").lower()
        self.redshift = "zspe" if options.get("REDSHIFT", True) else "none"
        self.norm = options.get("NORM", "cosmo")
        self.validate_model()

        assert self.norm in [
            "global", "cosmo", "perfilter"
        ], f"Norm option is set to {self.norm}, needs to be one of 'global', 'cosmo', 'perfilter'"
        assert self.variant in [
            "vanilla", "variational", "bayesian"
        ], f"Variant {self.variant} is not vanilla, variational or bayesian"
        self.slurm = """#!/bin/bash

#SBATCH --job-name={job_name}
#SBATCH --time=23:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --partition=gpu2
#SBATCH --gres=gpu:1
#SBATCH --output=output.log
#SBATCH --account=pi-rkessler
#SBATCH --mem=64GB

source activate {conda_env}
module load cuda
echo `which python`
cd {path_to_classifier}
echo "#################TIMING  Starting here:   `date`"
python run.py --data --sntypes '{sntypes}' --dump_dir {dump_dir} --raw_dir {photometry_dir} {fit_dir} {phot} {clump} {norm} {test_or_train}
if [ $? -ne 0 ]; then
    echo FAILURE > {done_file2}
else
    echo "#################TIMING  Database done now, starting classifier:   `date`"
    python run.py --use_cuda {cyclic} --sntypes '{sntypes}' --done_file {done_file} --batch_size 20 --dump_dir {dump_dir} {cyclic} {variant} {model} {phot} {redshift} {norm} {command}
    if [ $? -eq 0 ]; then
        rm -rf {dump_dir}/processed
        echo SUCCESS > {done_file2}
    else
        echo FAILURE > {done_file2}
    fi
fi
echo "#################TIMING  Classifier finished:   `date`"
        """
        self.conda_env = self.global_config["SuperNNova"]["conda_env"]
        self.path_to_classifier = get_output_loc(
            self.global_config["SuperNNova"]["location"])
Example #21
0
def run(args):
    """Main pipeline entry point.

    Loads the pipeline YAML, configures logging and permissions, builds a
    Manager, installs a Ctrl-c handler, and executes the requested stages.
    Returns the Manager instance, or None when args is None.
    """
    if args is None:
        return None

    init()

    # Resolve and parse the pipeline YAML configuration.
    yaml_path = os.path.abspath(os.path.expandvars(args.yaml))
    assert os.path.exists(yaml_path), f"File {yaml_path} cannot be found."
    config_raw, config = load_yaml(yaml_path)

    overwrites = config.get("GLOBAL")
    if config.get("GLOBALS") is not None:
        logging.warning(
            "Your config file has a GLOBALS section in it. If you're trying to overwrite cfg.yml, rename this to GLOBAL"
        )

    # CFG_PATH inside the GLOBAL section takes precedence over --config.
    global_section = config.get("GLOBAL")
    cfg = global_section.get("CFG_PATH") if global_section else None
    if cfg is None:
        cfg = args.config

    global_config = get_config(initial_path=cfg, overwrites=overwrites)

    # Log folder is named after the YAML file (uppercased stem).
    config_filename = os.path.basename(args.yaml).split(".")[0].upper()
    output_dir = get_output_dir()
    logging_folder = os.path.abspath(os.path.join(output_dir, config_filename))

    if not args.check:
        mkdirs(logging_folder)
    if os.path.exists(logging_folder):
        chown_dir(logging_folder, walk=args.permission)

    # A permission-only invocation stops after fixing ownership.
    if args.permission:
        return

    message_store, logging_filename = setup_logging(config_filename,
                                                    logging_folder, args)

    for idx, data_dir in enumerate(global_config["DATA_DIRS"]):
        logging.debug(f"Data directory {idx + 1} set as {data_dir}")
        assert data_dir is not None, "Data directory is none, which means it failed to resolve. Check the error message above for why."

    logging.info(
        f"Running on: {os.environ.get('HOSTNAME', '$HOSTNAME not set')} login node."
    )

    manager = Manager(config_filename, yaml_path, config_raw, config,
                      message_store)

    # Gracefully handle Ctrl-c: kill outstanding tasks and reset hashes.
    def handler(signum, frame):
        logging.error("Ctrl-c was pressed.")
        logging.warning(
            "All remaining tasks will be killed and their hash reset")
        manager.kill_remaining_tasks()
        exit(1)

    signal.signal(signal.SIGINT, handler)

    # Requesting a custom start stage forces a refresh of earlier work.
    if args.start is not None:
        args.refresh = True
    manager.set_start(args.start)
    manager.set_finish(args.finish)
    manager.set_force_refresh(args.refresh)
    manager.set_force_ignore_stage(args.ignore)
    manager.execute(args.check, args.compress, args.uncompress)
    chown_file(logging_filename)
    return manager
Example #22
0
    def __init__(self, name, output_dir, config, dependencies, options,
                 global_config):
        """Set up a BBC (BEAMS with Bias Corrections) fitting task.

        Resolves the base input file, records classifier probability
        columns, works out job/iteration bookkeeping files, and populates
        self.output with everything downstream tasks need (blind flag,
        fitopt files, NSPLITRAN settings, hubble plots, ...).

        Args:
            name: Task name; also used for the generated .input filename.
            output_dir: Directory all task output is written into.
            config: Task config block from the pipeline YAML.
            dependencies: Upstream tasks (merged fits, biascor/CC sims).
            options: OPTS sub-dictionary of the config.
            global_config: NOTE(review): appears unused, get_config() is
                called instead — confirm.
        """
        base = get_data_loc(config.get("BASE",
                                       "surveys/des/bbc/bbc_5yr.input"))
        self.base_file = base
        super().__init__(name,
                         output_dir,
                         config,
                         base,
                         "=",
                         dependencies=dependencies)

        self.options = options
        self.logging_file = os.path.join(self.output_dir, "output.log")
        self.global_config = get_config()

        self.prob_cols = config["PROB_COLS"]

        # Upstream inputs: merged data, Ia bias-correction sims, core-
        # collapse prior sims, and the optional classifier task.
        self.merged_data = config.get("DATA")
        self.merged_iasim = config.get("SIMFILE_BIASCOR")
        self.merged_ccsim = config.get("SIMFILE_CCPRIOR")
        self.classifier = config.get("CLASSIFIER")
        if self.classifier is not None:
            self.config["CLASSIFIER"] = self.classifier.name
        self.make_all = config.get("MAKE_ALL_HUBBLE", True)
        self.use_recalibrated = config.get("USE_RECALIBRATED", False)
        self.consistent_sample = config.get("CONSISTENT_SAMPLE", True)
        self.bias_cor_fits = None
        self.cc_prior_fits = None
        self.data = None
        self.data_fitres = None
        self.sim_names = [m.output["sim_name"] for m in self.merged_data]
        self.blind = self.get_blind(config, options)
        self.logger.debug(f"Blinding set to {self.blind}")
        self.output["blind"] = self.blind
        self.genversions = [m.output["genversion"] for m in self.merged_data]
        # NOTE(review): "num_verions" is a typo but is kept, since other
        # code may read this attribute by its current name.
        self.num_verions = [
            len(m.output["fitres_dirs"]) for m in self.merged_data
        ]
        self.output["fitopt_files"] = [
            m.output.get("fitopt_file") for m in self.merged_data
        ]
        self.genversion = "_".join(self.sim_names) + (
            "" if self.classifier is None else "_" + self.classifier.name)

        self.config_filename = f"{self.name}.input"  # Make sure this syncs with the tmp file name
        self.config_path = os.path.join(self.output_dir, self.config_filename)
        self.kill_file = self.config_path.replace(".input", "_KILL.LOG")
        self.job_name = os.path.basename(self.config_path)
        self.fit_output_dir = os.path.join(self.output_dir, "output")
        self.merge_log = os.path.join(self.fit_output_dir, "MERGE.LOG")
        self.reject_list = os.path.join(self.output_dir, "reject.list")

        # Plain string (was an f-string with no placeholders).
        self.done_file = os.path.join(self.fit_output_dir, "ALL.DONE")
        self.done_file_iteration = os.path.join(self.output_dir,
                                                "RESUBMITTED.DONE")
        self.run_iteration = 1 if os.path.exists(
            self.done_file_iteration) else 0
        # Probability column: explicit PROB_COLUMN_NAME wins, otherwise
        # look up the column registered for the classifier.
        self.probability_column_name = None
        if self.config.get("PROB_COLUMN_NAME") is not None:
            self.probability_column_name = self.config.get("PROB_COLUMN_NAME")
        elif self.classifier is not None:
            self.probability_column_name = self.prob_cols[self.classifier.name]
        self.output["prob_column_name"] = self.probability_column_name

        if self.use_recalibrated:
            new_name = self.probability_column_name.replace("PROB_", "CPROB_")
            self.logger.debug(
                f"Updating prob column name from {self.probability_column_name} to {new_name}. I hope it exists!"
            )
            self.probability_column_name = new_name
        self.output["fit_output_dir"] = self.fit_output_dir

        # Uppercase the option keys once, instead of building a list for
        # the membership test and a second full dict for the lookup.
        upper_options = {x.upper(): y for x, y in self.options.items()}
        self.output["NSPLITRAN"] = "NSPLITRAN" in upper_options
        if self.output["NSPLITRAN"]:
            self.output["NSPLITRAN_VAL"] = upper_options["NSPLITRAN"]
        self.w_summary = os.path.join(self.fit_output_dir,
                                      "BBC_SUMMARY_wfit.FITRES")
        self.output["w_summary"] = self.w_summary

        self.set_m0dif_dirs()

        # MAKE_ALL_HUBBLE=False keeps only the first hubble plot.
        if not self.make_all:
            self.output_plots = [self.output_plots[0]]
        self.logger.debug(f"Making {len(self.output_plots)} plots")

        self.muopts = self.config.get("MUOPTS", {})
        self.muopt_order = list(self.muopts.keys())
        self.output["muopts"] = self.muopt_order
        self.output["hubble_plot"] = self.output_plots

        self.devel = self.options.get('devel', 0)

        self.logger.debug(f"Devel option: {self.devel}")
        self.do_iterate = False  # Temp flag to stop iterating as BBC will reiterate natively
        self.logger.debug(f"Do iterate: {self.do_iterate}")
        self.logger.debug(f"SNANA_DIR: {os.environ['SNANA_DIR']}")
Example #23
0
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 global_config,
                 combine="combine.input"):
        """Set up an SNANA simulation task from its GENVERSION config.

        Parses each simulation component's base input file to classify
        GENTYPEs as Ia vs non-Ia, estimates how many batch jobs the
        simulation will submit, and records genversion/type metadata in
        self.output for downstream tasks.

        Args:
            name: Task name.
            output_dir: Directory all task output is written into.
            config: Task config block; component sub-dicts plus reserved
                top-level keys (GENVERSION, GLOBAL, OPTS, EXTERNAL).
            global_config: Parsed global pipeline configuration.
            combine: Name of the base "combine" input template to resolve.
        """
        self.data_dirs = global_config["DATA_DIRS"]
        base_file = get_data_loc(combine)
        super().__init__(name, output_dir, config, base_file, ": ")

        # Check for any replacements
        path_sndata_sim = get_config().get("SNANA").get("sim_dir")
        self.logger.debug(f"Setting PATH_SNDATA_SIM to {path_sndata_sim}")
        self.yaml["CONFIG"]["PATH_SNDATA_SIM"] = path_sndata_sim

        # Long genversions are truncated and disambiguated with a short
        # hash so the prefix stays under the length limit.
        self.genversion = self.config["GENVERSION"]
        if len(self.genversion) < 30:
            self.genprefix = self.genversion
        else:
            hash = get_hash(self.genversion)[:5]
            self.genprefix = self.genversion[:25] + hash

        self.options = self.config.get("OPTS", {})

        self.reserved_keywords = ["BASE"]
        self.reserved_top = ["GENVERSION", "GLOBAL", "OPTS", "EXTERNAL"]
        self.config_path = f"{self.output_dir}/{self.genversion}.input"  # Make sure this syncs with the tmp file name
        self.global_config = global_config

        # Log/summary/marker file locations under the task output dir.
        self.sim_log_dir = f"{self.output_dir}/LOGS"
        self.total_summary = os.path.join(self.sim_log_dir, "MERGE.LOG")
        self.done_file = f"{self.output_dir}/LOGS/ALL.DONE"
        self.logging_file = self.config_path.replace(".input", ".LOG")
        self.kill_file = self.config_path.replace(".input", "_KILL.LOG")

        # EXTERNAL means the simulation output already exists elsewhere;
        # skip component parsing and job estimation entirely.
        if "EXTERNAL" not in self.config.keys():
            # Determine the type of each component
            keys = [
                k for k in self.config.keys() if k not in self.reserved_top
            ]
            self.base_ia = []
            self.base_cc = []
            types = {}
            types_dict = {"IA": [], "NONIA": []}
            for k in keys:
                d = self.config[k]
                base_file = d.get("BASE")
                if base_file is None:
                    Task.fail_config(
                        f"Your simulation component {k} for sim name {self.name} needs to specify a BASE input file"
                    )
                base_path = get_data_loc(base_file)
                if base_path is None:
                    Task.fail_config(
                        f"Cannot find sim component {k} base file at {base_path} for sim name {self.name}"
                    )

                # Scan the base input file for GENTYPE/GENMODEL keys; the
                # component config can supply them as fallbacks below.
                gentype, genmodel = None, None
                with open(base_path) as f:
                    for line in f.read().splitlines():
                        if line.upper().strip().startswith("GENTYPE:"):
                            gentype = line.upper().split(":")[1].strip()
                        if line.upper().strip().startswith("GENMODEL:"):
                            genmodel = line.upper().split(":")[1].strip()

                gentype = gentype or d.get("GENTYPE")
                if gentype is None:
                    # NOTE(review): uses self.fail_config here but
                    # Task.fail_config everywhere else — confirm intent.
                    self.fail_config(
                        f"The simulation component {k} needs to specify a GENTYPE in its input file"
                    )
                gentype = int(gentype)
                genmodel = genmodel or d.get("GENMODEL")

                # NOTE(review): gentype == 0 would also trip this branch
                # after the int() conversion — confirm 0 is never valid.
                if not gentype:
                    Task.fail_config(
                        f"Cannot find GENTYPE for component {k} and base file {base_path}"
                    )
                if not genmodel:
                    Task.fail_config(
                        f"Cannot find GENMODEL for component {k} and base file {base_path}"
                    )

                # Register both the raw GENTYPE and its 100-offset twin.
                type2 = 100 + gentype
                if "SALT2" in genmodel:
                    self.base_ia.append(base_file)
                    types[gentype] = "Ia"
                    types[type2] = "Ia"
                    types_dict["IA"].append(gentype)
                    types_dict["IA"].append(type2)
                else:
                    self.base_cc.append(base_file)
                    types[gentype] = "II"
                    types[type2] = "II"
                    types_dict["NONIA"].append(gentype)
                    types_dict["NONIA"].append(type2)

            sorted_types = dict(sorted(types.items()))
            self.logger.debug(f"Types found: {json.dumps(sorted_types)}")
            self.output["types_dict"] = types_dict
            self.output["types"] = sorted_types

            # A RANSEED_* entry in GLOBAL sets the seed count; jobs are
            # doubled (presumably two stages per seed — confirm).
            rankeys = [
                r for r in self.config["GLOBAL"].keys()
                if r.startswith("RANSEED_")
            ]
            value = int(self.config["GLOBAL"][rankeys[0]].split(" ")
                        [0]) if rankeys else 1
            self.set_num_jobs(2 * value)

            self.output["blind"] = self.options.get("BLIND", False)
            self.derived_batch_info = None

            # Determine if all the top level input files exist
            if len(self.base_ia + self.base_cc) == 0:
                Task.fail_config(
                    "Your sim has no components specified! Please add something to simulate!"
                )

            # Try to determine how many jobs will be put in the queue
            # First see if it's been explicitly set
            num_jobs = self.options.get("NUM_JOBS")
            if num_jobs is not None:
                self.num_jobs = num_jobs
                self.logger.debug(
                    f"Num jobs set by NUM_JOBS option to {self.num_jobs}")
            else:
                try:
                    # If BATCH_INFO is set, we'll use that
                    batch_info = self.config.get("GLOBAL",
                                                 {}).get("BATCH_INFO")
                    default_batch_info = self.yaml["CONFIG"].get("BATCH_INFO")

                    # If its not set, lets check for ranseed_repeat or ranseed_change
                    if batch_info is None:
                        ranseed_repeat = self.config.get(
                            "GLOBAL", {}).get("RANSEED_REPEAT")
                        ranseed_change = self.config.get(
                            "GLOBAL", {}).get("RANSEED_CHANGE")
                        default = self.yaml.get("CONFIG",
                                                {}).get("RANSEED_REPEAT")
                        ranseed = ranseed_repeat or ranseed_change or default

                        if ranseed:
                            # Job count is the seed count; patch it into
                            # the last field of the default batch template.
                            num_jobs = int(ranseed.strip().split()[0])
                            self.logger.debug(
                                f"Found a randseed with {num_jobs}, deriving batch info"
                            )
                            comps = default_batch_info.strip().split()
                            comps[-1] = str(num_jobs)
                            self.derived_batch_info = " ".join(comps)
                            self.num_jobs = num_jobs
                            self.logger.debug(
                                f"Num jobs set by RANSEED to {self.num_jobs}")
                    else:
                        # self.logger.debug(f"BATCH INFO property detected as {property}")
                        self.num_jobs = int(batch_info.split()[-1])
                        self.logger.debug(
                            f"Num jobs set by BATCH_INFO to {self.num_jobs}")
                except Exception:
                    # Best-effort: fall back to a single job on any
                    # parsing failure rather than aborting the task.
                    self.logger.warning(
                        f"Unable to determine how many jobs simulation {self.name} has"
                    )
                    self.num_jobs = 1

            self.output["genversion"] = self.genversion
            self.output["genprefix"] = self.genprefix

            self.ranseed_change = self.config.get("GLOBAL",
                                                  {}).get("RANSEED_CHANGE")
            base = os.path.expandvars(self.global_config["SNANA"]["sim_dir"])
            self.output["ranseed_change"] = self.ranseed_change is not None
            self.output["ranseed_change_val"] = self.ranseed_change
            self.get_sim_folders(base, self.genversion)
            self.output["sim_folders"] = self.sim_folders
        else:
            # EXTERNAL case: sim folders were already populated upstream.
            self.sim_folders = self.output["sim_folders"]
Example #24
0
    def __init__(self,
                 name,
                 output_dir,
                 config,
                 options,
                 global_config,
                 dependencies=None):
        """Set up a DATAPREP task that skims/prepares a raw photometry dir.

        Resolves RAW_DIR, derives the genversion from its basename, builds
        the Ia/non-Ia type mappings, and records paths and metadata in
        self.output for downstream tasks.

        Args:
            name: Task name.
            output_dir: Directory all task output is written into.
            config: Task config block from the pipeline YAML.
            options: OPTS sub-dictionary of the config.
            global_config: NOTE(review): appears unused, get_config() is
                called instead — confirm.
            dependencies: Optional upstream tasks.
        """
        super().__init__(name,
                         output_dir,
                         config=config,
                         dependencies=dependencies)
        self.options = options
        self.global_config = get_config()

        self.logfile = os.path.join(self.output_dir, "output.log")
        self.conda_env = self.global_config["DataSkimmer"]["conda_env"]
        self.path_to_task = output_dir

        # Resolve the raw photometry directory; fail hard if not found.
        self.unparsed_raw = self.options.get("RAW_DIR")
        self.raw_dir = get_data_loc(self.unparsed_raw)
        if self.raw_dir is None:
            Task.fail_config(f"Unable to find {self.options.get('RAW_DIR')}")

        self.genversion = os.path.basename(self.raw_dir)
        self.data_path = os.path.dirname(self.raw_dir)
        # Default SNANA locations must not become PRIVATE_DATA_PATH.
        if self.unparsed_raw == "$SCRATCH_SIMDIR" or "SNDATA_ROOT/SIM" in self.raw_dir:
            self.logger.debug("Removing PRIVATE_DATA_PATH from NML file")
            self.data_path = ""
        self.job_name = os.path.basename(
            Path(output_dir).parents[1]) + "_DATAPREP_" + self.name

        self.output_info = os.path.join(self.output_dir,
                                        f"{self.genversion}.YAML")
        self.output["genversion"] = self.genversion
        # Peak-MJD option and photometry rejection mask; presumably
        # passed through to SNANA — confirm against the NML writer.
        self.opt_setpkmjd = options.get("OPT_SETPKMJD", 16)
        self.photflag_mskrej = options.get("PHOTFLAG_MSKREJ", 1016)
        self.output["data_path"] = self.data_path
        self.output["photometry_dirs"] = [get_output_loc(self.raw_dir)]
        self.output["sim_folders"] = [get_output_loc(self.raw_dir)]
        self.output["raw_dir"] = self.raw_dir
        self.clump_file = os.path.join(self.output_dir,
                                       self.genversion + ".SNANA.TEXT")
        self.output["clump_file"] = self.clump_file
        self.output["ranseed_change"] = False
        is_sim = options.get("SIM", False)
        self.output["is_sim"] = is_sim
        self.output["blind"] = options.get("BLIND", True)

        # Default Ia/non-Ia GENTYPE mapping when TYPES is not supplied.
        self.types_dict = options.get("TYPES")
        if self.types_dict is None:
            self.types_dict = {
                "IA": [1],
                "NONIA": [
                    2, 20, 21, 22, 29, 30, 31, 32, 33, 39, 40, 41, 42, 43, 80,
                    81
                ]
            }
        else:
            # User-supplied type lists may contain strings; coerce to int.
            for key in self.types_dict.keys():
                self.types_dict[key] = [int(c) for c in self.types_dict[key]]

        # Optional custom sbatch template and placeholder replacements.
        self.batch_file = self.options.get("BATCH_FILE")
        if self.batch_file is not None:
            self.batch_file = get_data_loc(self.batch_file)
        self.batch_replace = self.options.get("BATCH_REPLACE", {})

        self.logger.debug(f"\tIA types are {self.types_dict['IA']}")
        self.logger.debug(f"\tNONIA types are {self.types_dict['NONIA']}")
        self.output["types_dict"] = self.types_dict
        # Flat {gentype: "Ia"/"II"} map, Ia entries inserted first.
        self.types = OrderedDict()
        for n in self.types_dict["IA"]:
            self.types.update({n: "Ia"})
        for n in self.types_dict["NONIA"]:
            self.types.update({n: "II"})
        self.output["types"] = self.types

        self.slurm = """{sbatch_header}
        {task_setup}"""

        self.clump_command = """#