def __init__(self, name, output_dir, config, dependencies, mode, options, index=0, model_name=None):
    """Configure the SCONE classifier task.

    Resolves the conda environment and classifier location from the global
    config, then lays out the working paths (model config, heatmaps,
    sim csvs, log file) under the task's output directory.
    """
    super().__init__(name, output_dir, config, dependencies, mode, options, index=index, model_name=model_name)
    self.global_config = get_config()
    self.options = options
    # GPU is the default; the flag picks which conda environment we run in.
    self.gpu = self.options.get("GPU", True)
    scone_cfg = self.global_config["SCONE"]
    if self.gpu:
        self.conda_env = scone_cfg["conda_env_gpu"]
    else:
        self.conda_env = scone_cfg["conda_env_cpu"]
    self.path_to_classifier = scone_cfg["location"]
    # Job name combines the pipeline run dir with this task's own dir.
    parent_name = os.path.basename(Path(output_dir).parents[1])
    self.job_base_name = parent_name + "__" + os.path.basename(output_dir)
    # Optional custom sbatch template; resolve it to an on-disk file if given.
    batch_file = self.options.get("BATCH_FILE")
    self.batch_file = get_data_loc(batch_file) if batch_file is not None else None
    self.batch_replace = self.options.get("BATCH_REPLACE", {})
    self.config_path = os.path.join(self.output_dir, "model_config.yml")
    self.heatmaps_path = os.path.join(self.output_dir, "heatmaps")
    self.csvs_path = os.path.join(self.output_dir, "sim_csvs")
    self.slurm = """{sbatch_header} {task_setup}"""
    self.logfile = os.path.join(self.output_dir, "output.log")
    # Heatmaps are reused between runs unless the user asks for a remake.
    self.keep_heatmaps = not self.options.get("REMAKE_HEATMAPS", False)
def __init__(self, name, output_dir, dependencies, options, merged_data, merged_iasim, merged_ccsim, classifier):
    """Set up a BBC-style fit task over merged data/sim outputs.

    Fix: ``f"FITJOBS/ALL.DONE"`` had an f-prefix but no placeholders
    (needless f-string); replaced with a plain literal — same value.
    """
    # Template inputs ship alongside this module.
    self.data_dir = os.path.dirname(inspect.stack()[0][1]) + "/data_files/"
    super().__init__(name, output_dir, os.path.join(self.data_dir, "bbc.input"), "=", dependencies=dependencies)
    self.options = options
    self.logging_file = os.path.join(self.output_dir, "output.log")
    self.global_config = get_config()
    # Upstream merge tasks providing data and the Ia / core-collapse sims.
    self.merged_data = merged_data
    self.merged_iasim = merged_iasim
    self.merged_ccsim = merged_ccsim
    # Populated later, once fits are available.
    self.bias_cor_fits = None
    self.cc_prior_fits = None
    self.data = None
    # Genversion joins every merged sim name plus the classifier name.
    self.genversion = "_".join([m.get_lcfit_dep()["sim_name"] for m in merged_data]) + "_" + classifier.name
    self.config_filename = f"{self.genversion}.input"  # Make sure this syncs with the tmp file name
    self.config_path = os.path.join(self.output_dir, self.config_filename)
    self.fit_output_dir = os.path.join(self.output_dir, "output")
    self.done_file = os.path.join(self.fit_output_dir, "FITJOBS/ALL.DONE")
    self.probability_column_name = classifier.output["prob_column_name"]
    self.output["fit_output_dir"] = self.fit_output_dir
def __init__(self, filename, config_path, config, message_store):
    """Initialise manager state: config handles, queue limits, and run bookkeeping."""
    self.logger = get_logger()
    # Map each task type to its position in the pipeline ordering.
    self.task_index = {task: idx for idx, task in enumerate(self.task_order)}
    self.message_store = message_store
    self.filename = filename
    self.filename_path = config_path
    self.run_config = config
    self.global_config = get_config()
    queue_cfg = self.global_config["QUEUE"]
    self.prefix = queue_cfg["prefix"] + "_" + filename
    # Scheduler limits, coerced to int because config values may be strings.
    self.max_jobs = int(queue_cfg["max_jobs"])
    self.max_jobs_gpu = int(queue_cfg["max_gpu_jobs"])
    self.max_jobs_in_queue = int(queue_cfg["max_jobs_in_queue"])
    self.max_jobs_in_queue_gpu = int(queue_cfg["max_gpu_jobs_in_queue"])
    self.output_dir = os.path.join(get_output_dir(), self.filename)
    # Mutable execution state, filled in while the pipeline runs.
    self.tasks = None
    self.num_jobs_queue = 0
    self.num_jobs_queue_gpu = 0
    self.start = None
    self.finish = None
    self.force_refresh = False
def __init__(self, name, output_dir, dependencies, mode, options):
    """Set up a SuperNNova classifier task: output paths, model variant, and the GPU sbatch template."""
    super().__init__(name, output_dir, dependencies, mode, options)
    self.global_config = get_config()
    # SuperNNova writes its databases and models under this dump directory.
    self.dump_dir = output_dir + "/dump"
    self.job_base_name = os.path.basename(output_dir)
    self.tmp_output = None
    self.done_file = os.path.join(self.output_dir, "done_task.txt")
    # Which network flavour to train; validated immediately below.
    self.variant = options.get("VARIANT", "vanilla").lower()
    assert self.variant in ["vanilla", "variational", "bayesian"], \
        f"Variant {self.variant} is not vanilla, variational or bayesian"
    # sbatch template; {placeholders} are filled in when the job is written out.
    # NOTE(review): {cyclic} appears twice in the second run.py call — confirm intended.
    self.slurm = """#!/bin/bash #SBATCH --job-name={job_name} #SBATCH --time=15:00:00 #SBATCH --nodes=1 #SBATCH --ntasks=1 #SBATCH --partition=gpu2 #SBATCH --gres=gpu:1 #SBATCH --output=output.log #SBATCH --account=pi-rkessler #SBATCH --mem=64GB source activate {conda_env} module load cuda echo `which python` cd {path_to_classifier} python run.py --data --sntypes '{sntypes}' --dump_dir {dump_dir} --raw_dir {photometry_dir} {fit_dir} {phot} {clump} {test_or_train} python run.py --use_cuda {cyclic} --sntypes '{sntypes}' --done_file {done_file} --dump_dir {dump_dir} {cyclic} {variant} {model} {phot} {command} """
    self.conda_env = self.global_config["SuperNNova"]["conda_env"]
    self.path_to_classifier = get_output_loc(self.global_config["SuperNNova"]["location"])
def __init__(self, filename, config):
    """Minimal manager state: logger, config handles, and the run prefix."""
    self.logger = get_logger()
    self.filename = filename
    self.run_config = config
    self.global_config = get_config()
    # Jobs submitted by this manager are namespaced under this prefix.
    self.prefix = f"PIP_{filename}"
    # Resolved later, once the run location is known.
    self.output_dir = None
def __init__(self, name, output_dir, dependencies, mode, options):
    """Set up the Argonne classifier task: environment, paths, and feature list."""
    super().__init__(name, output_dir, dependencies, mode, options)
    self.global_config = get_config()
    # Fixed parallelism for the batch submission.
    self.num_jobs = 4
    self.conda_env = self.global_config["ArgonneClassifier"]["conda_env"]
    self.path_to_classifier = get_output_loc(self.global_config["ArgonneClassifier"]["location"])
    self.job_base_name = os.path.basename(output_dir)
    # Space-separated FITRES columns used as classifier features.
    self.features = options.get("FEATURES", "x1 c zHD x1ERR cERR PKMJDERR")
    self.model_pk_file = "modelpkl.pkl"
    self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
    # NOTE(review): the sbatch template below is truncated in this view of the file.
    self.slurm = """#!/bin/bash
def __init__(self, filename, config_path, config_raw, config, message_store):
    """Initialise manager state, queue limits, sbatch templates, and run bookkeeping."""
    self.logger = get_logger()
    # Map each task type to its position in the pipeline ordering.
    self.task_index = {task: idx for idx, task in enumerate(self.task_order)}
    self.message_store = message_store
    self.filename = filename
    self.filename_path = config_path
    self.file_raw = config_raw
    self.run_config = config
    self.global_config = get_config()
    queue_cfg = self.global_config["QUEUE"]
    self.prefix = queue_cfg["prefix"] + "_" + filename
    # Scheduler limits, coerced to int because config values may be strings.
    self.max_jobs = int(queue_cfg["max_jobs"])
    self.max_jobs_gpu = int(queue_cfg["max_gpu_jobs"])
    self.max_jobs_in_queue = int(queue_cfg["max_jobs_in_queue"])
    self.max_jobs_in_queue_gpu = int(queue_cfg["max_gpu_jobs_in_queue"])
    self.logger.debug(self.global_config.keys())

    # Load the cpu and gpu sbatch header templates, then normalise both.
    self.sbatch_cpu_path = get_data_loc(self.global_config["SBATCH"]["cpu_location"])
    with open(self.sbatch_cpu_path, 'r') as header_file:
        self.sbatch_cpu_header = header_file.read()
    self.sbatch_gpu_path = get_data_loc(self.global_config["SBATCH"]["gpu_location"])
    with open(self.sbatch_gpu_path, 'r') as header_file:
        self.sbatch_gpu_header = header_file.read()
    self.sbatch_cpu_header = self.clean_header(self.sbatch_cpu_header)
    self.sbatch_gpu_header = self.clean_header(self.sbatch_gpu_header)

    self.setup_task_location = self.global_config["SETUP"]["location"]
    self.load_task_setup()
    self.output_dir = os.path.join(get_output_dir(), self.filename)

    # Mutable execution state, filled in while the pipeline runs.
    self.tasks = None
    self.num_jobs_queue = 0
    self.num_jobs_queue_gpu = 0
    self.start = None
    self.finish = None
    self.force_refresh = False
    self.force_ignore_stage = None
    self.running = []
    self.done = []
    self.failed = []
    self.blocked = []
def __init__(self, name, output_dir, config, dependencies, mode, options, index=0, model_name=None):
    """Set up a SNIRF classifier task: paths, feature list, and prediction outputs."""
    super().__init__(name, output_dir, config, dependencies, mode, options, index=index, model_name=model_name)
    self.global_config = get_config()
    self.num_jobs = 1
    self.conda_env = self.global_config["SNIRF"]["conda_env"]
    # Classifier code lives next to this module.
    self.path_to_classifier = os.path.dirname(inspect.stack()[0][1])
    # Job name combines the pipeline run dir with this task's own dir.
    self.job_base_name = os.path.basename(Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
    # Space-separated FITRES columns used as classifier features.
    self.features = options.get("FEATURES", "zHD x1 c cERR x1ERR COV_x1_c COV_x1_x0 COV_c_x0 PKMJDERR")
    # self.model_pk_file = self.get_unique_name() + ".pkl"
    self.model_pk_file = "model.pkl"
    self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
    self.predictions_filename = os.path.join(self.output_dir, "predictions.csv")
    self.fitopt = options.get("FITOPT", "DEFAULT")
    # Optional custom sbatch template; resolve it to an on-disk file if given.
    self.batch_file = self.options.get("BATCH_FILE")
    if self.batch_file is not None:
        self.batch_file = get_data_loc(self.batch_file)
    self.batch_replace = self.options.get("BATCH_REPLACE", {})
    self.output["predictions_filename"] = self.predictions_filename
    self.output["model_filename"] = self.output_pk_file
    self.validate_model()
    # NOTE(review): the sbatch template below is truncated in this view of the file.
    self.slurm = """{sbatch_header}
def __init__(self, filename, config, message_store):
    """Initialise manager state: config handles, job limits, and output location."""
    self.logger = get_logger()
    self.message_store = message_store
    self.filename = filename
    self.run_config = config
    self.global_config = get_config()
    global_cfg = self.global_config["GLOBAL"]
    self.prefix = global_cfg["prefix"] + "_" + filename
    # Job limits, coerced to int because config values may be strings.
    self.max_jobs = int(global_cfg["max_jobs"])
    self.max_jobs_in_queue = int(global_cfg["max_jobs_in_queue"])
    # Output lives relative to this package: <pkg>/../<output_dir>/<filename>.
    # String concatenation is kept as-is (not os.path.join) so the
    # package-relative prefix is never discarded by the config value.
    module_dir = os.path.dirname(inspect.stack()[0][1])
    self.output_dir = os.path.abspath(module_dir + "/../" + self.global_config['OUTPUT']['output_dir'] + "/" + self.filename)
    # Mutable execution state, filled in while the pipeline runs.
    self.tasks = None
    self.start = None
    self.finish = None
    self.force_refresh = False
def __init__(self, name, output_dir, dependencies, mode, options, index=0, model_name=None):
    """Set up a SNIRF classifier task, resolving its FITRES input from the fit dependency."""
    super().__init__(name, output_dir, dependencies, mode, options, index=index, model_name=model_name)
    self.global_config = get_config()
    self.num_jobs = 1
    self.conda_env = self.global_config["SNIRF"]["conda_env"]
    # Classifier code lives next to this module.
    self.path_to_classifier = os.path.dirname(inspect.stack()[0][1])
    # Job name combines the pipeline run dir with this task's own dir.
    self.job_base_name = os.path.basename(Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
    # Space-separated FITRES columns used as classifier features.
    self.features = options.get("FEATURES", "zHD x1 c cERR x1ERR COV_x1_c COV_x1_x0 COV_c_x0 PKMJDERR")
    # self.model_pk_file = self.get_unique_name() + ".pkl"
    self.model_pk_file = "model.pkl"
    self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
    self.predictions_filename = os.path.join(self.output_dir, "predictions.csv")
    self.fitopt = options.get("FITOPT", "DEFAULT")
    # Resolve the FITRES file for the requested FITOPT from the light-curve fit dependency.
    lcfit = self.get_fit_dependency()
    self.fitres_filename = lcfit["fitopt_map"][self.fitopt]
    self.fitres_file = os.path.abspath(os.path.join(lcfit["fitres_dirs"][self.index], self.fitres_filename))
    self.output["predictions_filename"] = self.predictions_filename
    self.output["model_filename"] = self.output_pk_file
    self.validate_model()
    # NOTE(review): the sbatch template below is truncated in this view of the file.
    self.slurm = """#!/bin/bash
def __init__(self, name, output_dir, options, global_config, dependencies=None, index=0):
    """Set up a create-covariance task driven by a BiasCor dependency."""
    base_file = get_data_loc("create_cov/input_file.txt")
    super().__init__(name, output_dir, base_file, default_assignment=": ", dependencies=dependencies)
    self.options = options
    self.global_config = get_config()
    self.index = index
    self.job_name = os.path.basename(Path(output_dir).parents[1]) + "_CREATE_COV_" + name
    # External helper scripts shipped alongside this module.
    self.path_to_code = os.path.abspath(os.path.dirname(inspect.stack()[0][1]) + "/external")
    self.logfile = os.path.join(self.output_dir, "output.log")
    # Systematic scale list: user-supplied or the DES 5yr default.
    self.sys_file_in = get_data_loc(options.get("SYS_SCALE", "surveys/des/bbc/scale_5yr.list"))
    self.sys_file_out = os.path.join(self.output_dir, "sys_scale.LIST")
    self.chain_dir = os.path.join(self.output_dir, "chains/")
    self.config_dir = os.path.join(self.output_dir, "output")
    # This task requires a BiasCor dependency; fail loudly if missing.
    self.biascor_dep = self.get_dep(BiasCor, fail=True)
    self.output["blind"] = self.biascor_dep.output["blind"]
    self.input_file = os.path.join(self.output_dir, self.biascor_dep.output["subdirs"][index] + ".input")
    self.output["hubble_plot"] = self.biascor_dep.output["hubble_plot"]
    self.output["ini_dir"] = self.config_dir
    # Map covariance option labels (the text inside "[...]") to 1-based indices;
    # "ALL" is always slot 0.
    covopts_map = {"ALL": 0}
    for i, covopt in enumerate(self.options.get("COVOPTS", [])):
        covopts_map[covopt.split("]")[0][1:]] = i + 1
    self.output["covopts"] = covopts_map
    self.output["index"] = index
    self.output["bcor_name"] = self.biascor_dep.name
    # NOTE(review): the sbatch template below is truncated in this view of the file.
    self.slurm = """#!/bin/bash
def __init__(self, name, output_dir, options, dependencies=None):
    """Set up a data-preparation task that clumps raw photometry with snana.exe."""
    super().__init__(name, output_dir, dependencies=dependencies)
    self.options = options
    self.global_config = get_config()
    self.logfile = os.path.join(self.output_dir, "output.log")
    self.conda_env = self.global_config["DataSkimmer"]["conda_env"]
    self.path_to_task = output_dir
    # Raw photometry directory; its basename doubles as the GENVERSION.
    self.raw_dir = self.options.get("RAW_DIR")
    self.genversion = os.path.basename(self.raw_dir)
    self.data_path = os.path.dirname(self.raw_dir)
    self.job_name = f"DATAPREP_{self.name}"
    self.output["genversion"] = self.genversion
    self.output["data_path"] = self.data_path
    self.output["photometry_dir"] = get_output_loc(self.raw_dir)
    self.output["raw_dir"] = self.raw_dir
    # snana.exe clump output for this genversion.
    self.clump_file = os.path.join(self.output_dir, self.genversion + ".SNANA.TEXT")
    self.output["clump_file"] = self.clump_file
    # sbatch template; {placeholders} are filled in when the job is written out.
    self.slurm = """#!/bin/bash #SBATCH --job-name={job_name} #SBATCH --time=0:20:00 #SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 #SBATCH --partition=broadwl #SBATCH --output={log_file} #SBATCH --account=pi-rkessler #SBATCH --mem=2GB cd {path_to_task} snana.exe clump.nml if [ $? -eq 0 ]; then echo SUCCESS > {done_file} else echo FAILURE > {done_file} fi """
    # NOTE(review): the clump command template below is truncated in this view of the file.
    self.clump_command = """#
def __init__(self, name, output_dir, config, dependencies, mode, options, index=0, model_name=None):
    """Set up a SNIRF classifier task (batch-header variant); FITRES inputs are resolved later."""
    super().__init__(name, output_dir, config, dependencies, mode, options, index=index, model_name=model_name)
    self.global_config = get_config()
    self.num_jobs = 4
    self.conda_env = self.global_config["SNIRF"]["conda_env"]
    self.path_to_classifier = get_output_loc(self.global_config["SNIRF"]["location"])
    # Job name combines the pipeline run dir with this task's own dir.
    self.job_base_name = os.path.basename(Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
    # Space-separated FITRES columns used as classifier features.
    self.features = options.get("FEATURES", "x1 c zHD x1ERR cERR PKMJDERR")
    self.validate_model()
    self.model_pk_file = "model.pkl"
    self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
    self.fitopt = options.get("FITOPT", "DEFAULT")
    # FITRES location is filled in later, once the fit dependency is known.
    self.fitres_filename = None
    self.fitres_file = None
    # Optional custom sbatch template; resolve it to an on-disk file if given.
    self.batch_file = self.options.get("BATCH_FILE")
    if self.batch_file is not None:
        self.batch_file = get_data_loc(self.batch_file)
    self.batch_replace = self.options.get("BATCH_REPLACE", {})
    # NOTE(review): the sbatch template below is truncated in this view of the file.
    self.slurm = """{sbatch_header}
def run(args):
    """Load the pipeline YAML, configure logging, and execute the Manager.

    Returns the Manager so callers (tests, wrappers) can inspect the run.
    """
    # Resolve and validate the user's YAML config file.
    yaml_path = os.path.abspath(os.path.expandvars(args.yaml))
    assert os.path.exists(yaml_path), f"File {yaml_path} cannot be found."
    with open(yaml_path, "r") as stream:
        config = yaml.safe_load(stream)

    # GLOBAL overrides cfg.yml; warn if the user wrote GLOBALS by mistake.
    overwrites = config.get("GLOBAL")
    if config.get("GLOBALS") is not None:
        logging.warning(
            "Your config file has a GLOBALS section in it. If you're trying to overwrite cfg.yml, rename this to GLOBAL"
        )
    global_config = get_config(initial_path=args.config, overwrites=overwrites)

    # Logging goes under <output>/<CONFIGNAME>/ (only created on real runs).
    config_filename = os.path.basename(args.yaml).split(".")[0].upper()
    logging_folder = os.path.abspath(os.path.join(get_output_dir(), config_filename))
    if not args.check:
        mkdirs(logging_folder)
    message_store, logging_filename = setup_logging(config_filename, logging_folder, args)

    for idx, data_dir in enumerate(global_config["DATA_DIRS"]):
        logging.debug(f"Data directory {idx + 1} set as {data_dir}")
        assert data_dir is not None, "Data directory is none, which means it failed to resolve. Check the error message above for why."

    manager = Manager(config_filename, yaml_path, config, message_store)
    # Starting mid-pipeline implies the earlier stages must be refreshed.
    if args.start is not None:
        args.refresh = True
    manager.set_start(args.start)
    manager.set_finish(args.finish)
    manager.set_force_refresh(args.refresh)
    manager.execute(args.check)
    chown_file(logging_filename)
    return manager
def __init__(self, name, output_dir, dependencies, mode, options, index=0, model_name=None):
    """Set up a SNIRF classifier task, resolving its FITRES input from the fit dependency."""
    super().__init__(name, output_dir, dependencies, mode, options, index=index, model_name=model_name)
    self.global_config = get_config()
    self.num_jobs = 4
    self.conda_env = self.global_config["SNIRF"]["conda_env"]
    self.path_to_classifier = get_output_loc(self.global_config["SNIRF"]["location"])
    # Job name combines the pipeline run dir with this task's own dir.
    self.job_base_name = os.path.basename(Path(output_dir).parents[1]) + "__" + os.path.basename(output_dir)
    # Space-separated FITRES columns used as classifier features.
    self.features = options.get("FEATURES", "x1 c zHD x1ERR cERR PKMJDERR")
    self.validate_model()
    self.model_pk_file = "model.pkl"
    self.output_pk_file = os.path.join(self.output_dir, self.model_pk_file)
    self.fitopt = options.get("FITOPT", "DEFAULT")
    # Resolve the FITRES file for the requested FITOPT from the light-curve fit dependency.
    lcfit = self.get_fit_dependency()
    self.fitres_filename = lcfit["fitopt_map"][self.fitopt]
    self.fitres_file = os.path.abspath(os.path.join(lcfit["fitres_dirs"][self.index], self.fitres_filename))
    # NOTE(review): the sbatch template below is truncated in this view of the file.
    self.slurm = """#!/bin/bash
def __init__(self, name, output_dir, options, global_config, dependencies=None):
    """Set up a data-preparation task: resolve the raw photometry dir, type maps, and clump job."""
    super().__init__(name, output_dir, dependencies=dependencies)
    self.options = options
    self.global_config = get_config()
    self.logfile = os.path.join(self.output_dir, "output.log")
    self.conda_env = self.global_config["DataSkimmer"]["conda_env"]
    self.path_to_task = output_dir
    # Resolve RAW_DIR (may contain env vars / data-dir aliases); fail fast if it can't be found.
    self.unparsed_raw = self.options.get("RAW_DIR")
    self.raw_dir = get_data_loc(self.unparsed_raw)
    if self.raw_dir is None:
        Task.fail_config(f"Unable to find {self.options.get('RAW_DIR')}")
    self.genversion = os.path.basename(self.raw_dir)
    self.data_path = os.path.dirname(self.raw_dir)
    # SNANA finds these locations on its own, so no PRIVATE_DATA_PATH is needed.
    if self.unparsed_raw == "$SCRATCH_SIMDIR" or "SNDATA_ROOT/SIM" in self.raw_dir:
        self.logger.debug("Removing PRIVATE_DATA_PATH from NML file")
        self.data_path = ""
    self.job_name = os.path.basename(Path(output_dir).parents[1]) + "_DATAPREP_" + self.name
    self.output["genversion"] = self.genversion
    self.output["data_path"] = self.data_path
    self.output["photometry_dirs"] = [get_output_loc(self.raw_dir)]
    self.output["sim_folders"] = [get_output_loc(self.raw_dir)]
    self.output["raw_dir"] = self.raw_dir
    self.clump_file = os.path.join(self.output_dir, self.genversion + ".SNANA.TEXT")
    self.output["clump_file"] = self.clump_file
    self.output["ranseed_change"] = False
    is_sim = options.get("SIM", False)
    self.output["is_sim"] = is_sim
    # Real data is blinded by default; sims are not.
    self.output["blind"] = options.get("BLIND", not is_sim)
    # SN type codes: user-supplied (coerced to int) or the default Ia/non-Ia split.
    # NOTE(review): 42 appears twice in the NONIA defaults — possibly a typo; confirm upstream.
    self.types_dict = options.get("TYPES")
    if self.types_dict is None:
        self.types_dict = {
            "IA": [1],
            "NONIA": [2, 20, 21, 22, 29, 30, 31, 32, 33, 39, 40, 41, 42, 42, 43, 80, 81]
        }
    else:
        for key in self.types_dict.keys():
            self.types_dict[key] = [int(c) for c in self.types_dict[key]]
    self.logger.debug(f"\tIA types are {self.types_dict['IA']}")
    self.logger.debug(f"\tNONIA types are {self.types_dict['NONIA']}")
    self.output["types_dict"] = self.types_dict
    # Flat code -> label map; every non-Ia code is labelled "II" here.
    self.types = OrderedDict()
    for n in self.types_dict["IA"]:
        self.types.update({n: "Ia"})
    for n in self.types_dict["NONIA"]:
        self.types.update({n: "II"})
    self.output["types"] = self.types
    # sbatch template; {placeholders} are filled in when the job is written out.
    self.slurm = """#!/bin/bash #SBATCH --job-name={job_name} #SBATCH --time=0:20:00 #SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 #SBATCH --partition=broadwl #SBATCH --output={log_file} #SBATCH --account=pi-rkessler #SBATCH --mem=2GB cd {path_to_task} snana.exe clump.nml if [ $? -eq 0 ]; then echo SUCCESS > {done_file} else echo FAILURE > {done_file} fi """
    # NOTE(review): the clump command template below is truncated in this view of the file.
    self.clump_command = """#
def __init__(self, name, output_dir, config, options, dependencies=None):
    """Set up the Analyse task: collect CosmoMC/BiasCor/LCFit dependencies and queue plot scripts."""
    super().__init__(name, output_dir, config=config, dependencies=dependencies)
    self.options = options
    self.global_config = get_config()
    self.logfile = os.path.join(self.output_dir, "output.log")
    self.job_name = os.path.basename(Path(output_dir).parents[1]) + "_ANALYSE_" + os.path.basename(output_dir)
    self.path_to_codes = []
    self.done_files = []
    # Plotting scripts live in the "external" directory next to this module.
    self.plot_code_dir = os.path.join(os.path.dirname(inspect.stack()[0][1]), "external")
    self.covopts = options.get("COVOPTS")
    self.singular_blind = options.get("SINGULAR_BLIND", False)
    # Allow a single covopt to be given as a bare string.
    if isinstance(self.covopts, str):
        self.covopts = [self.covopts]
    self.cosmomc_input_files = []
    self.cosmomc_output_files = []
    self.cosmomc_covopts = []
    self.names = []
    self.params = []
    # Assuming all deps are cosmomc tasks
    self.cosmomc_deps = self.get_deps(CosmoMC)
    # Blind if any upstream CosmoMC task is blinded.
    self.blind = np.any([c.output["blind"] for c in self.cosmomc_deps])
    if self.blind:
        self.blind_params = ["w", "om", "ol", "omegam", "omegal"]
    else:
        # NOTE(review): when BLIND is truthy it is used directly as the parameter list — confirm expected type.
        if options.get("BLIND", False):
            self.blind_params = options.get("BLIND")
        else:
            self.blind_params = []
    self.biascor_deps = self.get_deps(BiasCor)
    self.lcfit_deps = self.get_deps(SNANALightCurveFit)
    # Queue plot scripts based on which dependency kinds are present.
    if self.cosmomc_deps:
        self.add_plot_script_to_run("parse_cosmomc.py")
        self.add_plot_script_to_run("plot_cosmomc.py")
        self.add_plot_script_to_run("plot_errbudget.py")
    if self.biascor_deps:
        self.add_plot_script_to_run("parse_biascor.py")
        self.add_plot_script_to_run("plot_biascor.py")
    if self.lcfit_deps:
        self.add_plot_script_to_run("parse_lcfit.py")
        self.add_plot_script_to_run("plot_histogram.py")
        self.add_plot_script_to_run("plot_efficiency.py")
    if self.options.get("ADDITIONAL_SCRIPTS") is not None:
        vals = ensure_list(self.options.get("ADDITIONAL_SCRIPTS"))
        for v in vals:
            self.add_plot_script_to_run(v)
    # The task is done when the last queued script's done-file appears.
    self.done_file = self.done_files[-1]
    # One entry per (CosmoMC dependency, covopt) pair.
    for c in self.cosmomc_deps:
        for covopt in c.output["covopts"]:
            self.cosmomc_input_files.append(c.output["base_dict"][covopt])
            self.cosmomc_output_files.append(c.output["label"] + "_" + covopt + ".csv.gz")
            self.cosmomc_covopts.append(covopt)
            self.names.append(c.output["label"].replace("_", " ") + " " + covopt)
            for p in c.output["cosmology_params"]:
                if p not in self.params:
                    self.params.append(p)
    self.logger.debug(
        f"Analyse task will create CosmoMC plots with {len(self.cosmomc_input_files)} covopts/plots"
    )
    self.wsummary_files = [b.output["w_summary"] for b in self.biascor_deps]
    # Get the fitres and m0diff files we'd want to parse for Hubble diagram plotting
    self.biascor_fitres_input_files = [
        os.path.join(m, "FITOPT000_MUOPT000.FITRES.gz")
        for b in self.biascor_deps for m in b.output["m0dif_dirs"]
    ]
    self.biascor_prob_col_names = [
        b.output["prob_column_name"]
        for b in self.biascor_deps for m in b.output["m0dif_dirs"]
    ]
    self.biascor_fitres_output_files = [
        b.name + "__" + os.path.basename(m).replace("OUTPUT_BBCFIT", "1") + "__FITOPT0_MUOPT0.fitres.gz"
        for b in self.biascor_deps for m in b.output["m0dif_dirs"]
    ]
    self.biascor_m0diffs = []
    self.biascor_m0diff_output = "all_biascor_m0diffs.csv"
    self.biascor_fitres_combined = "all_biascor_fitres.csv.gz"
    # Optional custom sbatch template; resolve it to an on-disk file if given.
    self.batch_file = self.options.get("BATCH_FILE")
    if self.batch_file is not None:
        self.batch_file = get_data_loc(self.batch_file)
    self.batch_replace = self.options.get("BATCH_REPLACE", {})
    # NOTE(review): the sbatch template below is truncated in this view of the file.
    self.slurm = """{sbatch_header}
def __init__(self, name, output_dir, config, dependencies, mode, options, index=0, model_name=None):
    """Set up a SuperNNova classifier task.

    Reads training/classification options (norm, variant, network shape),
    optional SuperNNova yml input files, and the batch template, validating
    the requested model along the way.

    Bug fix: the error branch for mismatched yml inputs formatted
    ``self.data_yml``/``self.classification_yml`` before they were ever
    assigned, raising AttributeError instead of logging the message. It now
    reports the ``*_file`` options and leaves the yml state safely unset.
    The error message's unbalanced paren and the norm assertion's unbalanced
    quote are also repaired.
    """
    super().__init__(name, output_dir, config, dependencies, mode, options, index=index, model_name=model_name)
    self.global_config = get_config()
    # SuperNNova writes its databases and models under this dump directory.
    self.dump_dir = output_dir + "/dump"
    self.job_base_name = os.path.basename(output_dir)
    self.gpu = config.get("GPU", True)
    self.tmp_output = None
    self.done_file = os.path.join(self.output_dir, "done_task.txt")
    self.done_file2 = os.path.join(self.output_dir, "done_task2.txt")
    self.variant = options.get("VARIANT", "vanilla").lower()
    # "zspe" feeds spectroscopic redshift to the classifier; "none" disables it.
    self.redshift = "zspe" if options.get("REDSHIFT", True) else "none"
    self.norm = options.get("NORM", "cosmo")
    self.cyclic = options.get("CYCLIC", True)
    self.seed = options.get("SEED", 0)
    self.clean = config.get("CLEAN", True)
    self.batch_size = options.get("BATCH_SIZE", 128)
    self.num_layers = options.get("NUM_LAYERS", 2)
    self.hidden_dim = options.get("HIDDEN_DIM", 32)

    # Optional raw SuperNNova yml inputs; either both or neither must be given.
    self.data_yml_file = options.get("DATA_YML", None)
    self.output_data_yml = os.path.join(self.output_dir, "data.yml")
    self.classification_yml_file = options.get("CLASSIFICATION_YML", None)
    self.output_classification_yml = os.path.join(self.output_dir, "classification.yml")
    # XOR - exactly one yml provided is a configuration error.
    if (self.data_yml_file is None) ^ (self.classification_yml_file is None):
        self.logger.error(
            f"If using yml inputs, both 'DATA_YML' (currently {self.data_yml_file}) and 'CLASSIFICATION_YML' (currently {self.classification_yml_file}) must be provided"
        )
        # Leave the yml state unset but defined so later code can proceed.
        self.data_yml = None
        self.classification_yml = None
        self.has_yml = False
    elif self.data_yml_file is not None:
        with open(self.data_yml_file, 'r') as f:
            self.data_yml = f.read()
        with open(self.classification_yml_file, 'r') as f:
            self.classification_yml = f.read()
        self.has_yml = True
        # The yml file overrides any VARIANT option.
        self.variant = self.get_variant_from_yml(self.classification_yml)
    else:
        self.data_yml = None
        self.classification_yml = None
        self.has_yml = False

    # Optional custom sbatch template; resolve it to an on-disk file if given.
    self.batch_file = self.options.get("BATCH_FILE")
    if self.batch_file is not None:
        self.batch_file = get_data_loc(self.batch_file)
    self.batch_replace = self.options.get("BATCH_REPLACE", {})
    self.validate_model()

    assert self.norm in [
        "global", "cosmo", "perfilter", "cosmo_quantile", "none",
    ], f"Norm option is set to {self.norm}, needs to be one of 'global', 'cosmo', 'perfilter', 'cosmo_quantile'"
    assert self.variant in [
        "vanilla", "variational", "bayesian"
    ], f"Variant {self.variant} is not vanilla, variational or bayesian"
    self.slurm = """{sbatch_header} {task_setup} """
    self.conda_env = self.global_config["SuperNNova"]["conda_env"]
    self.path_to_classifier = get_output_loc(self.global_config["SuperNNova"]["location"])
def __init__(self, name, output_dir, config, options, global_config, dependencies=None, index=0):
    """Set up a create-covariance task (SNANA-maintained code) driven by a BiasCor dependency."""
    base_file = get_data_loc("create_cov/input_file.txt")
    super().__init__(name, output_dir, config, base_file, default_assignment=": ", dependencies=dependencies)
    if options is None:
        options = {}
    self.options = options
    self.templates_dir = self.options.get("INI_DIR", "cosmomc_templates")
    self.global_config = get_config()
    self.index = index
    self.job_name = os.path.basename(Path(output_dir).parents[1]) + "_CREATE_COV_" + name
    #self.path_to_code = os.path.abspath(os.path.dirname(inspect.stack()[0][1]) + "/external/")
    self.path_to_code = '$SNANA_DIR/util/'  #Now maintained by SNANA
    self.batch_mem = options.get("BATCH_MEM", "4GB")
    self.logfile = os.path.join(self.output_dir, "output.log")
    self.sys_file_out = os.path.join(self.output_dir, "sys_scale.yml")
    self.chain_dir = os.path.join(self.output_dir, "chains/")
    self.config_dir = os.path.join(self.output_dir, "output")
    self.subtract_vpec = options.get("SUBTRACT_VPEC", False)
    self.unbinned_covmat_addin = options.get("UNBINNED_COVMAT_ADDIN", [])
    # Optional custom sbatch template; resolve it to an on-disk file if given.
    self.batch_file = self.options.get("BATCH_FILE")
    if self.batch_file is not None:
        self.batch_file = get_data_loc(self.batch_file)
    self.batch_replace = self.options.get("BATCH_REPLACE", {})
    # Binned output by default, unless vpec subtraction is requested.
    self.binned = options.get("BINNED", not self.subtract_vpec)
    # Rebinning options become command-line flags when non-empty.
    self.rebinned_x1 = options.get("REBINNED_X1", "")
    if self.rebinned_x1 != "":
        self.rebinned_x1 = f"--nbin_x1 {self.rebinned_x1}"
    self.rebinned_c = options.get("REBINNED_C", "")
    if self.rebinned_c != "":
        self.rebinned_c = f"--nbin_c {self.rebinned_c}"
    # This task requires a BiasCor dependency; fail loudly if missing.
    self.biascor_dep = self.get_dep(BiasCor, fail=True)
    self.sys_file_in = self.get_sys_file_in()
    self.output["blind"] = self.biascor_dep.output["blind"]
    self.input_file = os.path.join(self.output_dir, self.biascor_dep.output["subdirs"][index] + ".input")
    self.calibration_set = options.get("CALIBRATORS", [])
    self.output["hubble_plot"] = self.biascor_dep.output["hubble_plot"]
    if self.config.get("COSMOMC", True):
        self.logger.info("Generating cosmomc output")
        self.output["ini_dir"] = os.path.join(self.config_dir, "cosmomc")
        self.prepare_cosmomc = True
    else:
        self.logger.info("Not generating cosmomc output")
        self.prepare_cosmomc = False
    # Map covariance option labels (the text inside "[...]") to 1-based indices;
    # "ALL" is always slot 0.
    covopts_map = {"ALL": 0}
    for i, covopt in enumerate(self.options.get("COVOPTS", [])):
        covopts_map[covopt.split("]")[0][1:]] = i + 1
    self.output["covopts"] = covopts_map
    self.output["index"] = index
    self.output["bcor_name"] = self.biascor_dep.name
    # NOTE(review): the sbatch template below is truncated in this view of the file.
    self.slurm = """{sbatch_header}
def __init__(self, name, output_dir, dependencies, mode, options, index=0, model_name=None):
    """Set up a SuperNNova classifier task with validated norm/variant options and a GPU sbatch template."""
    super().__init__(name, output_dir, dependencies, mode, options, index=index, model_name=model_name)
    self.global_config = get_config()
    # SuperNNova writes its databases and models under this dump directory.
    self.dump_dir = output_dir + "/dump"
    self.job_base_name = os.path.basename(output_dir)
    # This variant always runs on GPU.
    self.gpu = True
    self.tmp_output = None
    self.done_file = os.path.join(self.output_dir, "done_task.txt")
    self.done_file2 = os.path.join(self.output_dir, "done_task2.txt")
    self.variant = options.get("VARIANT", "vanilla").lower()
    # "zspe" feeds spectroscopic redshift to the classifier; "none" disables it.
    self.redshift = "zspe" if options.get("REDSHIFT", True) else "none"
    self.norm = options.get("NORM", "cosmo")
    self.validate_model()
    assert self.norm in [
        "global", "cosmo", "perfilter"
    ], f"Norm option is set to {self.norm}, needs to be one of 'global', 'cosmo', 'perfilter'"
    assert self.variant in [
        "vanilla", "variational", "bayesian"
    ], f"Variant {self.variant} is not vanilla, variational or bayesian"
    # sbatch template; {placeholders} are filled in when the job is written out.
    # Builds the database first, then classifies; done_file2 records shell-level success/failure.
    self.slurm = """#!/bin/bash #SBATCH --job-name={job_name} #SBATCH --time=23:00:00 #SBATCH --nodes=1 #SBATCH --ntasks=1 #SBATCH --partition=gpu2 #SBATCH --gres=gpu:1 #SBATCH --output=output.log #SBATCH --account=pi-rkessler #SBATCH --mem=64GB source activate {conda_env} module load cuda echo `which python` cd {path_to_classifier} echo "#################TIMING Starting here: `date`" python run.py --data --sntypes '{sntypes}' --dump_dir {dump_dir} --raw_dir {photometry_dir} {fit_dir} {phot} {clump} {norm} {test_or_train} if [ $? -ne 0 ]; then echo FAILURE > {done_file2} else echo "#################TIMING Database done now, starting classifier: `date`" python run.py --use_cuda {cyclic} --sntypes '{sntypes}' --done_file {done_file} --batch_size 20 --dump_dir {dump_dir} {cyclic} {variant} {model} {phot} {redshift} {norm} {command} if [ $?
-eq 0 ]; then rm -rf {dump_dir}/processed echo SUCCESS > {done_file2} else echo FAILURE > {done_file2} fi fi echo "#################TIMING Classifier finished: `date`" """
    self.conda_env = self.global_config["SuperNNova"]["conda_env"]
    self.path_to_classifier = get_output_loc(self.global_config["SuperNNova"]["location"])
def run(args):
    """Load the pipeline YAML, configure logging and signal handling, and execute the Manager.

    Returns the Manager (or None when no args are supplied).
    """
    if args is None:
        return None
    init()

    # Resolve and validate the user's YAML config file.
    yaml_path = os.path.abspath(os.path.expandvars(args.yaml))
    assert os.path.exists(yaml_path), f"File {yaml_path} cannot be found."
    config_raw, config = load_yaml(yaml_path)

    # GLOBAL overrides cfg.yml; warn if the user wrote GLOBALS by mistake.
    overwrites = config.get("GLOBAL")
    if config.get("GLOBALS") is not None:
        logging.warning(
            "Your config file has a GLOBALS section in it. If you're trying to overwrite cfg.yml, rename this to GLOBAL"
        )
    # The cfg path can come from GLOBAL.CFG_PATH, falling back to the CLI.
    cfg = (config.get("GLOBAL") or {}).get("CFG_PATH")
    if cfg is None:
        cfg = args.config
    global_config = get_config(initial_path=cfg, overwrites=overwrites)

    # Logging goes under <output>/<CONFIGNAME>/ (only created on real runs).
    config_filename = os.path.basename(args.yaml).split(".")[0].upper()
    logging_folder = os.path.abspath(os.path.join(get_output_dir(), config_filename))
    if not args.check:
        mkdirs(logging_folder)
    if os.path.exists(logging_folder):
        chown_dir(logging_folder, walk=args.permission)
    # A permission-fix run stops after re-owning the folder.
    if args.permission:
        return
    message_store, logging_filename = setup_logging(config_filename, logging_folder, args)

    for idx, data_dir in enumerate(global_config["DATA_DIRS"]):
        logging.debug(f"Data directory {idx + 1} set as {data_dir}")
        assert data_dir is not None, "Data directory is none, which means it failed to resolve. Check the error message above for why."
    logging.info(
        f"Running on: {os.environ.get('HOSTNAME', '$HOSTNAME not set')} login node."
    )

    manager = Manager(config_filename, yaml_path, config_raw, config, message_store)

    # Gracefully handle Ctrl-c: kill children and reset their hashes before exiting.
    def on_sigint(signum, frame):
        logging.error("Ctrl-c was pressed.")
        logging.warning("All remaining tasks will be killed and their hash reset")
        manager.kill_remaining_tasks()
        exit(1)

    signal.signal(signal.SIGINT, on_sigint)

    # Starting mid-pipeline implies the earlier stages must be refreshed.
    if args.start is not None:
        args.refresh = True
    manager.set_start(args.start)
    manager.set_finish(args.finish)
    manager.set_force_refresh(args.refresh)
    manager.set_force_ignore_stage(args.ignore)
    manager.execute(args.check, args.compress, args.uncompress)
    chown_file(logging_filename)
    return manager
def __init__(self, name, output_dir, config, dependencies, options, global_config):
    """Set up a BBC-style fitting task: resolves the base input file, records
    merged data/sim dependencies, derives output paths, and determines the
    probability column used for classification-based fitting.

    Args:
        name: task name; also used to derive the .input config filename.
        output_dir: directory that will hold the generated config and outputs.
        config: task-level config dict (BASE, DATA, SIMFILE_BIASCOR, ...).
        dependencies: upstream tasks this task depends on.
        options: OPTS-style dict of user options (e.g. NSPLITRAN, devel).
        global_config: passed in but unused here — get_config() is called instead.
    """
    # Resolve the base BBC input template (defaults to the DES 5yr input).
    base = get_data_loc(config.get("BASE", "surveys/des/bbc/bbc_5yr.input"))
    self.base_file = base
    super().__init__(name, output_dir, config, base, "=", dependencies=dependencies)
    self.options = options
    self.logging_file = os.path.join(self.output_dir, "output.log")
    self.global_config = get_config()
    self.prob_cols = config["PROB_COLS"]
    # Upstream merged tasks: observed data, Ia bias-correction sims, CC-prior sims.
    self.merged_data = config.get("DATA")
    self.merged_iasim = config.get("SIMFILE_BIASCOR")
    self.merged_ccsim = config.get("SIMFILE_CCPRIOR")
    self.classifier = config.get("CLASSIFIER")
    if self.classifier is not None:
        # Record the classifier name so it participates in the task hash/config.
        self.config["CLASSIFIER"] = self.classifier.name
    self.make_all = config.get("MAKE_ALL_HUBBLE", True)
    self.use_recalibrated = config.get("USE_RECALIBRATED", False)
    self.consistent_sample = config.get("CONSISTENT_SAMPLE", True)
    # Populated later by the task's run/load steps.
    self.bias_cor_fits = None
    self.cc_prior_fits = None
    self.data = None
    self.data_fitres = None
    self.sim_names = [m.output["sim_name"] for m in self.merged_data]
    self.blind = self.get_blind(config, options)
    self.logger.debug(f"Blinding set to {self.blind}")
    self.output["blind"] = self.blind
    self.genversions = [m.output["genversion"] for m in self.merged_data]
    # NOTE(review): attribute name has a typo ("verions"); kept as-is since
    # renaming could break external readers of this attribute.
    self.num_verions = [len(m.output["fitres_dirs"]) for m in self.merged_data]
    self.output["fitopt_files"] = [m.output.get("fitopt_file") for m in self.merged_data]
    # Genversion combines all sim names, plus the classifier name when present.
    self.genversion = "_".join(self.sim_names) + ("" if self.classifier is None else "_" + self.classifier.name)
    self.config_filename = f"{self.name}.input"  # Make sure this syncs with the tmp file name
    self.config_path = os.path.join(self.output_dir, self.config_filename)
    self.kill_file = self.config_path.replace(".input", "_KILL.LOG")
    self.job_name = os.path.basename(self.config_path)
    self.fit_output_dir = os.path.join(self.output_dir, "output")
    self.merge_log = os.path.join(self.fit_output_dir, "MERGE.LOG")
    self.reject_list = os.path.join(self.output_dir, "reject.list")
    # NOTE(review): f-string has no placeholders; harmless but could be a plain string.
    self.done_file = os.path.join(self.fit_output_dir, f"ALL.DONE")
    # Presence of this marker file indicates the task was previously resubmitted.
    self.done_file_iteration = os.path.join(self.output_dir, "RESUBMITTED.DONE")
    self.run_iteration = 1 if os.path.exists(self.done_file_iteration) else 0
    # Probability column: explicit PROB_COLUMN_NAME wins, else look it up
    # from the classifier's registered column.
    self.probability_column_name = None
    if self.config.get("PROB_COLUMN_NAME") is not None:
        self.probability_column_name = self.config.get("PROB_COLUMN_NAME")
    elif self.classifier is not None:
        self.probability_column_name = self.prob_cols[self.classifier.name]
    self.output["prob_column_name"] = self.probability_column_name
    if self.use_recalibrated:
        # Recalibrated probabilities use a CPROB_ prefix instead of PROB_.
        # NOTE(review): raises AttributeError if no prob column was resolved above.
        new_name = self.probability_column_name.replace("PROB_", "CPROB_")
        self.logger.debug(f"Updating prob column name from {self.probability_column_name} to {new_name}. I hope it exists!")
        self.probability_column_name = new_name
    self.output["fit_output_dir"] = self.fit_output_dir
    # NSPLITRAN option is matched case-insensitively against user option keys.
    self.output["NSPLITRAN"] = "NSPLITRAN" in [x.upper() for x in self.options.keys()]
    if self.output["NSPLITRAN"]:
        self.output["NSPLITRAN_VAL"] = {x.upper(): y for x, y in self.options.items()}["NSPLITRAN"]
    self.w_summary = os.path.join(self.fit_output_dir, "BBC_SUMMARY_wfit.FITRES")
    self.output["w_summary"] = self.w_summary
    # Presumably sets self.output_plots among other dirs — defined on the class elsewhere.
    self.set_m0dif_dirs()
    if not self.make_all:
        # Only keep the first Hubble plot when MAKE_ALL_HUBBLE is disabled.
        self.output_plots = [self.output_plots[0]]
    self.logger.debug(f"Making {len(self.output_plots)} plots")
    self.muopts = self.config.get("MUOPTS", {})
    self.muopt_order = list(self.muopts.keys())
    self.output["muopts"] = self.muopt_order
    self.output["hubble_plot"] = self.output_plots
    self.devel = self.options.get('devel', 0)
    self.logger.debug(f"Devel option: {self.devel}")
    self.do_iterate = False  # Temp flag to stop iterating as BBC will reiterate natively
    self.logger.debug(f"Do iterate: {self.do_iterate}")
    # NOTE(review): raises KeyError if SNANA_DIR is unset in the environment.
    self.logger.debug(f"SNANA_DIR: {os.environ['SNANA_DIR']}")
def __init__(self, name, output_dir, config, global_config, combine="combine.input"):
    """Set up an SNANA simulation task: parses each simulation component's base
    input file to classify it as Ia vs non-Ia, builds the type lookup tables,
    and estimates the number of batch jobs from BATCH_INFO / RANSEED settings.

    Args:
        name: task name.
        output_dir: directory for the generated .input config, logs, and outputs.
        config: task config dict; reserved top-level keys are GENVERSION,
            GLOBAL, OPTS, EXTERNAL — every other key is a sim component.
        global_config: resolved global configuration (DATA_DIRS, SNANA paths).
        combine: name of the combine input template to resolve via get_data_loc.
    """
    self.data_dirs = global_config["DATA_DIRS"]
    base_file = get_data_loc(combine)
    super().__init__(name, output_dir, config, base_file, ": ")

    # Check for any replacements
    path_sndata_sim = get_config().get("SNANA").get("sim_dir")
    self.logger.debug(f"Setting PATH_SNDATA_SIM to {path_sndata_sim}")
    self.yaml["CONFIG"]["PATH_SNDATA_SIM"] = path_sndata_sim

    self.genversion = self.config["GENVERSION"]
    # Long genversions get truncated to 25 chars plus a 5-char hash so the
    # prefix stays under SNANA's length limits while remaining unique.
    if len(self.genversion) < 30:
        self.genprefix = self.genversion
    else:
        # NOTE(review): local name shadows the builtin `hash`.
        hash = get_hash(self.genversion)[:5]
        self.genprefix = self.genversion[:25] + hash

    self.options = self.config.get("OPTS", {})
    self.reserved_keywords = ["BASE"]
    self.reserved_top = ["GENVERSION", "GLOBAL", "OPTS", "EXTERNAL"]
    self.config_path = f"{self.output_dir}/{self.genversion}.input"  # Make sure this syncs with the tmp file name
    self.global_config = global_config

    self.sim_log_dir = f"{self.output_dir}/LOGS"
    self.total_summary = os.path.join(self.sim_log_dir, "MERGE.LOG")
    self.done_file = f"{self.output_dir}/LOGS/ALL.DONE"
    self.logging_file = self.config_path.replace(".input", ".LOG")
    self.kill_file = self.config_path.replace(".input", "_KILL.LOG")

    # EXTERNAL sims reuse previously generated output; otherwise we parse and
    # classify each component ourselves.
    if "EXTERNAL" not in self.config.keys():
        # Determine the type of each component
        keys = [k for k in self.config.keys() if k not in self.reserved_top]
        self.base_ia = []
        self.base_cc = []
        types = {}
        types_dict = {"IA": [], "NONIA": []}
        for k in keys:
            d = self.config[k]
            base_file = d.get("BASE")
            if base_file is None:
                Task.fail_config(f"Your simulation component {k} for sim name {self.name} needs to specify a BASE input file")
            base_path = get_data_loc(base_file)
            if base_path is None:
                Task.fail_config(f"Cannot find sim component {k} base file at {base_path} for sim name {self.name}")
            # Scan the component's base input file for GENTYPE / GENMODEL keys
            # (case-insensitive; the last matching line wins).
            gentype, genmodel = None, None
            with open(base_path) as f:
                for line in f.read().splitlines():
                    if line.upper().strip().startswith("GENTYPE:"):
                        gentype = line.upper().split(":")[1].strip()
                    if line.upper().strip().startswith("GENMODEL:"):
                        genmodel = line.upper().split(":")[1].strip()
            # Fall back to values declared in the pipeline config for this component.
            gentype = gentype or d.get("GENTYPE")
            if gentype is None:
                # NOTE(review): uses self.fail_config here but Task.fail_config
                # elsewhere — inconsistent, though equivalent if it's a class/static method.
                self.fail_config(f"The simulation component {k} needs to specify a GENTYPE in its input file")
            gentype = int(gentype)
            genmodel = genmodel or d.get("GENMODEL")
            # NOTE(review): this check only fires for GENTYPE == 0, since None
            # was already rejected above.
            if not gentype:
                Task.fail_config(f"Cannot find GENTYPE for component {k} and base file {base_path}")
            if not genmodel:
                Task.fail_config(f"Cannot find GENMODEL for component {k} and base file {base_path}")
            # SNANA convention: type and type+100 both map to the same class.
            type2 = 100 + gentype
            if "SALT2" in genmodel:
                self.base_ia.append(base_file)
                types[gentype] = "Ia"
                types[type2] = "Ia"
                types_dict["IA"].append(gentype)
                types_dict["IA"].append(type2)
            else:
                self.base_cc.append(base_file)
                types[gentype] = "II"
                types[type2] = "II"
                types_dict["NONIA"].append(gentype)
                types_dict["NONIA"].append(type2)
        sorted_types = dict(sorted(types.items()))
        self.logger.debug(f"Types found: {json.dumps(sorted_types)}")
        self.output["types_dict"] = types_dict
        self.output["types"] = sorted_types

        # Initial job-count estimate: twice the first RANSEED_* count (or 2 if none).
        rankeys = [r for r in self.config["GLOBAL"].keys() if r.startswith("RANSEED_")]
        value = int(self.config["GLOBAL"][rankeys[0]].split(" ")[0]) if rankeys else 1
        self.set_num_jobs(2 * value)

        self.output["blind"] = self.options.get("BLIND", False)
        self.derived_batch_info = None

        # Determine if all the top level input files exist
        if len(self.base_ia + self.base_cc) == 0:
            Task.fail_config("Your sim has no components specified! Please add something to simulate!")

        # Try to determine how many jobs will be put in the queue
        # First see if it's been explicitly set
        num_jobs = self.options.get("NUM_JOBS")
        if num_jobs is not None:
            self.num_jobs = num_jobs
            self.logger.debug(f"Num jobs set by NUM_JOBS option to {self.num_jobs}")
        else:
            try:
                # If BATCH_INFO is set, we'll use that
                batch_info = self.config.get("GLOBAL", {}).get("BATCH_INFO")
                default_batch_info = self.yaml["CONFIG"].get("BATCH_INFO")

                # If its not set, lets check for ranseed_repeat or ranseed_change
                if batch_info is None:
                    ranseed_repeat = self.config.get("GLOBAL", {}).get("RANSEED_REPEAT")
                    ranseed_change = self.config.get("GLOBAL", {}).get("RANSEED_CHANGE")
                    default = self.yaml.get("CONFIG", {}).get("RANSEED_REPEAT")
                    ranseed = ranseed_repeat or ranseed_change or default
                    if ranseed:
                        # First token of the RANSEED value is the job count;
                        # patch it into the template's BATCH_INFO last field.
                        num_jobs = int(ranseed.strip().split()[0])
                        self.logger.debug(f"Found a randseed with {num_jobs}, deriving batch info")
                        comps = default_batch_info.strip().split()
                        comps[-1] = str(num_jobs)
                        self.derived_batch_info = " ".join(comps)
                        self.num_jobs = num_jobs
                        self.logger.debug(f"Num jobs set by RANSEED to {self.num_jobs}")
                else:
                    # self.logger.debug(f"BATCH INFO property detected as {property}")
                    self.num_jobs = int(batch_info.split()[-1])
                    self.logger.debug(f"Num jobs set by BATCH_INFO to {self.num_jobs}")
            except Exception:
                # Best-effort: fall back to a single job rather than failing setup.
                self.logger.warning(f"Unable to determine how many jobs simulation {self.name} has")
                self.num_jobs = 1

        self.output["genversion"] = self.genversion
        self.output["genprefix"] = self.genprefix
        self.ranseed_change = self.config.get("GLOBAL", {}).get("RANSEED_CHANGE")
        base = os.path.expandvars(self.global_config["SNANA"]["sim_dir"])
        self.output["ranseed_change"] = self.ranseed_change is not None
        self.output["ranseed_change_val"] = self.ranseed_change
        # Presumably populates self.sim_folders — defined on the class elsewhere.
        self.get_sim_folders(base, self.genversion)
        self.output["sim_folders"] = self.sim_folders
    else:
        # EXTERNAL sim: sim_folders were already placed in self.output upstream.
        self.sim_folders = self.output["sim_folders"]
def __init__(self, name, output_dir, config, options, global_config, dependencies=None): super().__init__(name, output_dir, config=config, dependencies=dependencies) self.options = options self.global_config = get_config() self.logfile = os.path.join(self.output_dir, "output.log") self.conda_env = self.global_config["DataSkimmer"]["conda_env"] self.path_to_task = output_dir self.unparsed_raw = self.options.get("RAW_DIR") self.raw_dir = get_data_loc(self.unparsed_raw) if self.raw_dir is None: Task.fail_config(f"Unable to find {self.options.get('RAW_DIR')}") self.genversion = os.path.basename(self.raw_dir) self.data_path = os.path.dirname(self.raw_dir) if self.unparsed_raw == "$SCRATCH_SIMDIR" or "SNDATA_ROOT/SIM" in self.raw_dir: self.logger.debug("Removing PRIVATE_DATA_PATH from NML file") self.data_path = "" self.job_name = os.path.basename( Path(output_dir).parents[1]) + "_DATAPREP_" + self.name self.output_info = os.path.join(self.output_dir, f"{self.genversion}.YAML") self.output["genversion"] = self.genversion self.opt_setpkmjd = options.get("OPT_SETPKMJD", 16) self.photflag_mskrej = options.get("PHOTFLAG_MSKREJ", 1016) self.output["data_path"] = self.data_path self.output["photometry_dirs"] = [get_output_loc(self.raw_dir)] self.output["sim_folders"] = [get_output_loc(self.raw_dir)] self.output["raw_dir"] = self.raw_dir self.clump_file = os.path.join(self.output_dir, self.genversion + ".SNANA.TEXT") self.output["clump_file"] = self.clump_file self.output["ranseed_change"] = False is_sim = options.get("SIM", False) self.output["is_sim"] = is_sim self.output["blind"] = options.get("BLIND", True) self.types_dict = options.get("TYPES") if self.types_dict is None: self.types_dict = { "IA": [1], "NONIA": [ 2, 20, 21, 22, 29, 30, 31, 32, 33, 39, 40, 41, 42, 43, 80, 81 ] } else: for key in self.types_dict.keys(): self.types_dict[key] = [int(c) for c in self.types_dict[key]] self.batch_file = self.options.get("BATCH_FILE") if self.batch_file is not None: 
self.batch_file = get_data_loc(self.batch_file) self.batch_replace = self.options.get("BATCH_REPLACE", {}) self.logger.debug(f"\tIA types are {self.types_dict['IA']}") self.logger.debug(f"\tNONIA types are {self.types_dict['NONIA']}") self.output["types_dict"] = self.types_dict self.types = OrderedDict() for n in self.types_dict["IA"]: self.types.update({n: "Ia"}) for n in self.types_dict["NONIA"]: self.types.update({n: "II"}) self.output["types"] = self.types self.slurm = """{sbatch_header} {task_setup}""" self.clump_command = """#