def run(args) -> None:
    with handle_cd_revertible(args.dir):
        with acquire_lock():
            if os.path.exists("meta.yml"):
                print("Found existing meta.yml, resuming experiment.")

                experiment = bopt.Experiment.deserialize()
                experiment.collect_results()

                next_params, fitted_model = experiment.suggest()

                param_str = "\n".join([
                    f"{key.name}: {value}"
                    for key, value in next_params.mapping.items()
                ])

                param_args = " ".join([
                    f"--{key.name}={value}"
                    for key, value in next_params.mapping.items()
                ])

                print(f"""PARAMS:
{param_str}

To evaluate this manually, run:

    bopt manual-run {param_args}
""")
            else:
                print("No meta.yml found.")
                sys.exit(1)
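# handle_cd_revertible and acquire_lock are context managers assumed to be
# defined elsewhere in bopt: the first chdirs into the experiment directory
# and reverts on exit, the second takes a lockfile guarding meta.yml. A
# minimal, hypothetical sketch under those assumptions (illustration only,
# with invented names so nothing here shadows the real helpers; the
# ".bopt.lock" path is an assumption as well):
import contextlib
import os


@contextlib.contextmanager
def _cd_revertible_sketch(path):
    # Change into `path` and always restore the previous working directory.
    old_cwd = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(old_cwd)


@contextlib.contextmanager
def _acquire_lock_sketch(lock_path=".bopt.lock"):
    # Fail fast if another process already holds the lockfile.
    fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    try:
        yield
    finally:
        os.close(fd)
        os.remove(lock_path)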
def run(args) -> None:
    with handle_cd_revertible(args.dir):
        with ensure_meta_yml():
            logging.info("Found existing meta.yml, resuming experiment.")

            if args.c:
                with acquire_lock():
                    experiment = bopt.Experiment.deserialize()
                    n_started = len([
                        s for s in experiment.samples
                        if s.result is not None or (
                            s.job and not s.job.is_finished())
                    ])
            else:
                n_started = 0

            max_start = args.n_iter

            while n_started < max_start:
                if try_start_job(args):
                    n_started += 1
                    logging.info("[{}/{}] Started a new evaluation".format(
                        n_started, max_start))

                # Reap any finished local child processes so they do not
                # linger as zombies between polls.
                psutil.wait_procs(psutil.Process().children(), timeout=0.01)
                time.sleep(args.sleep)
def multi():
    experiments = []
    dirnames = []

    # from concurrent.futures import ProcessPoolExecutor
    #
    # def f(d):
    #     with handle_cd_revertible(d):
    #         print("Loading", d)
    #         return bopt.Experiment.deserialize()
    #
    # with ProcessPoolExecutor(max_workers=10) as executor:
    #     experiments = executor.map(f, args.experiments)
    #     dirnames = args.experiments

    for exp_dir in args.experiments:
        with handle_cd_revertible(exp_dir), acquire_lock():
            print(exp_dir)
            experiment = bopt.Experiment.deserialize()

            experiments.append(experiment)
            dirnames.append(exp_dir)

    shortened_dirnames = [s.split("/")[-1] for s in dirnames]

    zipped = list(zip(experiments, shortened_dirnames))

    return render_template("multi.html",
                           experiments=experiments,
                           dirnames=dirnames,
                           zipped_experiments_dirnames=zipped)
def run(args):
    with handle_cd_revertible(args.dir):
        with acquire_lock():
            if args.to_json:
                import json

                import yaml

                with open("meta.yml", "r") as f_src, \
                        open("meta.json", "w") as f_dst:
                    data = yaml.load(f_src, Loader=yaml.Loader)

                    if isinstance(data["gp_config"], GPConfig):
                        data["gp_config"] = data["gp_config"].to_dict()

                    data["samples"] = [
                        convert_collect_flag(sample)
                        for sample in data["samples"]
                    ]

                    json.dump(data, f_dst)
            elif args.to_yaml:
                raise NotImplementedError()
            else:
                print("Must provide either --to-yaml or --to-json",
                      file=sys.stderr)
                sys.exit(1)
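# convert_collect_flag is assumed to be a helper defined elsewhere that makes
# each serialized sample JSON-friendly (enum members such as CollectFlag are
# not JSON-serializable). A hypothetical sketch under that assumption, using
# an invented name so it does not shadow the real helper:
def _convert_collect_flag_sketch(sample: dict) -> dict:
    flag = sample.get("collect_flag")
    if flag is not None and not isinstance(flag, (str, int)):
        # Store the enum by name so json.dump can handle it.
        sample = dict(sample, collect_flag=flag.name)
    return sample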
def run(args) -> None:
    with handle_cd_revertible(args.dir):
        with acquire_lock(), ensure_meta_yml():
            experiment = bopt.Experiment.deserialize()
            experiment.collect_results()

            # TODO: unify naming run_params vs model_params
            model_params = vars(args).copy()
            del model_params["bopt"]
            del model_params["func"]
            if "dir" in model_params:
                del model_params["dir"]

            mapping = {}

            for hyperparam in experiment.hyperparameters:
                if hyperparam.name in model_params:
                    mapping[hyperparam] = \
                        hyperparam.range.parse(model_params[hyperparam.name])
                else:
                    print("\nMissing value for: {}".format(hyperparam.name))
                    sys.exit(1)

            hyperparam_values = bopt.HyperparamValues.from_mapping(mapping)

            if not hyperparam_values.validate():
                print("Some values of hyperparameters were invalid, exiting.")
                sys.exit(1)

            next_sample, _ = experiment.manual_run(
                hyperparam_values, bopt.ModelParameters.for_manual_run())

            print("Started {}".format(next_sample))
def run(args) -> None:
    with handle_cd_revertible(args.dir):
        with acquire_lock():
            experiment = Experiment.deserialize()
            experiment.collect_results()

            import ipdb
            ipdb.set_trace()

            print("Debug finished")
def run(args) -> None:
    with handle_cd_revertible(args.dir):
        with acquire_lock(), ensure_meta_yml():
            experiment = bopt.Experiment.deserialize()
            experiment.collect_results()

            assert args.n_parallel > 0

            for i in range(args.n_parallel):
                logging.info("Starting {}/{}".format(i + 1, args.n_parallel))
                experiment.run_next()
def run(args) -> None:
    with handle_cd_revertible(args.dir):
        with acquire_lock():
            experiment = Experiment.deserialize()

            for sample in experiment.samples:
                if sample.job and not sample.job.is_finished():
                    sample.job.kill()

            experiment.samples = []
            experiment.serialize()

            for f in (glob.glob("output/*") + glob.glob("plots/*")):
                os.remove(f)
def try_start_job(args):
    with acquire_lock():
        experiment = bopt.Experiment.deserialize()

        num_running = len([s for s in experiment.samples if s.is_pending()])

        if num_running < args.n_parallel:
            experiment.collect_results()
            _, sample = experiment.run_next()
            experiment.serialize()

            if not sample.job and \
                    sample.collect_flag != bopt.CollectFlag.WAITING_FOR_SIMILAR:
                logging.error("Created invalid sample without a job "
                              "(should have WAITING_FOR_SIMILAR).")
                return False
            else:
                return True
        else:
            return False
def run(args) -> None:
    from bopt.models.gpy_model import GPyModel

    with handle_cd_revertible(args.dir):
        with acquire_lock(), ensure_meta_yml():
            experiment = bopt.Experiment.deserialize()
            experiment.collect_results()

            processed_samples: List[bopt.Sample] = []

            for sample in tqdm(experiment.samples_for_prediction()):
                if sample.model.model_name == GPyModel.model_name:
                    # Rebuild the GP from the stored model parameters using
                    # the samples seen so far, then re-create its plot.
                    sample_col = bopt.SampleCollection(processed_samples)
                    X, Y = sample_col.to_xy()

                    model = GPyModel.from_model_params(
                        experiment.gp_config, sample.model, X, Y)

                    try:
                        plot_current(experiment, model, sample.to_x())
                    except ValueError as e:
                        logging.error("Plotting failed {}".format(e))

                processed_samples.append(sample)
def run(args) -> None:
    pathlib.Path(args.dir).mkdir(parents=True, exist_ok=True)
    pathlib.Path(os.path.join(args.dir, "output")).mkdir(parents=True,
                                                         exist_ok=True)

    with handle_cd_revertible(args.dir):
        with acquire_lock():
            hyperparameters: List[Hyperparameter] = []

            # TODO: the lockfile is not removed when this crashes
            for param in args.param:
                name, type, *values = param.split(":")

                cls: Type
                parser: Callable

                if type == "discrete":
                    hyp = bopt.Hyperparameter(name, bopt.Discrete(values))
                else:
                    mapping = {
                        "int": [bopt.Integer, int],
                        "float": [bopt.Float, float],
                        "logscale_int": [bopt.LogscaleInt, int],
                        "logscale_float": [bopt.LogscaleFloat, float],
                    }

                    if type in mapping:
                        cls, parser = mapping[type]
                    else:
                        logging.error("Invalid value {} for hyperparameter type, "
                                      "only 'int', 'float', 'logscale_int', "
                                      "'logscale_float' and 'discrete' are "
                                      "permitted.".format(type))
                        sys.exit(1)

                    if len(values) == 2:
                        low, high = values
                        buckets = -1
                    elif len(values) == 3:
                        low, high, buckets = values
                    else:
                        logging.error("Invalid number of values in '%s', "
                                      "must be 2 or 3.", values)
                        sys.exit(1)

                    # assert issubclass(cls, bopt.Bound), "Expected bopt.Bound, got {}".format(cls)
                    hyp = bopt.Hyperparameter(
                        name, cls(parser(low), parser(high), buckets))

                hyperparameters.append(hyp)

            script_path = args.command
            default_arguments = args.arguments

            runner: bopt.Runner

            manual_arg_fnames: List[str] = []
            for fname in args.manual_arg_fname:
                base_fname = os.path.basename(fname)
                shutil.copy(fname, "./{}".format(base_fname))
                manual_arg_fnames.append(base_fname)

            if args.runner == "local":
                runner = bopt.LocalRunner(script_path, default_arguments,
                                          manual_arg_fnames)
            elif args.runner == "sge":
                runner = bopt.SGERunner(script_path, default_arguments,
                                        args.qsub or [], manual_arg_fnames)
            else:
                logging.error("Invalid value {} for runner, "
                              "only 'local' and 'sge' are allowed."
                              .format(args.runner))
                sys.exit(1)

            default_result_regex = "RESULT=(.*)"

            gp_config = bopt.GPConfig(args)

            experiment = bopt.Experiment(args.task, args.batch,
                                         hyperparameters, runner,
                                         default_result_regex, gp_config)

            experiment.serialize()

            logging.info(f"Experiment created, run `bopt run -C {args.dir}` to start.")
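# Based on the parsing above, each --param value has the form
# "name:type:low:high[:buckets]" for numeric types and
# "name:discrete:value1:value2:..." for discrete ones. A hypothetical helper
# (not part of bopt) that mirrors just the splitting step, handy for checking
# the spec format in isolation; the example names and ranges below are
# illustrative only:
def _parse_param_spec_sketch(param: str):
    """Split a hyperparameter spec string into (name, type, values)."""
    name, type_, *values = param.split(":")
    return name, type_, values


# _parse_param_spec_sketch("lr:logscale_float:1e-5:1e-1")
#   -> ("lr", "logscale_float", ["1e-5", "1e-1"])
# _parse_param_spec_sketch("optimizer:discrete:adam:sgd")
#   -> ("optimizer", "discrete", ["adam", "sgd"])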
def run(args) -> None:
    # TODO: acquire in the same with?
    with handle_cd_revertible(args.dir):
        with acquire_lock():
            experiment = Experiment.deserialize()
            experiment.collect_results()

            if args.r:
                for sample in experiment.samples:
                    if sample.result is not None:
                        print(sample.result)
                return

            best_res = None
            best_sample = None

            ok_samples = []

            for sample in experiment.samples:
                if sample.result is not None:
                    try:
                        if best_res is None or (sample.result and
                                                sample.result > best_res):
                            best_res = sample.result
                            best_sample = sample
                    except ValueError:
                        # TODO: cleanup checks
                        job_id = sample.job.job_id if sample.job else "NOJOB_ERR"
                        logging.error(
                            "Sample {} failed to parse".format(job_id))
                        continue

                    ok_samples.append(sample)

            bad_samples = list(set(experiment.samples) - set(ok_samples))

            if args.b and best_sample:
                print(best_res)
                return

            print("Hyperparameters:")
            for param in experiment.hyperparameters:
                print(f"\t{param}")

            if best_sample:
                best_job_id = best_sample.job.job_id if best_sample.job else "NO_JOB"
                print("\nBEST (id={}): {}".format(best_job_id, best_res))

                assert best_sample is not None
                if best_sample.job:
                    run_str = experiment.runner.script_path + " \\\n "

                    for h, v in best_sample.hyperparam_values.mapping.items():
                        if isinstance(v, float):
                            v = round(v, 2)
                        run_str += " --{}={}".format(h.name, v)

                    print(run_str)

            print()
            print("STATS:")
            print(f"OK: {len(ok_samples)}\tBAD: {len(bad_samples)}")
            print()

            print("Evaluations:")
            for sample in ok_samples:
                job = sample.job

                proc_stats = ""
                if job and psutil.pid_exists(job.job_id):
                    process = psutil.Process(job.job_id)
                    mem = process.memory_info()

                    proc_stats += f"Process:{process.status()}"
                    proc_stats += f", cpu={process.cpu_percent()}"
                    # TODO: fix this on osx: shared={mem.shared}
                    proc_stats += f", rss={mem.rss}, vms={mem.vms}"

                print(f"{sample}\t{proc_stats}")

            from colored import fg, bg, attr

            for sample in bad_samples:
                print(bg("red") + str(sample))
            print(attr("reset"))
def experiment_detail(exp_dir, index: Optional[int] = None):
    with handle_cd_revertible(exp_dir), acquire_lock():
        experiment = bopt.Experiment.deserialize()
        experiment.collect_results()

        # TODO: the rest probably doesn't need to run under the lock,
        #       but then the job output can't be read
        sample_results = [s.result for s in experiment.samples if s.result]
        sample_results_cummax = np.maximum.accumulate(
            sample_results).tolist()

        kernel_param_timeline: Dict[str, list] = defaultdict(list)

        sorted_samples = sorted(experiment.samples, key=lambda x: x.created_at)

        num_random = len([
            s for s in sorted_samples
            if s.model.sampled_from_random_search()
        ])

        for i, s in enumerate(sorted_samples):
            if i < num_random + 1:
                continue

            for key, value in s.model.params.items():
                if isinstance(value, list):
                    for v, h in zip(value, experiment.hyperparameters):
                        kernel_param_timeline["{}_{}".format(
                            key, h.name)].append(v)
                else:
                    kernel_param_timeline[key].append(value)

        n_dims = len(experiment.hyperparameters)

        sample_id = int(request.args.get("sample_id") or -1)
        show_acq = int(request.args.get("show_acq") or 0)
        show_marginal = int(request.args.get("show_marginal") or 1)

        sample = next((s for s in experiment.samples
                       if s.job and s.job.job_id == sample_id), None)

        random_search_picked = False

        if sample and sample.model.sampled_from_random_search():
            random_search_picked = True
            print("picked sample", sample)

        slices_1d = []
        slices_2d = []

        resolution = 80

        if sample and not random_search_picked:
            x_slice = sample.hyperparam_values.x

            others = experiment.predictive_samples_before(sample)
            X_sample, Y_sample = bopt.SampleCollection(others).to_xy()

            gpy_model = bopt.GPyModel.from_model_params(
                experiment.gp_config, sample.model, X_sample, Y_sample)

            # print("Non-marginal", X_sample, Y_sample, gpy_model.model)
            # model = gpy_model.model
            model = bopt.GPyModel.wrap_kernel_with_rounding(
                gpy_model.model, experiment.hyperparameters)

            for i in range(n_dims):
                for j in range(n_dims):
                    if i == j:
                        slices_1d.append(
                            create_slice_1d(i, experiment, resolution, n_dims,
                                            x_slice, model, sample,
                                            show_marginal))
                    elif i < j:
                        slices_2d.append(
                            create_slice_2d(i, j, experiment, resolution,
                                            n_dims, x_slice, model, sample,
                                            show_marginal))

        X_sample, Y_sample = bopt.SampleCollection(
            experiment.samples).to_xy()

        parallel_data = {}
        for i, h in enumerate(experiment.hyperparameters):
            parallel_data[h] = X_sample[:, i].tolist()

        return render_template(
            "index.html",
            exp_dir=exp_dir,
            experiment=experiment,
            experiment_index=index,
            stats=experiment.stats(),
            parallel_data=parallel_data,
            sample_results=sample_results,
            sample_results_cummax=sample_results_cummax,
            kernel_param_timeline=kernel_param_timeline,
            picked_sample=sample,
            CollectFlag=bopt.CollectFlag,
            slices_1d=slices_1d,
            slices_2d=slices_2d,
            sorted_samples=sorted_samples,
            random_search_picked=random_search_picked,
            show_acq=show_acq,
            show_marginal=show_marginal,
            sample_id=sample_id,
        )
def run(args) -> None:
    # TODO: this is completely outdated and broken at this point
    raise NotImplementedError()

    with handle_cd_revertible(args.dir):
        with acquire_lock():
            job_files = glob.glob(os.path.join("**", "job-*.yml"),
                                  recursive=True)

            pattern = ".*job-(\\d+).yml"

            matches = [(fname, re.match(pattern, fname))
                       for fname in job_files]

            job_ids = [(fname, int(m.groups()[0])) for fname, m in matches
                       if m is not None and len(m.groups()) == 1]

            if len(job_ids) == 0:
                print(f"No jobs found. Check that {args.dir} contains "
                      "job results.")
                sys.exit(1)

            matched_job_ids = [(fname, job_id) for fname, job_id in job_ids
                               if job_id == args.JOB_ID]

            if len(matched_job_ids) == 0:
                print(f"Job with id {args.JOB_ID} not found in '{args.dir}'.")
                sys.exit(1)
            elif len(matched_job_ids) > 1:
                print(f"Found more than one job with id {args.JOB_ID} "
                      f"in '{args.dir}'.")
                sys.exit(1)

            assert len(matched_job_ids) == 1
            fname, job_id = matched_job_ids[0]

            experiment = Experiment.deserialize()
            experiment.collect_results()

            # TODO: this is most likely not needed.
            job = experiment.runner.deserialize_job(job_id)

            is_finished = job.is_finished()

            print(f"Status:\t\t{job.status()}")

            if is_finished:
                try:
                    if job.is_success():
                        print(f"Final result:\t{job.final_result()}")
                    else:
                        print(f"Error:\t{job.err()}")
                except ValueError as e:
                    traceback.print_exc()

            print()
            print("Parameters:")
            for key, value in job.run_parameters.items():
                print(f"\t{key}: {value}")

            print("RAW OUTPUT:")
            print("----------------------------------------------")
            print(job.get_job_output())