def test_write_grid_files(self):
    """Smoke-test ``write_config_files`` on a small mixed parameter space.

    The files are written into a temporary directory that is removed when
    the test ends, so the test neither depends on a pre-existing ``/tmp``
    layout nor leaves artefacts behind between runs.
    """
    import tempfile

    ps = ParamSpace()
    ps.add_value("p1", True)
    ps.add_list("p2", ["A", "B"])
    ps.add_random("p3", n=2, prior="uniform", low=1, high=3)

    with tempfile.TemporaryDirectory() as tmp_dir:
        # The trailing separator mirrors the directory-style path the test
        # has always passed to write_config_files.
        out_path = os.path.join(tmp_dir, "test_params", "")
        os.makedirs(out_path, exist_ok=True)
        ps.write_config_files(out_path)
def test_sample_params(self):
    """Drawing one sample from the space must yield a dictionary."""
    space = ParamSpace()
    space.add_value("p1", True)
    space.add_list("p2", ["A", "B"])
    space.add_random("p3", n=1, prior="uniform", low=1, high=3)

    self.assertIsInstance(space.sample_space(), dict)
def test_add_random(self):
    """Non-persistent random parameters are re-sampled on every grid.

    If ``persist`` is not set to True for ``add_random``, each call to
    ``param_grid`` samples new random values. With ``persist=True`` the
    parameter would instead be saved as a list of randomly generated
    values and every grid would see the same ones.
    """
    ps = ParamSpace()
    name = "param1"
    ps.add_random(name, low=2, high=4, persist=False, n=10, prior="uniform")

    params1 = ps.param_grid()
    # The previous ``assertTrue(ps.size, 1)`` used 1 as the failure message,
    # so it only ever checked truthiness; state that intent explicitly.
    self.assertGreaterEqual(ps.size, 1)
    r1 = next(params1)[name]

    params2 = ps.param_grid()
    r2 = next(params2)[name]

    cfg_file = "test.cfg"
    try:
        # writing a non-persistent random parameter must not fail
        ps.write(cfg_file)
    finally:
        # do not leave the config file behind in the working directory
        if os.path.exists(cfg_file):
            os.remove(cfg_file)

    self.assertNotEqual(r1, r2)
def test_param_grid(self):
    """The grid enumerates the product of all parameter value counts.

    One constant value, a two-element list and three random samples must
    give 1 * 2 * 3 configurations, and ``ps.size`` must report the same
    number.
    """
    ps = ParamSpace()
    ps.add_value("p1", True)
    ps.add_list("p2", ["A", "B"])
    ps.add_random("p3", low=0, high=4, prior="uniform", n=3)

    # materialise the grid once; it is consumed by both assertions below
    grid = list(ps.param_grid())

    self.assertEqual(len(grid), 1 * 2 * 3)
    self.assertEqual(len(grid), ps.size)
def test_param_grid_with_id(self):
    """Asking for several runs yields every grid point once per run."""
    space = ParamSpace()
    space.add_value("p1", True)
    space.add_list("p2", ["A", "B"])

    expanded = list(space.param_grid(runs=5))

    # 1 constant value * 2 list values * 5 runs
    self.assertEqual(len(expanded), 1 * 2 * 5)
def test_add_range(self):
    """A range parameter survives a write / reload round trip.

    The config file is removed even when writing or reloading fails, so a
    broken run does not poison later tests that reuse the same file name.
    """
    filename = "test.cfg"
    ps = ParamSpace()
    ps.add_range("range_param", 0, 10, 1, dtype=int)

    try:
        ps.write(filename)
        restored = ParamSpace(filename)
        # the reloaded space must still know about the range parameter
        self.assertIn("range_param", restored.param_names())
    finally:
        if os.path.exists(filename):
            os.remove(filename)
def params_to_skopt(param_space: ParamSpace):
    """Convert a parameter space into a list of skopt ``Dimension`` objects.

    A skopt Optimizer only receives three types of dimensions:
    ``Categorical``, ``Real`` or ``Integer``, so every parameter of the
    space is mapped onto one of them.

    Only parameters whose domain holds more than one value are converted:
    constant values do not need to be optimized. Another function is
    provided to convert skopt output values back into a full configuration
    dictionary according to the parameter space (see ``values_to_params``).

    Args:
        param_space: a ParamSpace from which the domain of each parameter
            is read.

    Returns:
        a list of ``Dimension`` objects that can be passed to a skopt
        Optimizer, in the order given by ``param_space.param_names()``.
    """
    dimensions = []

    for param_name in param_space.param_names():
        domain_param = param_space.domain(param_name)
        domain = domain_param["domain"]
        dtype = DTypes.from_type(domain_param["dtype"])

        # constants (or empty domains) give the optimizer nothing to search
        if len(domain) <= 1:
            continue

        if dtype == DTypes.INT:
            dimensions.append(
                Integer(min(domain), max(domain), name=param_name))
        elif dtype == DTypes.FLOAT:
            # skopt's Real only understands "uniform" / "log-uniform"; a
            # missing (or None) prior must fall back to the uniform default
            # instead of being forwarded as None.
            prior = domain_param.get("prior") or "uniform"
            dimensions.append(
                Real(min(domain), max(domain), prior=prior, name=param_name))
        elif dtype == DTypes.CATEGORICAL:
            # for Categorical, None is a valid prior
            prior = domain_param.get("prior", None)
            dimensions.append(
                Categorical(domain, prior, transform="onehot", name=param_name))

    return dimensions
def test_write_summary(self):
    """The CSV summary holds exactly one row per configuration.

    The summary is read back with ``csv.DictReader`` and its row count is
    compared with ``ps.size``. The file handle is closed and the summary
    file removed even if writing or parsing fails.
    """
    summary_file = "params.csv"

    ps = ParamSpace()
    ps.add_value("p1", True)
    ps.add_list("p2", ["A", "B"])
    ps.add_random("p3", low=0, high=4, prior="uniform", n=3)

    try:
        ps.write_configs(summary_file)
        with open(summary_file) as written_summary:
            reader = csv.DictReader(written_summary)
            params = [dict(config) for config in reader]
    finally:
        if os.path.exists(summary_file):
            os.remove(summary_file)

    self.assertEqual(len(params), ps.size)
def test_write_recover(self):
    """A written parameter file can be loaded back into a ParamSpace.

    Writing the parameter file does not preserve the value types; this is
    expected. The one requirement is that ``np.random.uniform`` can still
    be used after reloading, so regardless of the argument types given to
    ``add_random`` and ``add_range`` they are converted to float
    parameters.

    The file is removed even when the existence check or the reload fails.
    """
    ps = ParamSpace()
    ps.add_value("p1", True)
    ps.add_list("p2", ["A", "B"])
    ps.add_random("p3", low=0, high=4, prior="uniform", n=3)

    param_filename = "test.conf"
    try:
        ps.write(param_filename)
        self.assertTrue(os.path.exists(param_filename))
        # reloading must not raise
        ParamSpace(param_filename)
    finally:
        if os.path.exists(param_filename):
            os.remove(param_filename)
def test_domain(self):
    """``domain`` reports values, dtype and (for bounded params) the prior.

    Covers the four ways of adding a parameter: a constant value, a list
    (duplicates collapsed), a range and a random parameter.
    """
    ps = ParamSpace()

    # constant value: categorical domain
    ps.add_value("value", True)
    domain = ps.domain("value")
    self.assertIn("domain", domain)
    self.assertIn("dtype", domain)
    self.assertEqual(DTypes.CATEGORICAL.value, domain["dtype"])

    # list: categorical domain with duplicates removed, order preserved
    ps.add_list("bool", [True, False, True])
    domain = ps.domain("bool")
    self.assertIn("domain", domain)
    self.assertIn("dtype", domain)
    self.assertEqual(DTypes.CATEGORICAL.value, domain["dtype"])
    self.assertListEqual([True, False], domain["domain"])

    # range: bounded float domain with a uniform prior
    ps.add_range("bounds", 0, 10, dtype=float)
    domain = ps.domain("bounds")
    self.assertIn("domain", domain)
    self.assertIn("dtype", domain)
    self.assertIn("prior", domain)
    self.assertEqual("float", domain["dtype"])
    self.assertEqual("uniform", domain["prior"])

    # random: inspect the parameter that was just added (the test used to
    # re-check "bounds" here, leaving "random" completely unverified)
    ps.add_random("random", 0, 10, prior="log-uniform", dtype=float)
    domain = ps.domain("random")
    self.assertIn("domain", domain)
    self.assertIn("dtype", domain)
    self.assertIn("prior", domain)
    self.assertEqual("float", domain["dtype"])
    self.assertEqual("log-uniform", domain["prior"])
from exp.params import ParamSpace
import os

# Parameter space definition: constants are registered with add_value,
# searched hyper-parameters with add_list.
ps = ParamSpace()
# prefix used to identify result assets

# data
# expanduser("~") resolves the home directory even when the HOME
# environment variable is unset, where os.getenv("HOME") would return None
# and make os.path.join raise a TypeError.
default_corpus = os.path.join(os.path.expanduser("~"), "data/datasets/ptb/")
ps.add_value("corpus", default_corpus)
ps.add_value("ngram_size", 5)
ps.add_value("save_model", False)

# nrp params
ps.add_list("k_dim", [1000, 4000, 8000, 10000])
ps.add_list("s_active", [2, 16, 64, 128])

# architecture
ps.add_list("embed_dim", [128, 256])
ps.add_value("embed_init", "uniform")
ps.add_value("embed_init_val", 0.01)
ps.add_value("embed_share", False)

ps.add_list("h_dim", [256, 512])
ps.add_value("h_act", "relu")
ps.add_value("num_h", 1)

ps.add_value("logit_init", "uniform")
ps.add_value("logit_init_val", 0.01)

# ps.add_value("use_f_predict", True)
ps.add_value("f_init", "uniform")
def run(params, module, workers, gpu, n, surrogate, acquisition, name, plot, out, sync, kappa, xi, kuma):
    """Run a skopt-driven optimisation of ``module`` over a parameter space.

    Worker processes evaluate configurations proposed by a skopt
    ``Optimizer``; each result is fed back to the optimizer and appended to
    a CSV file until ``n`` evaluations are complete or one of them fails.

    Args:
        params: parameter file handed to ``ParamSpace``.
        module: forwarded untouched to every ``worker`` process.
        workers: upper bound on the number of worker processes.
        gpu: if truthy, use one worker per GPU with load below 0.2;
            otherwise workers are capped by the CPU count.
        n: number of evaluations to complete.
        surrogate: passed to the optimizer as ``base_estimator``.
        acquisition: passed to the optimizer as ``acq_func``; ``"LCB"``
            additionally uses ``kappa`` and ``"EI"`` uses ``xi``.
        name: prefix of the log file, the results CSV and the plot file.
        plot: if truthy, draw the convergence plot while running and save
            it as ``<name>_convergence.pdf`` at the end.
        out: output location. Only its directory is used: results are
            always written to ``<dir>/<name>_configurations.csv``.
        sync: if truthy, submit a new batch only once every pending job has
            returned; otherwise submit one new job per received result.
        kappa: exploration parameter for the ``"LCB"`` acquisition.
        xi: improvement parameter for the ``"EI"`` acquisition.
        kuma: if truthy, mirror progress through ``update_progress_kuma``.

    Raises:
        SystemExit: when no worker can be started.
        Exception: unexpected errors are logged and re-raised; invalid
            parameter files are reported on stderr instead.
    """
    logger = logging.getLogger(__name__)
    handler = logging.FileHandler('{name}.log'.format(name=name), delay=True)
    handler.setLevel(logging.ERROR)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    opt_results = None
    out_file = None
    try:
        if gpu:
            # only GPUs whose current load is below 0.2 count as available
            gpu_ids = [g.id for g in GPUtil.getGPUs() if g.load < 0.2]
            num_workers = min(workers, len(gpu_ids))

            if num_workers <= 0:
                # record the reason for exiting instead of failing silently
                logger.log(logging.ERROR, "no gpus available")
                sys.exit(1)
        else:
            num_workers = min(workers, mp.cpu_count())

        logger.log(logging.DEBUG, "Spawning {} workers".format(num_workers))
        if num_workers <= 0:
            logger.log(logging.ERROR, "--workers cannot be 0")
            sys.exit(1)

        # prepare output file
        out_file_name = '{}_configurations.csv'.format(name)
        out = out_file_name if out is None else out
        if out is not None and os.path.isdir(out):
            out_file_path = os.path.join(out, out_file_name)
        else:
            out_file_path = out

        # keep only the directory part; the file name is always derived
        # from ``name``
        out_dir = os.path.abspath(os.path.join(out_file_path, os.pardir))
        out_file_path = os.path.join(out_dir, out_file_name)

        param_space = ParamSpace(params)

        dimensions = params_to_skopt(param_space)
        # names of the parameters the optimizer actually searches over
        optimizer_dims = [d.name for d in dimensions]
        acquisition_kwargs = None
        if acquisition == "LCB":
            acquisition_kwargs = {'kappa': kappa}
        elif acquisition == "EI":
            acquisition_kwargs = {'xi': xi}

        optimizer = Optimizer(dimensions=dimensions,
                              acq_func_kwargs=acquisition_kwargs,
                              base_estimator=surrogate,
                              acq_func=acquisition)

        # closed in the ``finally`` clause below
        out_file = open(out_file_path, 'w')
        out_writer = csv.DictWriter(out_file, fieldnames=param_space.param_names() + ["id", "evaluation"])
        out_writer.writeheader()

        # setup process pool and queues
        # manager = mp.Manager()
        config_queue = Queue()
        result_queue = Queue()
        error_queue = Queue()

        terminate_flags = [Event() for _ in range(num_workers)]
        processes = [
            Process(target=worker,
                    args=(i, module, config_queue, result_queue, error_queue, terminate_flags[i]))
            for i in range(num_workers)
        ]

        configs = []
        scores = {}
        # get initial points at random and submit one job per worker
        submit(num_workers, optimizer, optimizer_dims, configs, param_space, config_queue)
        # cfg_if: score

        num_completed = 0
        pending = len(configs)
        cancel = False

        for p in processes:
            p.daemon = True
            p.start()

        if plot:
            fig = plt.gcf()
            fig.show()
            fig.canvas.draw()

        progress_bar = tqdm(total=n, leave=True)

        if kuma:
            update_progress_kuma(progress_bar)

        while num_completed < n and not cancel:
            try:
                # short timeout so the loop condition is re-checked regularly
                res = result_queue.get(timeout=1)
                pid, cfg_id, result = res
                if not isinstance(result, Exception):
                    cfg = configs[cfg_id]
                    # convert dictionary to x vector that optimizer takes
                    x = [cfg[param] for param in optimizer_dims]
                    # store scores for each config
                    scores[cfg_id] = result

                    out_row = dict(cfg)
                    out_row["evaluation"] = result
                    out_writer.writerow(out_row)
                    # make sure we can see the results in the file as we run the optimizer
                    out_file.flush()
                    opt_results = optimizer.tell(x, result)

                    num_completed += 1
                    pending -= 1

                    if plot:
                        plots.plot_convergence(opt_results)
                        fig.canvas.draw()

                    # sync submission of jobs means we wait for all workers to finish
                    if sync and pending == 0:
                        if num_completed != n:
                            num_submit = min(num_workers, n - num_completed)
                            submit(num_submit, optimizer, optimizer_dims, configs, param_space, config_queue)
                            pending = num_submit
                        else:
                            terminate_flags[pid].set()

                    # async submission of jobs: as soon as we receive one result we submit the next
                    if not sync:
                        if (num_completed + pending) != n:
                            submit(1, optimizer, optimizer_dims, configs, param_space, config_queue)
                            pending += 1
                        else:
                            # signal the current worker for termination
                            terminate_flags[pid].set()

                    progress_bar.update()
                    progress_bar.set_postfix({"best solution ": opt_results["fun"]})

                    if kuma:
                        update_progress_kuma(progress_bar)

                else:
                    # a failed evaluation aborts the whole optimisation
                    _, cfg_id_err, err = error_queue.get()
                    logger.error("configuration {} failed".format(cfg_id_err))
                    logger.error(err)

                    cancel = True
            except Empty:
                # no result yet: poll again
                pass

        # try to wait for process termination
        for process in processes:
            process.join(timeout=0.5)

            if process.is_alive():
                process.terminate()

        progress_bar.close()

    except TomlDecodeError as e:
        logger.error(traceback.format_exc())
        print("\n\n[Invalid parameter file] TOML decode error:\n {}".format(e), file=sys.stderr)
    except ParamDecodeError as e:
        logger.error(traceback.format_exc())
        print("\n\n[Invalid parameter file]\n {}".format(e), file=sys.stderr)
    except Exception:
        logger.error(traceback.format_exc())
        # bare raise keeps the original traceback intact
        raise
    except KeyboardInterrupt:
        pass
    finally:
        # debugging
        # opt_results is only set after out_dir has been computed, so
        # out_dir is always bound when this branch runs
        if opt_results is not None and plot:
            plt_file = '{}_convergence.pdf'.format(name)
            out_path = os.path.join(out_dir, plt_file)
            plt.savefig(out_path, bbox_inches='tight')

        if out_file is not None:
            out_file.close()
def main(params, module, runs, name, workers, gpu, config_ids, cancel):
    """Evaluate every configuration of a parameter grid with a worker pool.

    Each configuration of the grid is executed ``runs`` times by worker
    processes. Failed configurations are logged and, at the end, reported
    on stderr.

    Args:
        params: parameter file handed to ``ParamSpace``.
        module: forwarded untouched to every ``worker`` process.
        runs: number of runs per configuration.
        name: prefix of the log file and of the ``<name>_params.csv``
            summary.
        workers: upper bound on the number of worker processes.
        gpu: if truthy, use one worker per GPU with load below 0.2;
            otherwise workers are capped by the CPU count.
        config_ids: if non-empty, only grid entries whose ``"id"`` is in
            this collection are executed.
        cancel: if truthy, stop at the first failed configuration instead
            of moving on to the next one (also forwarded to the workers).

    Raises:
        SystemExit: when no worker can be started.
    """
    logger = logging.getLogger(__name__)
    handler = logging.FileHandler('{name}.log'.format(name=name), delay=True)
    handler.setLevel(logging.ERROR)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    try:
        if gpu:
            # only GPUs whose current load is below 0.2 count as available
            worker_ids = [g.id for g in GPUtil.getGPUs() if g.load < 0.2]
            num_workers = min(workers, len(worker_ids))

            if num_workers <= 0:
                logger.log(logging.ERROR, "no gpus available")
                sys.exit(1)
        else:
            num_workers = min(workers, mp.cpu_count())
            if num_workers <= 0:
                logger.log(logging.ERROR, "--workers cannot be 0")
                sys.exit(1)

        ps = ParamSpace(filename=params)
        ps.write_configs('{}_params.csv'.format(name))

        param_grid = ps.param_grid(runs=runs)
        n_tasks = ps.size * runs

        if len(config_ids) > 0:
            wanted_ids = set(config_ids)
            selected = [p for p in param_grid if p["id"] in wanted_ids]
            # Count what was actually selected: unknown or duplicated ids
            # would otherwise over-estimate the work and exhaust the grid
            # iterator (StopIteration) while jobs are being submitted.
            n_tasks = len(selected)
            param_grid = iter(selected)

        num_workers = min(n_tasks, num_workers)

        print("----------Parameter Space Runner------------")
        print(":: tasks: {}".format(n_tasks))
        print(":: workers: {}".format(num_workers))
        print("--------------------------------------------")

        config_queue = Queue()
        result_queue = Queue()
        error_queue = Queue()
        progress_bar = tqdm(total=n_tasks, leave=True)

        terminate_flags = [Event() for _ in range(num_workers)]
        processes = [
            Process(target=worker,
                    args=(i, module, config_queue, result_queue, error_queue, terminate_flags[i], cancel))
            for i in range(num_workers)
        ]

        scores = {}
        configs = {}

        def submit_next():
            # hand the next configuration of the grid to the workers
            next_cfg = next(param_grid)
            configs[next_cfg["id"]] = next_cfg
            config_queue.put(next_cfg)

        # submit num worker jobs
        for _ in range(num_workers):
            submit_next()

        for p in processes:
            p.daemon = True
            p.start()

        num_completed = 0
        pending = num_workers
        done = False
        successful = set()

        while num_completed < n_tasks and not done:
            try:
                # short timeout so the loop condition is re-checked regularly
                pid, cfg_id, result = result_queue.get(timeout=1)
            except QueueEmpty:
                continue

            if isinstance(result, Exception):
                # retrieve one error from queue, might not be exactly the one that failed
                # since other worker can write to the queue, but we will have at least one error to retrieve
                _, cfg_id_err, err = error_queue.get()
                logger.error("configuration {} failed".format(cfg_id_err))
                logger.error(err)
                if cancel:
                    done = True
                    continue
            else:
                successful.add(cfg_id)
                scores[cfg_id] = result

            # success, or failure without cancel: account for the finished
            # job and keep the worker busy if anything is left to do
            num_completed += 1
            pending -= 1

            if (num_completed + pending) != n_tasks:
                submit_next()
                pending += 1
            else:
                # signal the current worker for termination no more work to be done
                terminate_flags[pid].set()

            progress_bar.update()

        # try to wait for process termination
        for process in processes:
            process.join(timeout=0.5)

            if process.is_alive():
                process.terminate()

        if len(config_ids) > 0:
            all_ids = set(config_ids)
        else:
            all_ids = set(range(ps.size))
        failed_tasks = all_ids.difference(successful)
        if len(failed_tasks) > 0:
            ids = " ".join(map(str, failed_tasks))
            fail_runs = "failed runs: {}".format(ids)
            print(fail_runs, file=sys.stderr)
            # Logger.warn is a deprecated alias of Logger.warning
            logger.warning(fail_runs)

        progress_bar.close()

    except TomlDecodeError as e:
        logger.error(traceback.format_exc())
        print("\n\n[Invalid parameter file] TOML decode error:\n {}".format(e), file=sys.stderr)
    except ParamDecodeError as e:
        logger.error(traceback.format_exc())
        print("\n\n[Invalid parameter file]\n {}".format(e), file=sys.stderr)