def get_prob_params():
    """ Assembles the settings of the study into a Namespace and returns it.

    The returned Namespace carries the trial count and capital, the worker
    setup, the function caller together with its optimum value (None for the
    'lrg' study), the list of methods, the reporter and the directory/prefix
    used when saving results.

    Raises:
        KeyError: if the part of STUDY_NAME before the first '-' has no entry
            in the per-study table below.
    """
    prob = Namespace()
    prob.study_name = STUDY_NAME
    # Only the text before the first '-' selects the row of the table below.
    base_name = prob.study_name.split('-')[0]

    # Debug runs use a small, fixed number of trials and capital.
    prob.num_trials, prob.max_capital = (
        (3, 10) if IS_DEBUG else (NUM_TRIALS, MAX_CAPITAL))

    # Settings shared by debug and full runs.
    prob.time_distro = TIME_DISTRO
    prob.num_workers = NUM_WORKERS

    # Per study: (noise scale, initial pool size, fidelity dimension).
    per_study = {
        'hartmann3': (0.1, 20, 2),
        'hartmann6': (0.1, 30, 1),
        'hartmann': (0.1, 30, 1),
        'branin': (0.1, 20, 3),
        'borehole': (5.0, 40, 1),
        'park1': (0.2, 30, None),
        'park2': (0.1, 30, None),
        'lrg': (0, 0, None),
    }
    study_noise_scale, _, fidel_dim = per_study[base_name]
    # The pool size stored in the table is discarded; 0 is always used.
    initial_pool_size = 0

    # Noise is only applied when NOISY_EVALS is set.
    prob.noisy_evals = NOISY_EVALS
    noise_type, noise_scale = (
        ('gauss', study_noise_scale) if NOISY_EVALS else ('no_noise', None))

    # Function caller and its optimum value.
    # NOTE(review): this compares the full study name, whereas the table lookup
    # above uses only the prefix before '-' -- confirm that is intended.
    if prob.study_name == 'lrg':
        prob.func_caller = LRGOptFunctionCaller('./lrg_sim')
        prob.opt_val = None
    else:
        prob.func_caller = get_syn_func_caller(
            STUDY_NAME,
            noise_type=noise_type,
            noise_scale=noise_scale,
            fidel_dim=fidel_dim)
        # Only the fourth of the seven returned items is kept.
        (_, _, _, prob.opt_val, _, _, _) = get_syn_function(
            STUDY_NAME, noise_type=noise_type, noise_scale=noise_scale)

    # Worker manager, bookkeeping for saved results, and reporter.
    prob.worker_manager = SyntheticWorkerManager(
        prob.num_workers, time_distro='caller_eval_cost')
    debug_suffix = '-debug' if IS_DEBUG else ''
    prob.save_file_prefix = prob.study_name + debug_suffix
    prob.methods = METHODS
    prob.save_results_dir = SAVE_RESULTS_DIR
    prob.reporter = get_reporter('default')

    # Options handed on for the evaluations.
    prob.evaluation_options = Namespace(
        prev_eval_points='None', initial_pool_size=initial_pool_size)
    return prob
Exemple #2
0
def main():
    """ Main function.

    Recreates TMP_DIR and EXP_DIR from scratch, builds a function caller for
    DATASET, runs bo_from_func_caller with a MultiProcessingWorkerManager and
    reports the optimum that was found. All reporter output goes to LOG_FILE,
    which is closed when the function finishes, including when it fails.
    """
    # Make directories (any existing contents are removed first).
    if os.path.exists(TMP_DIR):
        shutil.rmtree(TMP_DIR)
    if os.path.exists(EXP_DIR):
        shutil.rmtree(EXP_DIR)
    os.mkdir(TMP_DIR)
    os.mkdir(EXP_DIR)

    # The log file is managed by a `with` block so that its handle is flushed
    # and closed on every exit path instead of being left open.
    with open(LOG_FILE, 'w') as log_file:
        # Obtain a reporter which writes to the log file.
        reporter = get_reporter(log_file)

        # First, obtain a function caller: A function_caller is used to
        # evaluate a function defined on a given domain (and a fidelity
        # space). The train_params can be used to specify additional training
        # parameters such as the batch size etc.
        if DATASET == 'cifar10':
            # We have defined the CNNFunctionCaller in cnn_function_caller.py.
            train_params = Namespace(data_dir=CIFAR_DATA_DIR)
            func_caller = CNNFunctionCaller(CNN_CONFIG_FILE,
                                            train_params,
                                            reporter=reporter,
                                            tmp_dir=TMP_DIR)
        else:
            # We have defined the MLPFunctionCaller in mlp_function_caller.py.
            train_params = Namespace(
                data_train_file=get_train_file_name(DATASET))
            func_caller = MLPFunctionCaller(MLP_CONFIG_FILE,
                                            train_params,
                                            reporter=reporter,
                                            tmp_dir=TMP_DIR)

        # Obtain a worker manager: A worker manager (defined in
        # opt/worker_manager.py) is used to manage (possibly) multiple
        # workers. For a MultiProcessingWorkerManager, the budget should be
        # given in wall clock seconds.
        worker_manager = MultiProcessingWorkerManager(GPU_IDS, EXP_DIR)

        # Run the optimiser
        opt_val, opt_point, _ = bo_from_func_caller(func_caller,
                                                    worker_manager,
                                                    BUDGET,
                                                    is_mf=True,
                                                    reporter=reporter)

        # Convert to "raw" format. The first index in the config file is the
        # neural net.
        raw_opt_point = func_caller.get_raw_domain_point_from_processed(
            opt_point)
        opt_nn = raw_opt_point[0]

        # Print the optimal value and visualise the best network.
        reporter.writeln('\nOptimum value found: %0.5f' % (opt_val))
        if visualise_nn is not None:
            visualise_file = os.path.join(EXP_DIR, 'optimal_network')
            reporter.writeln('Optimal network visualised in %s.eps.' %
                             (visualise_file))
            visualise_nn(opt_nn, visualise_file)
        else:
            reporter.writeln(
                'Install graphviz (pip install graphviz) to visualise the network.'
            )
Exemple #3
0
def main():
    """ Runs a Chemist optimisation and records the best molecule found.

    Command line arguments select the objective, dataset, seed, kernel and
    budget. Reporter output is written to EXP_LOG_FILE, the optimal molecule
    is drawn to 'optimal_molecule.png' inside EXP_DIR and its synthesis path
    is pickled to SYN_PATH_FILE. The log file is closed when the function
    finishes, including when it fails.
    """
    setup_logging()
    args = parse_args()

    # The log file is managed by a `with` block so that its handle is flushed
    # and closed on every exit path instead of being left open.
    with open(EXP_LOG_FILE, 'w') as log_file:
        # Obtain a reporter and worker manager
        reporter = get_reporter(log_file)
        worker_manager = SyntheticWorkerManager(num_workers=N_WORKERS,
                                                time_distro='const')

        # Problem settings
        objective_func = get_objective_by_name(args.objective)
        # check MolDomain constructor for full argument list:
        domain_config = {
            'data_source': args.dataset,
            # not specifying constraint_checker defaults to None
            'constraint_checker': 'organic',
            'sampling_seed': args.seed,
        }
        chemist_args = {
            'acq_opt_method': 'rand_explorer',
            'init_capital': args.init_pool_size,
            # e.g. 'distance_kernel_expsum', 'similarity_kernel', 'wl_kernel'
            'dom_mol_kernel_type': args.kernel,
            'acq_opt_max_evals': args.steps,
            'objective': args.objective,
            'max_pool_size': args.max_pool_size,
            'report_results_every': 1,
            'gpb_hp_tune_criterion': 'ml',
        }

        chemist = Chemist(objective_func,
                          domain_config=domain_config,
                          chemist_args=chemist_args,
                          is_mf=False,
                          worker_manager=worker_manager,
                          reporter=reporter)
        opt_val, opt_point, history = chemist.run(args.budget)

        # convert to raw format; the molecule is the first entry
        raw_opt_point = chemist.get_raw_domain_point_from_processed(opt_point)
        opt_mol = raw_opt_point[0]

        # Print the optimal value and visualize the molecule and path.
        reporter.writeln(f"\nOptimum value found: {opt_val}")
        reporter.writeln(
            f"Optimum molecule: {opt_mol} with formula {opt_mol.to_formula()}")
        reporter.writeln(f"Synthesis path: {opt_mol.get_synthesis_path()}")

        # visualize mol/synthesis path
        visualize_file = os.path.join(EXP_DIR, 'optimal_molecule.png')
        reporter.writeln(f'Optimal molecule visualized in {visualize_file}')
        visualize_mol(opt_mol, visualize_file)

        with open(SYN_PATH_FILE, 'wb') as f:
            pkl.dump(opt_mol.get_synthesis_path(), f)
 def __init__(self, config, train_params, descr='', debug_mode=False,
              reporter='silent'):
     """ Sets up the function caller from a config and training parameters.

     config is converted into keyword arguments for the parent constructor
     via _get_cpfc_args_from_config. train_params is deep-copied before being
     stored, so later changes by the caller do not affect this object.
     debug_mode is stored as given and reporter is resolved through
     get_reporter.
     """
     parent_kwargs = _get_cpfc_args_from_config(config)
     # The parent receives None as its first positional argument and this
     # object's own _fidel_cost as the fidelity cost function.
     super(NNFunctionCaller, self).__init__(
         None,
         descr=descr,
         fidel_cost_func=self._fidel_cost,
         **parent_kwargs)
     self.train_params = deepcopy(train_params)
     self.debug_mode = debug_mode
     self.reporter = get_reporter(reporter)
 def __init__(self, objective_func, domain_config, chemist_args=None,
              worker_manager='default', reporter='default', is_mf=False,
              mf_strategy=None):
     """ Wires up the reporter, worker manager, function caller and options.

     reporter and worker_manager are resolved through get_reporter and
     get_worker_manager. A domain_config of None is treated as an empty dict.
     chemist_args is completed by fill_with_default_chemist_args before it is
     used to build the distance computers and the options.
     """
     self.reporter = get_reporter(reporter)
     self.worker_manager = get_worker_manager(worker_manager)
     domain_config = {} if domain_config is None else domain_config
     self.func_caller = MolFunctionCaller(
         objective_func,
         domain_config=domain_config,
         reporter=self.reporter)
     self.is_mf = is_mf
     self.mf_strategy = mf_strategy
     # Kernel and explorer-related settings.
     filled_args = self.fill_with_default_chemist_args(chemist_args)
     self.domain_dist_computers = self.get_dist_computers(filled_args)
     self.options = self.prepare_chemist_options(filled_args, domain_config)
Exemple #6
0
def get_prob_params():
  """ Assembles the settings of the study into a Namespace and returns it.

  The study is selected by STUDY_NAME. Evaluations are noiseless, and the
  capital is the study's budget converted from hours to seconds (a fixed
  0.05 hours when IS_DEBUG is set).

  Raises:
    KeyError: if STUDY_NAME has no entry in the per-study table below.
  """
  prob = Namespace()
  prob.study_name = STUDY_NAME
  # Debug runs use a small, fixed number of trials and evaluations.
  prob.num_trials, prob.max_num_evals = (
    (3, 20) if IS_DEBUG else (NUM_TRIALS, MAX_NUM_EVALS))
  # Shared by debug and full runs.
  prob.num_workers = NUM_WORKERS

  # Per study: (config file, objective, cost function, budget in hours).
  per_study = {
    'supernova': (
      '../demos_real/supernova/config_mf.json',
      supernova_obj_mf, supernova_cost_mf, 4.0),
    'salsa': (
      '../demos_real/salsa/config_salsa_energy_mf.json',
      salsa_obj_mf, salsa_cost_mf, 8.0),
    'gbcsensorless': (
      '../demos_real/skltree/config_gbc_mf.json',
      gbcsensorless_obj_mf, gbcsensorless_cost_mf, 4.0),
    'gbrprotein': (
      '../demos_real/skltree/config_gbr_mf.json',
      gbrprotein_obj_mf, gbrprotein_cost_mf, 3.0),
    'gbrnaval': (
      '../demos_real/skltree/config_naval_gbr_mf.json',
      gbrnaval_obj_mf, gbrnaval_cost_mf, 3.0),
    'rfrnews': (
      '../demos_real/skltree/config_rfr_mf.json',
      rfrnews_obj_mf, rfrnews_cost_mf, 6.0),
  }
  config_path, objective, cost_func, hours = per_study[prob.study_name]

  # No noise is added to the evaluations.
  prob.noisy_evals = False

  # Build the function caller from the study's config file.
  config = load_config_file(config_path)
  caller = get_multifunction_caller_from_config(
    objective, config,
    raw_fidel_cost_func=cost_func,
    noise_type='no_noise',
    noise_scale=None)

  # Capital is expressed in seconds.
  budget_hours = 0.05 if IS_DEBUG else hours
  prob.max_capital = budget_hours * 60 * 60

  # Store everything in prob.
  prob.func_caller = caller
  prob.tmp_dir = get_evaluation_tmp_dir(prob.study_name)
  prob.worker_manager = RealWorkerManager(prob.num_workers, prob.tmp_dir)
  debug_suffix = '-debug' if IS_DEBUG else ''
  prob.save_file_prefix = prob.study_name + debug_suffix
  prob.methods = METHODS
  prob.save_results_dir = SAVE_RESULTS_DIR
  prob.reporter = get_reporter('default')

  # Options handed on for the evaluations.
  prob.evaluation_options = Namespace(
    prev_eval_points='none', initial_pool_size=0)
  return prob
def get_prob_params():
    """ Assembles the settings of the study into a Namespace and returns it.

    The study is selected by STUDY_NAME; its config file is looked up below
    DRAGONFLY_EXPERIMENTS_DIR. When the function caller has a fidelity cost
    function, the capital is multiplied by the cost at the caller's
    fidel_to_opt.

    Raises:
        KeyError: if STUDY_NAME has no entry in the per-study table below.
    """
    prob = Namespace()
    prob.study_name = STUDY_NAME
    # Debug runs use a small, fixed number of trials and capital.
    prob.num_trials, prob.max_capital = (
        (3, 10) if IS_DEBUG else (NUM_TRIALS, MAX_CAPITAL))
    # Shared by debug and full runs.
    prob.time_distro = TIME_DISTRO
    prob.num_workers = NUM_WORKERS

    # Per study: (config file suffix, objective, cost function, noise scale,
    # initial pool size, <unused>).
    per_study = {
        'branin': (
            'synthetic/branin/config_mf.json',
            branin_mf, cost_branin_mf, 0.1, 0, 1),
        'hartmann3_2': (
            'synthetic/hartmann3_2/config_mf.json',
            hartmann3_2_mf, cost_hartmann3_2_mf, 0.1, 0, 1),
        'hartmann6_4': (
            'synthetic/hartmann6_4/config_mf.json',
            hartmann6_4_mf, cost_hartmann6_4_mf, 0.1, 0, 1),
        'borehole_6': (
            'synthetic/borehole_6/config_mf.json',
            borehole_6_mf, cost_borehole_6_mf, 1, 0, 1),
        'park2_4': (
            'synthetic/park2_4/config_mf.json',
            park2_4_mf, cost_park2_4_mf, 0.3, 0, 1),
        'park2_3': (
            'synthetic/park2_3/config_mf.json',
            park2_3_mf, cost_park2_3_mf, 0.1, 0, 1),
        'park1_3': (
            'synthetic/park1_3/config_mf.json',
            park1_3_mf, cost_park1_3_mf, 0.5, 0, 1),
    }
    (config_suffix, objective, cost_func, study_noise_scale,
     initial_pool_size, _) = per_study[prob.study_name]
    config_path = os.path.join(DRAGONFLY_EXPERIMENTS_DIR, config_suffix)

    # Noise is only applied when NOISY_EVALS is set.
    prob.noisy_evals = NOISY_EVALS
    noise_type, noise_scale = (
        ('gauss', study_noise_scale) if NOISY_EVALS else ('no_noise', None))

    # Build the function caller from the study's config file.
    config = load_config_file(config_path)
    caller = get_multifunction_caller_from_config(
        objective,
        config,
        raw_fidel_cost_func=cost_func,
        noise_type=noise_type,
        noise_scale=noise_scale)

    # Scale the capital by the cost at fidel_to_opt when a fidelity cost
    # function is available; otherwise it is left as set above.
    if hasattr(caller, 'fidel_cost_func'):
        prob.max_capital = (
            prob.max_capital * caller.fidel_cost_func(caller.fidel_to_opt))

    # Store everything in prob.
    prob.func_caller = caller
    prob.worker_manager = SyntheticWorkerManager(
        prob.num_workers, time_distro='caller_eval_cost')
    debug_suffix = '-debug' if IS_DEBUG else ''
    prob.save_file_prefix = prob.study_name + debug_suffix
    prob.methods = METHODS
    prob.save_results_dir = SAVE_RESULTS_DIR
    prob.reporter = get_reporter('default')

    # Options handed on for the evaluations.
    prob.evaluation_options = Namespace(
        prev_eval_points='none', initial_pool_size=initial_pool_size)
    return prob
        f"Minimum synthesis score over the path: {compute_min_sa_score(top_point)}"
    )
    with open(SYN_PATH_FILE, 'wb') as f:
        pkl.dump(top_point.get_synthesis_path(), f)

    sorted_by_prop = sorted(pool, key=obj_func)[-5:]
    for opt_mol in sorted_by_prop:
        min_sa_score = compute_min_sa_score(opt_mol)
        reporter.writeln(
            f"Minimum synthesis score of optimal molecules: {min_sa_score}")

    vals = history['objective_vals']
    plt.title(f'Optimizing {objective} with random explorer')
    plt.plot(range(len(vals)), vals)
    plt.savefig(PLOT_FILE)


if __name__ == "__main__":
    # Script entry point: log the experiment settings, then run the
    # exploration with every parsed command line argument passed on as a
    # keyword argument.
    # The log file is managed by a `with` block so that its handle is flushed
    # and closed on every exit path instead of being left open.
    with open(EXP_LOG_FILE, 'w') as log_file:
        reporter = get_reporter(log_file)
        args = parse_args()
        exp_settings = vars(args)
        reporter.writeln(
            f"RandomExplorer experiment settings: objective {exp_settings['objective']}, "
            f"init pool of size {exp_settings['init_pool_size']}, "
            f"dataset {exp_settings['dataset']}, seed {exp_settings['seed']}, "
            f"max_pool_size {exp_settings['max_pool_size']}, budget {exp_settings['budget']}"
        )
        explore_and_validate_synth(**exp_settings, reporter=reporter)
 def __init__(self, objective, domain_config, descr='', reporter='silent'):
     """ Sets up the function caller for the given objective.

     domain_config is converted into keyword arguments for the parent
     constructor via get_cp_func_caller_args, and reporter is resolved
     through get_reporter.
     """
     parent_kwargs = get_cp_func_caller_args(domain_config)
     super(MolFunctionCaller, self).__init__(
         objective, descr=descr, **parent_kwargs)
     self.reporter = get_reporter(reporter)