def __init__(self, working_dir, config_space_file):
    """Load HpBandSter results from *working_dir* and prepare plotting data.

    Builds, per budget, time-sorted (hours-since-start, loss, model_based_pick)
    points plus the running-best curve, and per-configuration point lists.

    :param working_dir: directory containing the HpBandSter result logs.
    :param config_space_file: path to the configuration-space definition file.
    """
    self.working_dir = working_dir
    self.config_space_file = config_space_file
    self.result_logger = ResultLogger(working_dir=working_dir)
    self.config_space = ExperimentArguments.read_configuration_space(config_space_file)
    self.results = {}
    experiment_start_time = np.inf

    # Plot styling.
    self.alpha = 0.4
    self.size = 40
    colors = ["windows blue", "amber", "green", "red", ]
    self.pal = sns.xkcd_palette(colors)
    self.lines = {}

    # First pass: find the experiment start time (earliest 'started' stamp
    # across all logged results).
    for configuration, result in self.result_logger.get_results(self.config_space):
        budget, result_timestamps, result, exception = result
        t_started = int(result_timestamps['started'])
        experiment_start_time = min(t_started, experiment_start_time)

    # Second pass: collect one point per finished run.
    for configuration, result, info in self.result_logger.get_results(
            self.config_space, with_info=True):
        budget, result_timestamps, result, exception = result
        if result is None:
            # Crashed/missing run: plot it at infinity.
            loss = np.inf
        else:
            # Rescaled for plotting; presumably maps a negative accuracy-style
            # loss into a percentage — TODO confirm against the reader/trainer.
            loss = 100 + float(result['loss']) * 100
        t_finished = int(result_timestamps['finished'])
        point = ((t_finished - experiment_start_time) / 3600, loss,
                 info['model_based_pick'])
        # Hard plotting cut-off: drop points recorded after 170 hours.
        if point[0] > 170:
            continue
        # setdefault replaces the original try/append/except-KeyError pattern.
        self.results.setdefault(budget, []).append(point)
        self.lines.setdefault(configuration, []).append(point)

    # Running best (monotone non-increasing loss) per budget, sorted by time.
    self.best_points = {}
    for budget_key, points in self.results.items():
        self.best_points[budget_key] = []
        best = np.inf
        for t_hours, loss, _model_based in sorted(points, key=lambda p: p[0]):
            if loss < best:
                best = loss
                self.best_points[budget_key].append((t_hours, loss))
def __init__(self):
    """Parse arguments, resolve the configured classes and build the helpers."""
    # Restricted first parse: only the 'experiment' section, just enough to
    # learn which classes to load and whether this process is the master.
    init_args = ExperimentArguments(sections=('experiment', ),
                                    use_all_cli_args=False)
    init_args.add_class_arguments(Experiment)
    init_args.get_arguments()

    self.is_master = init_args.is_master
    self.initialize_backend(init_args.backend.title())

    # Turn the configured class names into actual classes.
    self.ModelClass = getattr(model_module, init_args.model_class_name)
    self.ReaderClass = getattr(reader_module, init_args.reader_class_name)
    self.BudgetDecoderClass = getattr(src.hpbandster.budget_decoder,
                                      init_args.budget_decoder_class_name)
    self.TrainerClass = TrainerClass

    # Full parse: merge the argument declarations of every involved class.
    full_args = ExperimentArguments(use_all_cli_args=True)
    for cls in (Experiment, self.ModelClass, self.ReaderClass,
                self.TrainerClass, TrainManager, BayesianOptimizer,
                ConfigGenerator):
        full_args.add_class_arguments(cls)
    full_args.get_arguments()
    self.experiment_arguments = full_args

    setup_logging(full_args.working_dir,
                  logging.DEBUG if init_args.verbose else logging.INFO)

    self.train_manager = TrainManager(ModelClass=self.ModelClass,
                                      ReaderClass=self.ReaderClass,
                                      TrainerClass=self.TrainerClass,
                                      **full_args.get_arguments())
    self.budget_decoder = self.BudgetDecoderClass(**full_args.get_arguments())
def __init__(self, working_dir, config_space_file):
    """Load HpBandSter results from *working_dir* for plotting.

    Builds, per budget, time-sorted (hours-since-start, loss, model_based_pick)
    points and the running-best curve.

    :param working_dir: directory containing the HpBandSter result logs.
    :param config_space_file: path to the configuration-space definition file.
    """
    self.working_dir = working_dir
    self.config_space_file = config_space_file
    self.result_logger = ResultLogger(working_dir=working_dir)
    self.config_space = ExperimentArguments.read_configuration_space(
        config_space_file)
    self.results = {}
    experiment_start_time = np.inf

    # Plot styling.
    self.alpha = 0.8
    self.size = 50

    # First pass: find the experiment start time (earliest 'started' stamp).
    for configuration, result in self.result_logger.get_results(
            self.config_space):
        budget, result_timestamps, result, exception = result
        t_started = int(result_timestamps['started'])
        experiment_start_time = min(t_started, experiment_start_time)

    # Second pass: collect one point per finished run, grouped by budget.
    for configuration, result, info in self.result_logger.get_results(
            self.config_space, with_info=True):
        budget, result_timestamps, result, exception = result
        # Crashed/missing runs are plotted at infinity.
        loss = np.inf if result is None else float(result['loss'])
        t_finished = int(result_timestamps['finished'])
        point = ((t_finished - experiment_start_time) / 3600, loss,
                 info['model_based_pick'])
        # setdefault replaces the original try/append/except-KeyError pattern.
        self.results.setdefault(budget, []).append(point)

    # Running best (monotone non-increasing loss) per budget, sorted by time.
    self.best_points = {}
    for budget_key, points in self.results.items():
        self.best_points[budget_key] = []
        best = np.inf
        for t_hours, loss, _model_based in sorted(points, key=lambda p: p[0]):
            if loss < best:
                best = loss
                self.best_points[budget_key].append((t_hours, loss))
def main():
    """Train and validate one randomly sampled configuration at all budgets."""
    # Experiment reads all arguments from the .ini file and the CLI.
    experiment = Experiment()
    train_manager = experiment.train_manager
    base_arguments = experiment.experiment_arguments

    # Draw one random point from the configuration space and fold it into
    # the experiment arguments.
    space = ExperimentArguments.read_configuration_space(
        base_arguments.config_space_file)
    base_arguments = base_arguments.updated_with_configuration(
        space.sample_configuration())

    # Expand into one argument set per budget and run each of them.
    decoder = FullBudgetDecoder()
    for run_arguments in decoder.adjusted_arguments(base_arguments, budget=None):
        # Each training run logs into its own fresh directory.
        run_arguments.run_log_folder = train_manager.get_unique_dir()
        train_metrics = train_manager.train(run_arguments)
        valid_metrics = train_manager.validate(
            run_arguments, data_type=base_arguments.validation_data_type)

        # Report results to the user.
        logger.info('Train Metrics:')
        logger.info(json.dumps(train_metrics.get_summarized_results(),
                               indent=2, sort_keys=True))
        logger.info('%s Metrics:' % base_arguments.validation_data_type.title())
        logger.info(json.dumps(valid_metrics.get_summarized_results(),
                               indent=2, sort_keys=True))
def __init__(self, working_dir, config_space_file, n_iterations, run_id, eta,
             min_budget, max_budget, ping_interval, nic_name, **kwargs):
    """Set up the optimizer master: result logging, config generation,
    a local Pyro nameserver, and precomputed Hyperband schedule values.

    :param working_dir: directory for logs, results and the Pyro conf file.
    :param config_space_file: path to the configuration-space definition.
    :param n_iterations: number of optimizer iterations to run.
    :param run_id: identifier passed to the HpBandSter master.
    :param eta: Hyperband halving rate.
    :param min_budget: smallest budget per configuration.
    :param max_budget: largest budget per configuration.
    :param ping_interval: worker ping interval for the master.
    :param nic_name: network interface used for the local nameserver.
    :param kwargs: forwarded to ConfigGenerator.
    :raises RuntimeError: if a Pyro conf file already exists in working_dir.
    """
    # Class that is used by the HpBandSter Master to store and manage job
    # configurations and results. At the beginning it will load results from
    # previous HpBandSter runs; those can be used to initialize
    # config_generator.
    self.results_logger = ResultLogger(working_dir)

    # Config space that holds all hyperparameters, default values and
    # possible ranges.
    self.config_space = ExperimentArguments.read_configuration_space(
        config_space_file)

    # Config generator that builds a model and samples promising
    # configurations. Initialized from previous configurations if present.
    self.config_generator = ConfigGenerator(self.config_space,
                                            working_dir=working_dir,
                                            **kwargs)
    self.config_generator.load_from_results_logger(self.results_logger)

    # Start a local Pyro nameserver and persist its address so workers can
    # find it.
    self.pyro_conf_file = os.path.join(working_dir, 'pyro.conf')
    ns_host, ns_port = start_local_nameserver(nic_name=nic_name)
    logger.info('Started nameserver with %s %s' % (ns_host, ns_port))

    # NOTE(review): exists-check followed by open('w') is racy; open(..., 'x')
    # would be atomic — confirm whether concurrent masters are possible.
    if os.path.exists(self.pyro_conf_file):
        raise RuntimeError('Pyro conf file already exists')

    with open(self.pyro_conf_file, 'w') as f:
        logger.info('Creating new Pyro conf file.')
        json.dump({
            'ns_host': ns_host,
            'ns_port': ns_port
        }, f, sort_keys=True, indent=2)

    # Hand control over to the HpBandSter master base class, wiring in our
    # config generator, result logger and the nameserver we just started.
    super().__init__(run_id=run_id,
                     config_generator=self.config_generator,
                     working_directory=working_dir,
                     ping_interval=ping_interval,
                     nameserver=ns_host,
                     nameserver_port=ns_port,
                     host=ns_host,
                     logger=master_logger,
                     result_logger=self.results_logger)

    # Ugly, but no other way to set it.
    self.dispatcher.logger = dispatcher_logger

    # Hyperband related stuff.
    self.eta = eta
    self.min_budget = min_budget
    self.max_budget = max_budget

    # Precompute some HB stuff: number of successive-halving iterations and
    # the geometric budget sequence between min_budget and max_budget.
    self.max_SH_iter = -int(
        np.log(min_budget / max_budget) / np.log(eta)) + 1
    self.budgets = max_budget * np.power(
        eta, -np.linspace(self.max_SH_iter - 1, 0, self.max_SH_iter))

    # Record run settings (presumably self.config is provided by the parent
    # class — TODO confirm).
    self.config.update({
        'eta': eta,
        'min_budget': min_budget,
        'max_budget': max_budget,
        'budgets': self.budgets,
        'max_SH_iter': self.max_SH_iter,
        'min_points_in_model': self.config_generator.min_points_in_model,
        'top_n_percent': self.config_generator.top_n_percent,
        'num_samples': self.config_generator.num_samples,
        'random_fraction': self.config_generator.random_fraction,
        'bandwidth_factor': self.config_generator.bw_factor,
        'min_bandwidth': self.config_generator.min_bandwidth
    })

    self.n_iterations = n_iterations
    # NOTE(review): min_budget/max_budget/eta below re-assign attributes
    # already set above — redundant but harmless.
    self.min_budget = min_budget
    self.max_budget = max_budget
    self.eta = eta
    self.ping_interval = ping_interval
    self.run_id = run_id
    self.nic_name = nic_name
# Update models for this budget self.kde_models[budget] = dict(good=good_kde, bad=bad_kde) logger.debug('Build new model for budget %f based on %i/%i split.' % (budget, n_good, n_bad)) logger.debug('Best loss for this budget:%f' % (np.min(train_losses))) return True if __name__ == '__main__': from src.experiment_arguments import ExperimentArguments logger.setLevel(logging.DEBUG) working_dir = '/mhome/chrabasp/EEG_Results/BO_Anomaly_6' config_space_file = '/mhome/chrabasp/Workspace/EEG/config/anomaly_simple.pcs' results_logger = ResultLogger(working_dir=working_dir) config_space = ExperimentArguments.read_configuration_space(config_space_file) config_generator = ConfigGenerator(config_space=config_space, working_dir=working_dir, min_points_in_model=0, top_n_percent=20, num_samples=100, random_fraction=0.3, bandwidth_factor=3.0, min_bandwidth=0.001, bw_estimation_method='normal_reference') config_generator.load_from_results_logger(results_logger) model = config_generator.kde_models[27.0]['good'] model_bad = config_generator.kde_models[27.0]['bad'] # print(model)
class Experiment:
    """
    Current design:
    User provides the names of the classes that should be used for reading
    the data, decoding the hyperband budget, etc.. This way each experiment
    can be saved and restored just based on the .ini file that is saved
    together with the logs.
    Drawback: Hard to use it as an external library, since new readers,
    models etc. have to be placed in a correct directory.
    Solution: We could add modules from which imports are made as an
    arguments to this class constructor. We leave it like this for now.
    """

    # Parsing arguments
    @staticmethod
    def add_arguments(parser):
        """Declare the 'experiment' section arguments on *parser* and return it."""
        parser.section('experiment')
        parser.add_argument(
            "working_dir",
            type=str,
            help="Directory for results and other important stuff.")
        parser.add_argument("model_class_name",
                            default='SimpleRNN',
                            help="Model class used for the training.")
        parser.add_argument("reader_class_name",
                            default='AnomalyDataReader',
                            help="Reader class used for the training.")
        parser.add_argument(
            "budget_decoder_class_name",
            default='SimpleBudgetDecoder',
            help="Class used to update setting based on higher budget.")
        parser.add_argument("backend",
                            default="Pytorch",
                            help="Whether to use Tensorflow or Pytorch.")
        parser.add_argument("verbose",
                            type=int,
                            default=0,
                            choices=[0, 1],
                            help="If set to 1 then log debug messages.")
        parser.add_argument(
            "is_master",
            type=int,
            default=0,
            choices=[0, 1],
            help="If set to 1 then it will run thread for BO optimization.")
        return parser

    def __init__(self):
        """Parse arguments, resolve configured classes and build helpers."""
        # Parse initial experiment arguments: only the 'experiment' section,
        # enough to know which classes to load.
        initial_arguments = ExperimentArguments(sections=('experiment', ),
                                                use_all_cli_args=False)
        initial_arguments.add_class_arguments(Experiment)
        initial_arguments.get_arguments()

        self.is_master = initial_arguments.is_master
        self.initialize_backend(initial_arguments.backend.title())

        # Resolve the configured class names into actual classes.
        self.ModelClass = getattr(model_module,
                                  initial_arguments.model_class_name)
        self.ReaderClass = getattr(reader_module,
                                   initial_arguments.reader_class_name)
        self.BudgetDecoderClass = getattr(
            src.hpbandster.budget_decoder,
            initial_arguments.budget_decoder_class_name)
        self.TrainerClass = TrainerClass

        # Populate experiment arguments with arguments from specific classes.
        self.experiment_arguments = ExperimentArguments(use_all_cli_args=True)
        self.experiment_arguments.add_class_arguments(Experiment)
        self.experiment_arguments.add_class_arguments(self.ModelClass)
        self.experiment_arguments.add_class_arguments(self.ReaderClass)
        self.experiment_arguments.add_class_arguments(self.TrainerClass)
        self.experiment_arguments.add_class_arguments(TrainManager)
        self.experiment_arguments.add_class_arguments(BayesianOptimizer)
        self.experiment_arguments.add_class_arguments(ConfigGenerator)
        self.experiment_arguments.get_arguments()

        verbose = initial_arguments.verbose
        setup_logging(self.experiment_arguments.working_dir,
                      logging.DEBUG if verbose else logging.INFO)

        self.train_manager = TrainManager(
            ModelClass=self.ModelClass,
            ReaderClass=self.ReaderClass,
            TrainerClass=self.TrainerClass,
            **self.experiment_arguments.get_arguments())
        self.budget_decoder = self.BudgetDecoderClass(
            **self.experiment_arguments.get_arguments())

    # We might implement in the future backends for different libraries,
    # for example Tensorflow.
    @staticmethod
    def initialize_backend(backend):
        assert backend == 'Pytorch', 'Currently only Pytorch backend is implemented'