def __init__(self, working_dir, config_space_file):
    """Read all logged runs and prepare the point collections used for plotting.

    Args:
        working_dir: Directory handed to ``ResultLogger``.
        config_space_file: File handed to
            ``ExperimentArguments.read_configuration_space``.

    After construction:
        * ``self.results`` maps budget -> list of
          ``(hours, value, model_based_pick)`` tuples.
        * ``self.lines`` maps configuration -> list of the same tuples.
        * ``self.best_points`` maps budget -> list of ``(hours, value)``
          pairs at which a new lowest value was reached.
    """
    self.working_dir = working_dir
    self.config_space_file = config_space_file
    self.result_logger = ResultLogger(working_dir=working_dir)
    self.config_space = ExperimentArguments.read_configuration_space(config_space_file)

    self.results = {}

    # Scatter styling.
    self.alpha = 0.4
    self.size = 40
    self.pal = sns.xkcd_palette(["windows blue", "amber", "green", "red"])

    self.lines = {}

    # Pass 1: the earliest 'started' timestamp is the time origin.
    t_origin = np.inf
    for _, record in self.result_logger.get_results(self.config_space):
        _, stamps, _, _ = record
        t_origin = min(int(stamps['started']), t_origin)

    # Pass 2: turn every record into an (hours, value, model_based_pick) point.
    for configuration, record, info in self.result_logger.get_results(self.config_space, with_info=True):
        budget, stamps, outcome, _ = record

        if outcome is None:
            # No result recorded: can never become a best point.
            value = np.inf
        else:
            # NOTE(review): presumably maps a negative-accuracy loss to an
            # error percentage - confirm against the worker's loss definition.
            value = 100 + float(outcome['loss']) * 100

        # Dividing by 3600 presumably converts seconds to hours.
        hours = (int(stamps['finished']) - t_origin) / 3600
        point = (hours, value, info['model_based_pick'])

        # Points finishing later than 170 hours are dropped.
        if hours > 170:
            continue

        self.results.setdefault(budget, []).append(point)
        self.lines.setdefault(configuration, []).append(point)

    # Per budget: walk the points in time order and keep each improvement.
    self.best_points = {}
    for budget, points in self.results.items():
        trajectory = []
        self.best_points[budget] = trajectory
        incumbent = np.inf
        for hours, value, _ in sorted(points, key=lambda p: p[0]):
            if value < incumbent:
                incumbent = value
                trajectory.append((hours, value))
def __init__(self, working_dir, config_space_file):
    """Read all logged runs and prepare the point collections used for plotting.

    Args:
        working_dir: Directory handed to ``ResultLogger``.
        config_space_file: File handed to
            ``ExperimentArguments.read_configuration_space``.

    After construction:
        * ``self.results`` maps budget -> list of
          ``(hours, loss, model_based_pick)`` tuples.
        * ``self.best_points`` maps budget -> list of ``(hours, loss)``
          pairs at which a new lowest loss was reached.
    """
    self.working_dir = working_dir
    self.config_space_file = config_space_file
    self.result_logger = ResultLogger(working_dir=working_dir)
    self.config_space = ExperimentArguments.read_configuration_space(
        config_space_file)

    self.results = {}

    # Scatter styling.
    self.alpha = 0.8
    self.size = 50

    # Pass 1: the earliest 'started' timestamp is the time origin.
    t_origin = np.inf
    for _, record in self.result_logger.get_results(self.config_space):
        _, stamps, _, _ = record
        t_origin = min(int(stamps['started']), t_origin)

    # Pass 2: turn every record into an (hours, loss, model_based_pick) point.
    for _, record, info in self.result_logger.get_results(
            self.config_space, with_info=True):
        budget, stamps, outcome, _ = record

        # A missing result is stored as +inf so it never counts as a best point.
        loss = np.inf if outcome is None else float(outcome['loss'])

        # Dividing by 3600 presumably converts seconds to hours.
        hours = (int(stamps['finished']) - t_origin) / 3600
        point = (hours, loss, info['model_based_pick'])
        self.results.setdefault(budget, []).append(point)

    # Per budget: walk the points in time order and keep each improvement.
    self.best_points = {}
    for budget, points in self.results.items():
        trajectory = []
        self.best_points[budget] = trajectory
        incumbent = np.inf
        for hours, loss, _ in sorted(points, key=lambda p: p[0]):
            if loss < incumbent:
                incumbent = loss
                trajectory.append((hours, loss))
def __init__(self, working_dir, config_space_file, n_iterations, run_id, eta,
             min_budget, max_budget, ping_interval, nic_name, **kwargs):
    """Set up result storage, the config generator, a local nameserver and the budgets.

    Args:
        working_dir: Directory used for the result logger, the config
            generator and the ``pyro.conf`` file.
        config_space_file: File describing the hyperparameter space.
        n_iterations: Stored on the instance as ``self.n_iterations``.
        run_id: Forwarded to the parent constructor and stored.
        eta: Ratio between consecutive budgets.
        min_budget: Smallest budget.
        max_budget: Largest budget.
        ping_interval: Forwarded to the parent constructor and stored.
        nic_name: Network interface name handed to ``start_local_nameserver``.
        **kwargs: Forwarded unchanged to ``ConfigGenerator``.

    Raises:
        RuntimeError: If ``pyro.conf`` already exists in ``working_dir``.
    """
    # Class that is used by the HpBandSter Master to store and manage job
    # configurations and results. At the beginning it will load results from
    # previous HpBandSter runs; those can be used to initialize the
    # config generator.
    self.results_logger = ResultLogger(working_dir)

    # Config space that holds all hyperparameters, default values and
    # possible ranges.
    self.config_space = ExperimentArguments.read_configuration_space(
        config_space_file)

    # Config generator that builds a model and samples promising
    # configurations. Initialized from previous configurations if present.
    self.config_generator = ConfigGenerator(self.config_space,
                                            working_dir=working_dir,
                                            **kwargs)
    self.config_generator.load_from_results_logger(self.results_logger)

    self.pyro_conf_file = os.path.join(working_dir, 'pyro.conf')
    # Refuse to continue BEFORE the nameserver is started, so the error path
    # does not leave a freshly started nameserver behind.
    if os.path.exists(self.pyro_conf_file):
        raise RuntimeError('Pyro conf file already exists')

    ns_host, ns_port = start_local_nameserver(nic_name=nic_name)
    logger.info('Started nameserver with %s %s', ns_host, ns_port)

    # Persist the nameserver address in the working directory.
    with open(self.pyro_conf_file, 'w') as f:
        logger.info('Creating new Pyro conf file.')
        json.dump({'ns_host': ns_host, 'ns_port': ns_port},
                  f, sort_keys=True, indent=2)

    super().__init__(run_id=run_id,
                     config_generator=self.config_generator,
                     working_directory=working_dir,
                     ping_interval=ping_interval,
                     nameserver=ns_host,
                     nameserver_port=ns_port,
                     host=ns_host,
                     logger=master_logger,
                     result_logger=self.results_logger)

    # Ugly, but no other way to set it
    self.dispatcher.logger = dispatcher_logger

    # Hyperband related stuff
    self.eta = eta
    self.min_budget = min_budget
    self.max_budget = max_budget

    # Precompute some HB stuff: the number of budget levels, and the budgets
    # themselves as max_budget * eta**(-k) for k = max_SH_iter - 1, ..., 0.
    self.max_SH_iter = -int(
        np.log(min_budget / max_budget) / np.log(eta)) + 1
    self.budgets = max_budget * np.power(
        eta, -np.linspace(self.max_SH_iter - 1, 0, self.max_SH_iter))

    # Record the run settings in the config dict kept by the parent class.
    self.config.update({
        'eta': eta,
        'min_budget': min_budget,
        'max_budget': max_budget,
        'budgets': self.budgets,
        'max_SH_iter': self.max_SH_iter,
        'min_points_in_model': self.config_generator.min_points_in_model,
        'top_n_percent': self.config_generator.top_n_percent,
        'num_samples': self.config_generator.num_samples,
        'random_fraction': self.config_generator.random_fraction,
        'bandwidth_factor': self.config_generator.bw_factor,
        'min_bandwidth': self.config_generator.min_bandwidth,
    })

    self.n_iterations = n_iterations
    self.ping_interval = ping_interval
    self.run_id = run_id
    self.nic_name = nic_name
# Update models for this budget self.kde_models[budget] = dict(good=good_kde, bad=bad_kde) logger.debug('Build new model for budget %f based on %i/%i split.' % (budget, n_good, n_bad)) logger.debug('Best loss for this budget:%f' % (np.min(train_losses))) return True if __name__ == '__main__': from src.experiment_arguments import ExperimentArguments logger.setLevel(logging.DEBUG) working_dir = '/mhome/chrabasp/EEG_Results/BO_Anomaly_6' config_space_file = '/mhome/chrabasp/Workspace/EEG/config/anomaly_simple.pcs' results_logger = ResultLogger(working_dir=working_dir) config_space = ExperimentArguments.read_configuration_space(config_space_file) config_generator = ConfigGenerator(config_space=config_space, working_dir=working_dir, min_points_in_model=0, top_n_percent=20, num_samples=100, random_fraction=0.3, bandwidth_factor=3.0, min_bandwidth=0.001, bw_estimation_method='normal_reference') config_generator.load_from_results_logger(results_logger) model = config_generator.kde_models[27.0]['good'] model_bad = config_generator.kde_models[27.0]['bad']
class Visualizer:
    """Plot the logged results of an optimisation run against elapsed time.

    ``__init__`` fills three collections:
        * ``results``: budget -> list of ``(hours, value, model_based_pick)``.
        * ``lines``: configuration -> list of the same tuples.
        * ``best_points``: budget -> list of ``(hours, value)`` pairs at
          which a new lowest value was reached.
    """

    def __init__(self, working_dir, config_space_file):
        """Read all logged runs and prepare the point collections.

        Args:
            working_dir: Directory handed to ``ResultLogger``.
            config_space_file: File handed to
                ``ExperimentArguments.read_configuration_space``.
        """
        self.working_dir = working_dir
        self.config_space_file = config_space_file
        self.result_logger = ResultLogger(working_dir=working_dir)
        self.config_space = ExperimentArguments.read_configuration_space(config_space_file)

        self.results = {}
        experiment_start_time = np.inf

        # Scatter styling.
        self.alpha = 0.4
        self.size = 40
        colors = ["windows blue", "amber", "green", "red"]
        self.pal = sns.xkcd_palette(colors)

        self.lines = {}

        # First find starting time
        for configuration, result in self.result_logger.get_results(self.config_space):
            budget, result_timestamps, result, exception = result
            t_started = int(result_timestamps['started'])
            experiment_start_time = min(t_started, experiment_start_time)

        for configuration, result, info in self.result_logger.get_results(self.config_space, with_info=True):
            budget, result_timestamps, result, exception = result
            if result is None:
                # No result recorded: can never become a best point.
                result = np.inf
            else:
                # The y axis of plot() is labelled "Classification Error [%]".
                # NOTE(review): presumably the loss is a negative accuracy -
                # confirm against the worker's loss definition.
                result = 100 + float(result['loss']) * 100

            t_finished = int(result_timestamps['finished'])
            point = ((t_finished - experiment_start_time) / 3600,
                     result,
                     info['model_based_pick'])

            # Points finishing later than 170 hours are dropped.
            if point[0] > 170:
                continue

            self.results.setdefault(budget, []).append(point)
            self.lines.setdefault(configuration, []).append(point)

        self.best_points = {
            budget: self._running_best(points)
            for budget, points in self.results.items()
        }

    @staticmethod
    def _running_best(points):
        """Return the ``(time, value)`` pairs at which a new lowest value is reached.

        ``points`` is a list of ``(time, value, flag)`` tuples; it is walked
        in order of increasing time.
        """
        trajectory = []
        best = np.inf
        for time, result, _ in sorted(points, key=lambda p: p[0]):
            if result < best:
                best = result
                trajectory.append((time, result))
        return trajectory

    @staticmethod
    def _split_xy(points):
        """Split ``(x, y)`` pairs into two lists; empty input gives two empty lists."""
        return [p[0] for p in points], [p[1] for p in points]

    def plot(self):
        """Scatter every point, one colour per budget, then style the axes.

        Points whose flag is ``True`` are drawn with a black edge and
        labelled "Model Based"; points whose flag is ``False`` are labelled
        "Model Free". A group without points is skipped.
        """
        n_colors = len(self.pal)
        for idx, (key, values) in enumerate(sorted(self.results.items())):
            # Reuse the palette cyclically so budgets beyond the number of
            # palette colours are still drawn.
            # Wrapping the colour in a list makes it a single-row 2D colour
            # spec; a bare RGB tuple is ambiguous for scatter's ``c``.
            c = [self.pal[idx % n_colors]]
            mb_values = [v[:2] for v in values if v[2] is True]
            mf_values = [v[:2] for v in values if v[2] is False]

            if mb_values:
                x, y = self._split_xy(mb_values)
                plt.scatter(x, y, c=c, s=self.size, linewidths=2,
                            edgecolors='black', alpha=self.alpha,
                            label=str('Model Based %s' % int(key)))

            if mf_values:
                x, y = self._split_xy(mf_values)
                plt.scatter(x, y, c=c, s=self.size, alpha=self.alpha,
                            label=str('Model Free %s' % int(key)))

        plt.xlim(-1, 80)
        plt.ylim(-1, 80)
        plt.xlabel("Time [hours]", fontsize=23)
        plt.ylabel("Classification Error [%]", fontsize=23)
        plt.tick_params(axis='both', which='major', labelsize=22)
        plt.tick_params(axis='both', which='minor', labelsize=21)

    def plot_best_points(self):
        """Draw the best-so-far value per budget as a step curve and print its values.

        Budgets without any improvement point are skipped.
        """
        n_colors = len(self.pal)
        for idx, (key, values) in enumerate(sorted(self.best_points.items())):
            if not values:
                continue
            x, y = self._split_xy(values)
            plt.step(x, y, c=self.pal[idx % n_colors], linewidth=3,
                     where='post', alpha=0.8)
            print(y)

    def plot_lines(self):
        """Connect, in faint gray, the points of each configuration that has more than one."""
        for key, value in self.lines.items():
            if len(value) > 1:
                x = [v[0] for v in value]
                y = [v[1] for v in value]
                plt.plot(x, y, c='gray', alpha=0.2)
class Visualizer:
    """Plot the logged results of an optimisation run against elapsed time.

    ``__init__`` fills two collections:
        * ``results``: budget -> list of ``(hours, loss, model_based_pick)``.
        * ``best_points``: budget -> list of ``(hours, loss)`` pairs at
          which a new lowest loss was reached.
    """

    def __init__(self, working_dir, config_space_file):
        """Read all logged runs and prepare the point collections.

        Args:
            working_dir: Directory handed to ``ResultLogger``.
            config_space_file: File handed to
                ``ExperimentArguments.read_configuration_space``.
        """
        self.working_dir = working_dir
        self.config_space_file = config_space_file
        self.result_logger = ResultLogger(working_dir=working_dir)
        self.config_space = ExperimentArguments.read_configuration_space(
            config_space_file)

        self.results = {}
        experiment_start_time = np.inf

        # Scatter styling.
        self.alpha = 0.8
        self.size = 50

        # First find starting time
        for configuration, result in self.result_logger.get_results(
                self.config_space):
            budget, result_timestamps, result, exception = result
            t_started = int(result_timestamps['started'])
            experiment_start_time = min(t_started, experiment_start_time)

        for configuration, result, info in self.result_logger.get_results(
                self.config_space, with_info=True):
            budget, result_timestamps, result, exception = result
            # A missing result is stored as +inf so it never counts as a
            # best point.
            if result is None:
                result = np.inf
            else:
                result = float(result['loss'])

            t_finished = int(result_timestamps['finished'])
            # Dividing by 3600 presumably converts seconds to hours.
            point = ((t_finished - experiment_start_time) / 3600,
                     result,
                     info['model_based_pick'])
            self.results.setdefault(budget, []).append(point)

        self.best_points = {
            budget: self._running_best(points)
            for budget, points in self.results.items()
        }

    @staticmethod
    def _running_best(points):
        """Return the ``(time, value)`` pairs at which a new lowest value is reached.

        ``points`` is a list of ``(time, value, flag)`` tuples; it is walked
        in order of increasing time.
        """
        trajectory = []
        best = np.inf
        for time, result, _ in sorted(points, key=lambda p: p[0]):
            if result < best:
                best = result
                trajectory.append((time, result))
        return trajectory

    @staticmethod
    def _split_xy(points):
        """Split ``(x, y)`` pairs into two lists; empty input gives two empty lists."""
        return [p[0] for p in points], [p[1] for p in points]

    def plot(self):
        """Scatter every point, one colour per budget.

        Points whose flag is ``True`` are drawn with a black edge and
        labelled "Model Based"; points whose flag is ``False`` are labelled
        "Model Free". A group without points is skipped instead of failing
        on the tuple unpacking of an empty sequence.
        """
        pal = sns.color_palette(n_colors=len(self.results))
        for color, (key, values) in zip(pal, sorted(self.results.items())):
            # Wrapping the colour in a list makes it a single-row 2D colour
            # spec; a bare RGB tuple is ambiguous for scatter's ``c``.
            c = [color]
            mb_values = [v[:2] for v in values if v[2] is True]
            mf_values = [v[:2] for v in values if v[2] is False]

            if mb_values:
                x, y = self._split_xy(mb_values)
                plt.scatter(x, y, c=c, s=self.size, linewidths=2,
                            edgecolors='black', alpha=self.alpha,
                            label=str('Model Based %s' % key))

            if mf_values:
                x, y = self._split_xy(mf_values)
                plt.scatter(x, y, c=c, s=self.size, alpha=self.alpha,
                            label=str('Model Free %s' % key))

    def plot_best_points(self):
        """Draw the best-so-far loss per budget as a step curve.

        Budgets without any improvement point (for example when every
        result is missing) are skipped.
        """
        pal = sns.color_palette(n_colors=len(self.best_points))
        for c, (key, values) in zip(pal, sorted(self.best_points.items())):
            if not values:
                continue
            x, y = self._split_xy(values)
            plt.step(x, y, c=c, linewidth=3)