def __init__(self, config):
    """Set up the parameter optimizer.

    Parses the feature types declared in the config into boolean position
    masks and instantiates the continuous, discrete and categorical
    optimization backends requested in the settings.

    Args:
        config: configuration object exposing `num_features`,
            `feature_types`, `feature_names` and `get()` for general
            settings.

    Raises:
        GryffinUnknownSettingsError: if a feature type or an optimizer
            choice is not recognized.
    """
    self.config = config
    Logger.__init__(self, 'ParamOptimizer', verbosity=self.config.get('verbosity'))

    # parse positions: one boolean mask per supported feature type
    self.pos_continuous = np.full(self.config.num_features, False, dtype=bool)
    self.pos_categories = np.full(self.config.num_features, False, dtype=bool)
    self.pos_discrete = np.full(self.config.num_features, False, dtype=bool)
    for feature_index, feature_type in enumerate(self.config.feature_types):
        if feature_type == 'continuous':
            self.pos_continuous[feature_index] = True
        elif feature_type == 'categorical':
            self.pos_categories[feature_index] = True
        elif feature_type == 'discrete':
            self.pos_discrete[feature_index] = True
        else:
            feature_name = self.config.feature_names[feature_index]
            # original code constructed the error without raising it;
            # raise explicitly so an invalid config cannot pass silently
            raise GryffinUnknownSettingsError(
                'did not understand parameter type "%s" for parameter "%s".\n\t(%s) Please choose from "continuous", "discrete" or "categorical"'
                % (feature_type, feature_name, self.template))

    # set up continuous optimization algorithms
    cont_opt_name = self.config.get('continuous_optimizer')
    if cont_opt_name == 'adam':
        from .numpy_optimizers import AdamOptimizer
        self.opt_con = AdamOptimizer()
    else:
        # NOTE(review): fixed misspelled 'GryffinUnkownSettingsError' to match
        # the correctly spelled name used elsewhere in this file, and added raise
        raise GryffinUnknownSettingsError(
            'did not understand continuous optimizer "%s".\n\tPlease choose from "adam"' % cont_opt_name)

    # set up discrete optimization algorithms
    disc_opt_name = self.config.get('discrete_optimizer')
    if disc_opt_name == 'naive':
        from .numpy_optimizers import NaiveDiscreteOptimizer
        self.opt_dis = NaiveDiscreteOptimizer()
    else:
        raise GryffinUnknownSettingsError(
            'did not understand discrete optimizer "%s".\n\tPlease choose from "naive"' % disc_opt_name)

    # set up categorical optimization algorithms
    cat_opt_name = self.config.get('categorical_optimizer')
    if cat_opt_name == 'naive':
        from .numpy_optimizers import NaiveCategoricalOptimizer
        self.opt_cat = NaiveCategoricalOptimizer()
    else:
        raise GryffinUnknownSettingsError(
            'did not understand categorical optimizer "%s".\n\tPlease choose from "naive"' % cat_opt_name)
def __init__(self, config, model_details=None):
    """Set up the Bayesian network surrogate.

    Configures logging, the default kernel contribution, the category
    reshaper, the BNN backend executable, the normalized domain volume,
    and the sampling-strategy parameter values.

    Args:
        config: configuration object with feature descriptions and
            general settings accessible via `get()`.
        model_details: optional BNN model settings; when None, the
            package defaults from `.model_details` are used.

    Raises:
        GryffinUnknownSettingsError: if the backend or a feature type
            is not recognized.
    """
    self.COUNTER = 0
    self.has_sampled = False
    self.config = config
    verbosity = self.config.get('verbosity')
    if 'bayesian_network' in verbosity:
        verbosity = verbosity['bayesian_network']
    Logger.__init__(self, 'BayesianNetwork', verbosity=verbosity)

    # placeholder kernel contribution until the network has sampled
    self.kernel_contribution = lambda x: (np.sum(x), 1.)
    self.cat_reshaper = CategoryReshaper(self.config)

    # get bnn model details (fall back to package defaults)
    if model_details is None:  # identity check instead of '== None'
        from .model_details import model_details
    self.model_details = model_details

    # set up bnn
    if self.config.get('backend') == 'tensorflow':
        from .tfprob_interface import TfprobNetwork
        self.network_executable = '{}/bayesian_network/tfprob_interface/tfprob_interface.py'.format(
            self.config.get('home'))
    else:
        # fixed: error message previously read self.config_general.backend,
        # an attribute this class does not define, which would raise
        # AttributeError instead of the intended settings error
        raise GryffinUnknownSettingsError(
            'did not understand backend: "%s".\n\tChoose from "tensorflow"' % self.config.get('backend'))

    # get domain volume: product of ranges (continuous/discrete) and
    # option counts (categorical) over all features
    self.volume = 1.
    feature_lengths = self.config.feature_lengths
    feature_ranges = self.config.feature_ranges
    for feature_index, feature_type in enumerate(self.config.feature_types):
        if feature_type == 'continuous':
            self.volume *= feature_ranges[feature_index]
        elif feature_type == 'categorical':
            self.volume *= feature_lengths[feature_index]
        elif feature_type == 'discrete':
            self.volume *= feature_ranges[feature_index]
        else:
            raise GryffinUnknownSettingsError(
                'did not understand parameter type: "%s" of variable "%s".\n\t(%s) Please choose from "continuous" or "categorical"'
                % (feature_type, self.config.feature_names[feature_index], self.template))
    self.inverse_volume = 1 / self.volume

    # compute sampling parameter values, scaled by the inverse volume
    if self.config.get('sampling_strategies') == 1:
        self.sampling_param_values = np.zeros(1)
    else:
        self.sampling_param_values = np.linspace(-1.0, 1.0, self.config.get('sampling_strategies'))
        self.sampling_param_values = self.sampling_param_values[::-1]
    # scaling zeros is a no-op, so applying it unconditionally is safe
    self.sampling_param_values *= self.inverse_volume
def normal_samples(self, loc=0., scale=1., num=1):
    """Draw samples around reference locations for all parameters.

    Continuous parameters are sampled from a normal distribution centered
    at `loc[param_index]` with a standard deviation proportional to the
    parameter range; categorical and discrete parameters are drawn
    uniformly from their respective samplers.

    Args:
        loc: per-parameter centers, indexed as `loc[param_index]` for
            continuous parameters. NOTE(review): the scalar default 0.
            is not indexable — callers appear to always pass a sequence;
            confirm before relying on the default.
        scale: fraction of the parameter range used as std deviation.
        num: number of samples to draw.

    Returns:
        np.ndarray of shape (num, total parameter size).

    Raises:
        GryffinUnknownSettingsError: if a parameter type is unknown.
    """
    samples = []
    for param_index, param_settings in enumerate(self.config_params):
        specs = param_settings['specifics']
        if param_settings['type'] == 'continuous':
            param_range = specs['high'] - specs['low']
            sampled_values = np.random.normal(
                0., scale * param_range, (num, param_settings['size'])) + loc[param_index]
            samples.append(sampled_values)
        elif param_settings['type'] == 'categorical':
            sampled_values = self.categorical_sampler.draw(
                len(specs['options']), (num, param_settings['size']))
            samples.append(sampled_values)
        elif param_settings['type'] == 'discrete':
            sampled_values = self.discrete_sampler.draw(
                specs['low'], specs['high'], (num, param_settings['size']))
            samples.append(sampled_values)
        else:
            # raise (original constructed the error without raising)
            raise GryffinUnknownSettingsError(
                'did not understand variable type: "%s" of parameter "%s".\n\t(%s) Choose from "continuous", "discrete" or "categorical"'
                % (param_settings['type'], param_settings['name'], self.template))
    samples = np.concatenate(samples, axis=1)
    return samples
def _draw_single_parameter(self, num, param_type, specs): if param_type == 'continuous': sampled_values = self._draw_continuous(low=specs['low'], high=specs['high'], size=(num, 1)) elif param_type == 'categorical': sampled_values = self._draw_categorical(num_options=len(specs['options']), size=(num, 1)) elif param_type == 'discrete': sampled_values = self._draw_discrete(low=specs['low'], high=specs['high'], size=(num, 1)) else: GryffinUnknownSettingsError(f'cannot understand parameter type "{param_type}"') return sampled_values
def create_database(self):
    """Instantiate the database backend declared in the config.

    Currently only the 'sqlite' format is supported; the created
    database is stored on `self.database`.

    Raises:
        GryffinUnknownSettingsError: if the configured database format
            is not recognized.
    """
    if self.config.get_db('format') == 'sqlite':
        from .sqlite_interface import SqliteDatabase
        self.database = SqliteDatabase(
            self.config.get_db('path'), self.db_attrs, 'db',
            verbosity=self.config.get('verbosity'))
    else:
        # raise (original constructed the error without raising)
        raise GryffinUnknownSettingsError(
            'did not understand database format: "%s".\n\tChoose from ["none", "sqlite"]'
            % self.config.get_db('format'))
def _load_optimizer(self, acquisition_constraints):
    """Instantiate the local acquisition optimizer selected in the config.

    Args:
        acquisition_constraints: constraints forwarded to the optimizer.

    Returns:
        A GradientOptimizer ('adam') or GeneticOptimizer ('genetic').

    Raises:
        GryffinUnknownSettingsError: if `self.optimizer_type` is neither
            'adam' nor 'genetic'.
    """
    if self.optimizer_type == 'adam':
        local_optimizer = GradientOptimizer(self.config, acquisition_constraints)
    elif self.optimizer_type == 'genetic':
        from .genetic_optimizer import GeneticOptimizer
        local_optimizer = GeneticOptimizer(self.config, acquisition_constraints)
    else:
        # raise: previously the error was constructed but not raised, so
        # the return below hit an UnboundLocalError instead
        raise GryffinUnknownSettingsError(
            f'Did not understand optimizer choice {self.optimizer_type}.'
            f'\n\tPlease choose "adam" or "genetic"')
    return local_optimizer
def adjust_objectives(self, objs):
    """Adjust objectives based on optimization goal.

    Objectives marked 'minimize' are kept as-is; objectives marked
    'maximize' are negated so every column can be minimized uniformly.

    Args:
        objs: array of shape (num_obs, num_objectives).

    Returns:
        np.ndarray of the same shape with goal-adjusted columns.

    Raises:
        GryffinUnknownSettingsError: if an objective goal is unknown.
    """
    optim_goals = self.config.obj_goals
    adjusted_objs = np.empty(objs.shape)
    for obj_index, obj_goal in enumerate(optim_goals):
        if obj_goal == 'minimize':
            adjusted_objs[:, obj_index] = objs[:, obj_index]
        elif obj_goal == 'maximize':
            # negate so maximization becomes minimization
            adjusted_objs[:, obj_index] = -objs[:, obj_index]
        else:
            # raise (original constructed the error without raising)
            raise GryffinUnknownSettingsError(
                'did not understand objective goal: "%s" for objective "%s".\n\tChoose from "minimize" or "maximize"'
                % (obj_goal, self.config.obj_names[obj_index]))
    return adjusted_objs
def perturb(self, pos, num=1, scale=0.05):
    """Draw samples in the vicinity of a reference position.

    Continuous parameters are perturbed by a uniform draw within
    +/- `scale` of their range and clipped to the optimization bounds;
    categorical and discrete parameters are replicated unchanged.

    Args:
        pos: reference values, indexed per parameter.
        num: number of perturbed samples to generate.
        scale: relative perturbation magnitude for continuous parameters.

    Returns:
        np.ndarray of shape (num, total parameter size).

    Raises:
        GryffinUnknownSettingsError: if a parameter type is unknown.
    """
    samples = []
    for param_index, param_settings in enumerate(self.config_params):
        specs = param_settings['specifics']
        if param_settings['type'] == 'continuous':
            # uniform draw in [-scale, scale], rescaled to the parameter range
            sampled_values = self.continuous_sampler.draw(-scale, scale, (num, param_settings['size']))
            sampled_values *= specs['high'] - specs['low']
            close_samples = pos[param_index] + sampled_values
            # clip to the optimization boundaries
            close_samples = np.where(close_samples < specs['low'], specs['low'], close_samples)
            close_samples = np.where(close_samples > specs['high'], specs['high'], close_samples)
            samples.append(close_samples)
        elif param_settings['type'] in ['categorical', 'discrete']:
            # categorical/discrete values are not perturbed, only replicated
            sampled_values = pos[param_index] * np.ones((num, param_settings['size'])).astype(np.float32)
            samples.append(sampled_values)
        else:
            # raise (original constructed the error without raising)
            raise GryffinUnknownSettingsError('did not understand settings')
    samples = np.concatenate(samples, axis=1)
    return samples
def draw(self, num=1):
    """Draw `num` random samples across all configured parameters.

    Each parameter is drawn from the sampler matching its type and the
    per-parameter columns are concatenated into one sample matrix.

    Args:
        num: number of samples to draw.

    Returns:
        np.ndarray of shape (num, total parameter size).

    Raises:
        GryffinUnknownSettingsError: if a parameter type is unknown.
    """
    samples = []
    for param_index, param_settings in enumerate(self.config_params):
        specs = param_settings['specifics']
        if param_settings['type'] == 'continuous':
            sampled_values = self.continuous_sampler.draw(
                specs['low'], specs['high'], (num, param_settings['size']))
            samples.append(sampled_values)
        elif param_settings['type'] == 'categorical':
            sampled_values = self.categorical_sampler.draw(
                len(specs['options']), (num, param_settings['size']))
            samples.append(sampled_values)
        elif param_settings['type'] == 'discrete':
            sampled_values = self.discrete_sampler.draw(
                specs['low'], specs['high'], (num, param_settings['size']))
            samples.append(sampled_values)
        else:
            # raise (original constructed the error without raising)
            raise GryffinUnknownSettingsError(
                'did not understand parameter type: "%s" of parameter "%s".\n\t(%s) Choose from "continuous", "discrete" or "categorical"'
                % (param_settings['type'], self.config_params[param_index]['name'], self.template))
    samples = np.concatenate(samples, axis=1)
    self.log('generated uniform samples: \n%s' % str(samples), 'DEBUG')
    return samples
def _get_volume(self): # get domain volume self.volume = 1. feature_lengths = self.config.feature_lengths feature_ranges = self.config.feature_ranges for feature_index, feature_type in enumerate(self.config.feature_types): if feature_type == 'continuous': self.volume *= feature_ranges[feature_index] elif feature_type == 'categorical': self.volume *= feature_lengths[feature_index] elif feature_type == 'discrete': self.volume *= feature_ranges[feature_index] else: GryffinUnknownSettingsError( 'did not understand parameter type: "%s" of variable "%s".\n\t(%s) Please choose from "continuous" or "categorical"' % ( feature_type, self.config.feature_names[feature_index], self.template)) self.inverse_volume = 1 / self.volume
def __init__(self, config_general, config_params):
    """Set up the random sampler with the configured backends.

    Args:
        config_general: general settings (verbosity, sampler choice).
        config_params: per-parameter settings list.

    Raises:
        GryffinUnknownSettingsError: if the sampler setting is neither
            'uniform' nor 'sobol'.
    """
    self.config_general = config_general
    self.config_params = config_params

    # allow a sampler-specific verbosity override
    verbosity = self.config_general.verbosity
    if 'random_sampler' in self.config_general.verbosity:
        verbosity = self.config_general.verbosity['random_sampler']
    Logger.__init__(self, 'RandomSampler', verbosity)

    if self.config_general.sampler == 'sobol':
        from .sobol import SobolContinuous
        from .uniform import UniformCategorical, UniformDiscrete
        self.continuous_sampler = SobolContinuous()
        self.categorical_sampler = UniformCategorical()
        self.discrete_sampler = UniformDiscrete()
    elif self.config_general.sampler == 'uniform':
        from .uniform import UniformCategorical, UniformContinuous, UniformDiscrete
        self.continuous_sampler = UniformContinuous()
        self.categorical_sampler = UniformCategorical()
        self.discrete_sampler = UniformDiscrete()
    else:
        # raise (original constructed the error without raising);
        # also fixed garbled message text 'did not understanding'
        raise GryffinUnknownSettingsError(
            'did not understand sampler setting: "%s".\n\tChoose from "uniform" or "sobol"'
            % self.config_general.sampler)
def _perturb_single_parameter(self, ref_value, num, param_type, specs, scale, perturb_categorical=False): if param_type in ['continuous', 'discrete']: # draw uniform within unit range sampled_values = self._draw_continuous(-scale, scale, (num, 1)) # scale to actual range sampled_values *= specs['high'] - specs['low'] # if discrete, we round to nearest integer if param_type == 'discrete': sampled_values = np.around(sampled_values, decimals=0) # add +/- 5% perturbation to sample perturbed_sample = ref_value + sampled_values # make sure we do not cross optimization boundaries perturbed_sample = np.where(perturbed_sample < specs['low'], specs['low'], perturbed_sample) perturbed_sample = np.where(perturbed_sample > specs['high'], specs['high'], perturbed_sample) elif param_type == 'categorical': # i.e. do not perturb if perturb_categorical is False: perturbed_sample = ref_value * np.ones((num, 1)).astype(np.float32) # i.e. random draw else: perturbed_sample = self._draw_categorical(num_options=len(specs['options']), size=(num, 1)) else: GryffinUnknownSettingsError('did not understand settings') return perturbed_sample
def construct_model(self, learning_rate=None):
    """Build the TF1-style Bayesian neural network graph.

    Constructs prior and posterior BNNs over the rescaled features,
    maps the network outputs to per-kernel predictive distributions
    (Normal for continuous kernels, relaxed one-hot categorical for
    categorical/discrete kernels), and sets up the negative-log-likelihood
    loss with an Adam training op. Everything is created inside
    `self.graph` and a fresh interactive session.

    Args:
        learning_rate: optimizer learning rate; defaults to
            `self.learning_rate` when None.
    """
    if learning_rate is None:
        learning_rate = self.learning_rate

    with self.graph.as_default():
        # replace any previous session with a fresh interactive one
        self.sess.close()
        self.sess = tf.compat.v1.InteractiveSession()
        self.sess.as_default()

        self.x = tf.convert_to_tensor(self.rescaled_features, dtype=tf.float32)
        self.y = tf.convert_to_tensor(self.targets, dtype=tf.float32)

        # construct precisness: per-observation squared kernel ranges used to
        # rescale the gamma-distributed precisions below
        self.tau_rescaling = np.zeros((self.num_obs, self.bnn_output_size))
        kernel_ranges = self.config.kernel_ranges
        for obs_index in range(self.num_obs):
            self.tau_rescaling[obs_index] += kernel_ranges
        self.tau_rescaling = self.tau_rescaling**2

        # construct weight and bias shapes: tanh hidden layers, linear output
        activations = [tf.nn.tanh]
        weight_shapes, bias_shapes = [[self.feature_size, self.hidden_shape]], [[self.hidden_shape]]
        for _ in range(1, self.num_layers - 1):
            activations.append(tf.nn.tanh)
            weight_shapes.append([self.hidden_shape, self.hidden_shape])
            bias_shapes.append([self.hidden_shape])
        activations.append(lambda x: x)
        weight_shapes.append([self.hidden_shape, self.bnn_output_size])
        bias_shapes.append([self.bnn_output_size])

        # ---------------
        # construct prior
        # ---------------
        # fixed Normal priors on weights/biases; a forward pass is built by
        # sampling each layer's parameters once
        self.prior_layer_outputs = [self.x]
        self.priors = {}
        for layer_index in range(self.num_layers):
            weight_shape, bias_shape = weight_shapes[layer_index], bias_shapes[layer_index]
            activation = activations[layer_index]

            weight = tfd.Normal(loc=tf.zeros(weight_shape) + self.weight_loc,
                                scale=tf.zeros(weight_shape) + self.weight_scale)
            bias = tfd.Normal(loc=tf.zeros(bias_shape) + self.bias_loc,
                              scale=tf.zeros(bias_shape) + self.bias_scale)

            self.priors['weight_%d' % layer_index] = weight
            self.priors['bias_%d' % layer_index] = bias

            prior_layer_output = activation(tf.matmul(self.prior_layer_outputs[-1], weight.sample()) + bias.sample())
            self.prior_layer_outputs.append(prior_layer_output)

        self.prior_bnn_output = self.prior_layer_outputs[-1]
        # draw precisions from gamma distribution; concentration grows with the
        # (feasibility-adjusted) number of observations
        self.prior_tau_normed = tfd.Gamma(
            12*(self.num_obs/self.frac_feas)**2 + tf.zeros((self.num_obs, self.bnn_output_size)),
            tf.ones((self.num_obs, self.bnn_output_size)),
        )
        self.prior_tau = self.prior_tau_normed.sample() / self.tau_rescaling
        self.prior_scale = tfd.Deterministic(1. / tf.sqrt(self.prior_tau))

        # -------------------
        # construct posterior
        # -------------------
        # same architecture, but with trainable variational Normal parameters
        # (softplus keeps the scales positive)
        self.post_layer_outputs = [self.x]
        self.posteriors = {}
        for layer_index in range(self.num_layers):
            weight_shape, bias_shape = weight_shapes[layer_index], bias_shapes[layer_index]
            activation = activations[layer_index]

            weight = tfd.Normal(loc=tf.Variable(tf.random.normal(weight_shape)),
                                scale=tf.nn.softplus(tf.Variable(tf.zeros(weight_shape))))
            bias = tfd.Normal(loc=tf.Variable(tf.random.normal(bias_shape)),
                              scale=tf.nn.softplus(tf.Variable(tf.zeros(bias_shape))))

            self.posteriors['weight_%d' % layer_index] = weight
            self.posteriors['bias_%d' % layer_index] = bias

            post_layer_output = activation(tf.matmul(self.post_layer_outputs[-1], weight.sample()) + bias.sample())
            self.post_layer_outputs.append(post_layer_output)

        self.post_bnn_output = self.post_layer_outputs[-1]
        # trainable posterior over the precisions
        self.post_tau_normed = tfd.Gamma(
            12*(self.num_obs/self.frac_feas)**2 + tf.Variable(tf.zeros((self.num_obs, self.bnn_output_size))),
            tf.nn.softplus(tf.Variable(tf.ones((self.num_obs, self.bnn_output_size)))),
        )
        self.post_tau = self.post_tau_normed.sample() / self.tau_rescaling
        self.post_sqrt_tau = tf.sqrt(self.post_tau)
        self.post_scale = tfd.Deterministic(1. / self.post_sqrt_tau)

        # map bnn output to prediction: walk the kernel layout and build one
        # predictive distribution per parameter
        post_kernels = {}
        targets_dict = {}
        inferences = []

        target_element_index = 0
        kernel_element_index = 0

        while kernel_element_index < len(self.config.kernel_names):

            kernel_type = self.config.kernel_types[kernel_element_index]
            kernel_size = self.config.kernel_sizes[kernel_element_index]

            # NOTE(review): feature_begin/feature_end are computed but unused
            feature_begin, feature_end = target_element_index, target_element_index + 1
            kernel_begin, kernel_end = kernel_element_index, kernel_element_index + kernel_size

            prior_relevant = self.prior_bnn_output[:, kernel_begin: kernel_end]
            post_relevant = self.post_bnn_output[:, kernel_begin: kernel_end]

            if kernel_type == 'continuous':
                target = self.y[:, kernel_begin: kernel_end]
                lowers, uppers = self.config.kernel_lowers[kernel_begin: kernel_end], self.config.kernel_uppers[kernel_begin: kernel_end]

                # sigmoid squashed to a support slightly wider than [lower, upper]
                # (the 1.2/-0.1 factors allow a 10% overshoot on each side)
                prior_support = (uppers - lowers) * (1.2 * tf.nn.sigmoid(prior_relevant) - 0.1) + lowers
                post_support = (uppers - lowers) * (1.2 * tf.nn.sigmoid(post_relevant) - 0.1) + lowers

                prior_predict = tfd.Normal(prior_support, self.prior_scale[:, kernel_begin: kernel_end].sample())
                post_predict = tfd.Normal(post_support, self.post_scale[:, kernel_begin: kernel_end].sample())

                targets_dict[prior_predict] = target
                post_kernels['param_%d' % target_element_index] = {
                    'loc': tfd.Deterministic(post_support),
                    'sqrt_prec': tfd.Deterministic(self.post_sqrt_tau[:, kernel_begin: kernel_end]),
                    'scale': tfd.Deterministic(self.post_scale[:, kernel_begin: kernel_end].sample())}

                inference = {'pred': post_predict, 'target': target}
                inferences.append(inference)

            elif kernel_type in ['categorical', 'discrete']:
                target = tf.cast(self.y[:, kernel_begin: kernel_end], tf.int32)

                # relaxation temperature shrinks as more observations accumulate
                prior_temperature = 0.5 + 10.0 / (self.num_obs / self.frac_feas)
                #prior_temperature = 1.0
                post_temperature = prior_temperature

                prior_support = prior_relevant
                post_support = post_relevant

                prior_predict_relaxed = tfd.RelaxedOneHotCategorical(prior_temperature, prior_support)
                prior_predict = tfd.OneHotCategorical(probs=prior_predict_relaxed.sample())

                post_predict_relaxed = tfd.RelaxedOneHotCategorical(post_temperature, post_support)
                post_predict = tfd.OneHotCategorical(probs=post_predict_relaxed.sample())

                targets_dict[prior_predict] = target
                post_kernels['param_%d' % target_element_index] = {'probs': post_predict_relaxed}

                inference = {'pred': post_predict, 'target': target}
                inferences.append(inference)

                '''
                    Temperature annealing schedule:
                        - temperature of 100   yields 1e-2 deviation from uniform
                        - temperature of  10   yields 1e-1 deviation from uniform
                        - temperature of   1   yields *almost* perfect agreement with expectation
                        - temperature of   0.1 yields perfect agreement with expectation
                '''

            else:
                # NOTE(review): error is constructed but not raised here;
                # left unchanged in this documentation-only pass
                GryffinUnknownSettingsError(f'did not understand kernel type: {kernel_type}')

            target_element_index += 1
            kernel_element_index += kernel_size

        self.post_kernels = post_kernels
        self.targets_dict = targets_dict

        # negative log-likelihood of the targets under the posterior predictions
        self.loss = 0.
        for inference in inferences:
            self.loss += - tf.reduce_sum(inference['pred'].log_prob(inference['target']))

        self.optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)
        self.train_op = self.optimizer.minimize(self.loss)

        tf.compat.v1.global_variables_initializer().run()
def read_db(self, outfile, verbose):
    """Export all database entries to `outfile`.

    The output format is inferred from the file extension and must be
    one of csv, xlsx, pkl or json. Entries are sorted by start time when
    runtimes are logged.

    Args:
        outfile: target file name; its extension selects the writer.
        verbose: unused in this method; kept for interface compatibility.

    Raises:
        GryffinValueError: if the database is empty or an expected
            parameter is missing from an entry.
        GryffinUnknownSettingsError: if the file extension is not a
            supported output format.
    """
    db_content = self.db_fetch_all()
    if len(db_content) == 0:
        # raise explicitly: previously the error object was constructed but
        # not raised, so an empty db crashed later on db_content[0]
        raise GryffinValueError('no entries found in database')

    out_format = outfile.split('.')[-1]
    if out_format not in ['csv', 'xlsx', 'pkl', 'json']:
        raise GryffinUnknownSettingsError(
            'did not understand output format "%s".\n\tPlease choose from "csv", "json", "pkl" or "xlsx"' % out_format)

    # sort entries by start time when runtimes were logged
    if self.config.get_db('log_runtimes'):
        start_times = [
            datetime.strptime(entry['start_time'], '%Y-%m-%d %H:%M:%S.%f')
            for entry in db_content
        ]
        sorting_indices = np.argsort(start_times)
    else:
        sorting_indices = np.arange(len(db_content))

    # create output dictionary: one column per metadata key plus one column
    # per parameter per suggestion in the batch
    relevant_keys = ['start_time', 'end_time', 'runtime']
    if self.config.get('auto_desc_gen'):
        relevant_keys.append('descriptor_summary')
    first_suggested_batch = db_content[0]['suggested_params']
    len_batch = len(first_suggested_batch)
    param_names = list(first_suggested_batch[0].keys())
    for sugg_index in range(len_batch):
        for param_name in param_names:
            relevant_keys.append('%s (%d)' % (param_name, sugg_index))
    db_dict = {key: [] for key in relevant_keys}

    for sorting_index in sorting_indices:
        entry = db_content[sorting_index]
        for key in entry.keys():
            if key == 'suggested_params':
                for sugg_index in range(len_batch):
                    for param_name in param_names:
                        if param_name not in entry[key][sugg_index]:
                            raise GryffinValueError(
                                'could not find parameter "%s" in db entry' % param_name)
                        sugg_params = np.squeeze(entry[key][sugg_index][param_name])
                        db_key = '%s (%d)' % (param_name, sugg_index)
                        db_dict[db_key].append(sugg_params)
            else:
                if key not in relevant_keys:
                    continue
                db_dict[key].append(entry[key])

    # set up the writer matching the requested format
    if out_format in ['csv', 'xlsx']:
        from DatabaseHandler.PandasWriters import DB_Writer
    elif out_format in ['json']:
        from DatabaseHandler.JsonWriters import DB_Writer
        db_dict['config'] = self.config.settings
    elif out_format in ['pkl']:
        from DatabaseHandler.PickleWriters import DB_Writer
    self.db_writer = DB_Writer(self.config)
    self.db_writer.write(db_dict, outfile, out_format)