# NOTE: this file is an excerpt; the imports below reflect what the code uses
# and are assumed to match the surrounding repository. FC, TensorStandardScaler,
# RecalibrationLayer, and get_required_argument are defined in the repo's own
# modules (import paths omitted here).
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from scipy.io import loadmat, savemat
from scipy.stats import norm
from tqdm import trange


class BNN:
    """Neural network models which model aleatoric uncertainty (and possibly
    epistemic uncertainty with ensembling).
    """

    def __init__(self, params):
        """Initializes a class instance.

        Arguments:
            params (DotMap): A dotmap of model parameters.
                .name (str): Model name, used for logging/use in variable scopes.
                    Warning: Models with the same name will overwrite each other.
                .num_networks (int): (optional) The number of networks in the ensemble.
                    Defaults to 1. Ignored if the model is being loaded.
                .model_dir (str/None): (optional) Path to the directory from which the model
                    will be loaded, and to which it is saved by default. Defaults to None.
                .load_model (bool): (optional) If True, the model will be loaded from the model
                    directory, assuming that the files were generated by a model of the same
                    name. Defaults to False.
                .sess (tf.Session/None): The session that this model will use.
                    If None, creates a session with its own associated graph. Defaults to None.
        """
        self.name = get_required_argument(params, 'name', 'Must provide name.')
        self.model_dir = params.get('model_dir', None)

        if params.get('sess', None) is None:
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            config.gpu_options.per_process_gpu_memory_fraction = 0.4
            self._sess = tf.Session(config=config)
        else:
            self._sess = params.get('sess')

        # Instance variables
        self.finalized = False
        self.layers, self.max_logvar, self.min_logvar = [], None, None
        self.decays, self.optvars, self.nonoptvars = [], [], []
        self.end_act, self.end_act_name = None, None
        self.scaler = None

        # Training objects
        self.optimizer = None
        self.sy_train_in, self.sy_train_targ = None, None
        self.train_op, self.mse_loss = None, None

        # Prediction objects
        self.sy_pred_in2d, self.sy_pred_mean2d_fac, self.sy_pred_var2d_fac = None, None, None
        self.sy_pred_mean2d, self.sy_pred_var2d = None, None
        self.sy_pred_in3d, self.sy_pred_mean3d_fac, self.sy_pred_var3d_fac = None, None, None

        if params.get('load_model', False):
            if self.model_dir is None:
                raise ValueError("Cannot load model without providing model directory.")
            self._load_structure()
            self.num_nets, self.model_loaded = self.layers[0].get_ensemble_size(), True
            print("Model loaded from %s." % self.model_dir)
        else:
            self.num_nets = params.get('num_networks', 1)
            self.model_loaded = False

        if self.num_nets == 1:
            print("Created a neural network with variance predictions.")
        else:
            print("Created an ensemble of %d neural networks with variance predictions."
                  % self.num_nets)

    @property
    def is_probabilistic(self):
        return True

    @property
    def is_tf_model(self):
        return True

    @property
    def sess(self):
        return self._sess

    ###################################
    # Network Structure Setup Methods #
    ###################################

    def add(self, layer):
        """Adds a new layer to the network.

        Arguments:
            layer: (layer) The new layer to be added to the network.
                If this is the first layer, the input dimension of the layer must be set.

        Returns: None.
        """
        if self.finalized:
            raise RuntimeError("Cannot modify network structure after finalizing.")
        if len(self.layers) == 0 and layer.get_input_dim() is None:
            raise ValueError("Must set input dimension for the first layer.")
        if self.model_loaded:
            raise RuntimeError("Cannot add layers to a loaded model.")

        layer.set_ensemble_size(self.num_nets)
        if len(self.layers) > 0:
            layer.set_input_dim(self.layers[-1].get_output_dim())
        self.layers.append(layer.copy())

    def pop(self):
        """Removes and returns the most recently added layer to the network.

        Returns: (layer) The removed layer.
        """
        if len(self.layers) == 0:
            raise RuntimeError("Network is empty.")
        if self.finalized:
            raise RuntimeError("Cannot modify network structure after finalizing.")
        if self.model_loaded:
            raise RuntimeError("Cannot remove layers from a loaded model.")

        return self.layers.pop()

    def finalize(self, optimizer, optimizer_args=None, *args, **kwargs):
        """Finalizes the network.

        Arguments:
            optimizer: (tf.train.Optimizer) An optimizer class from those available at
                tf.train.Optimizer.
            optimizer_args: (dict) A dictionary of arguments for the __init__ method of the
                chosen optimizer.

        Returns: None
        """
        if len(self.layers) == 0:
            raise RuntimeError("Cannot finalize an empty network.")
        if self.finalized:
            raise RuntimeError("Can only finalize a network once.")

        optimizer_args = {} if optimizer_args is None else optimizer_args
        self.optimizer = optimizer(**optimizer_args)

        # Add variance output.
        self.layers[-1].set_output_dim(2 * self.layers[-1].get_output_dim())

        # Remove last activation to isolate variance from activation function.
        self.end_act = self.layers[-1].get_activation()
        self.end_act_name = self.layers[-1].get_activation(as_func=False)
        self.layers[-1].unset_activation()

        # Construct all variables.
        with self.sess.as_default():
            with tf.variable_scope(self.name):
                self.scaler = TensorStandardScaler(self.layers[0].get_input_dim())
                self.max_logvar = tf.Variable(
                    np.ones([1, self.layers[-1].get_output_dim() // 2]) / 2.,
                    dtype=tf.float32, name="max_log_var")
                self.min_logvar = tf.Variable(
                    -np.ones([1, self.layers[-1].get_output_dim() // 2]) * 10.,
                    dtype=tf.float32, name="min_log_var")
                for i, layer in enumerate(self.layers):
                    with tf.variable_scope("Layer%i" % i):
                        layer.construct_vars()
                        self.decays.extend(layer.get_decays())
                        self.optvars.extend(layer.get_vars())
        self.optvars.extend([self.max_logvar, self.min_logvar])
        self.nonoptvars.extend(self.scaler.get_vars())

        # Set up training
        with tf.variable_scope(self.name):
            self.optimizer = optimizer(**optimizer_args)
            self.sy_train_in = tf.placeholder(
                dtype=tf.float32,
                shape=[self.num_nets, None, self.layers[0].get_input_dim()],
                name="training_inputs")
            self.sy_train_targ = tf.placeholder(
                dtype=tf.float32,
                shape=[self.num_nets, None, self.layers[-1].get_output_dim() // 2],
                name="training_targets")
            train_loss = tf.reduce_sum(
                self._compile_losses(self.sy_train_in, self.sy_train_targ, inc_var_loss=True))
            train_loss += tf.add_n(self.decays)
            # Regularization: keep max_logvar from growing too far beyond the training
            # distribution and min_logvar from dropping too far below it.
            train_loss += 0.01 * tf.reduce_sum(self.max_logvar) - \
                0.01 * tf.reduce_sum(self.min_logvar)
            self.mse_loss = self._compile_losses(self.sy_train_in, self.sy_train_targ,
                                                 inc_var_loss=False)
            self.train_op = self.optimizer.minimize(train_loss, var_list=self.optvars)

        # Initialize all variables
        self.sess.run(
            tf.variables_initializer(self.optvars + self.nonoptvars +
                                     self.optimizer.variables()))

        # Set up prediction
        with tf.variable_scope(self.name):
            self.sy_pred_in2d = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.layers[0].get_input_dim()],
                name="2D_training_inputs")
            self.sy_pred_mean2d_fac, self.sy_pred_var2d_fac = \
                self.create_prediction_tensors(self.sy_pred_in2d, factored=True)
            self.sy_pred_mean2d = tf.reduce_mean(self.sy_pred_mean2d_fac, axis=0)
            self.sy_pred_var2d = tf.reduce_mean(self.sy_pred_var2d_fac, axis=0) + \
                tf.reduce_mean(tf.square(self.sy_pred_mean2d_fac - self.sy_pred_mean2d), axis=0)

            self.sy_pred_in3d = tf.placeholder(
                dtype=tf.float32,
                shape=[self.num_nets, None, self.layers[0].get_input_dim()],
                name="3D_training_inputs")
            self.sy_pred_mean3d_fac, self.sy_pred_var3d_fac = \
                self.create_prediction_tensors(self.sy_pred_in3d, factored=True)

        # Load model if needed.
        # Note: self.optimizer.variables() do not need to be saved and loaded.
        if self.model_loaded:
            with self.sess.as_default():
                params_dict = loadmat(os.path.join(self.model_dir, "%s.mat" % self.name))
                all_vars = self.nonoptvars + self.optvars
                for i, var in enumerate(all_vars):
                    var.load(params_dict[str(i)])

        self.finalized = True
    #################
    # Model Methods #
    #################

    def train(self, inputs, targets,
              batch_size=32, epochs=100,
              hide_progress=False, holdout_ratio=0.0, max_logging=5000):
        """Trains/Continues network training.

        Arguments:
            inputs (np.ndarray): Network inputs in the training dataset in rows.
            targets (np.ndarray): Network target outputs in the training dataset in rows
                corresponding to the rows in inputs.
            batch_size (int): The minibatch size to be used for training.
            epochs (int): Number of epochs (full network passes) that will be done.
            hide_progress (bool): If True, hides the progress bar shown at the beginning
                of training.
            holdout_ratio (float): Fraction of the training data withheld for logging a
                holdout loss.
            max_logging (int): Maximum number of points used when computing logged losses.

        Returns: None
        """
        def shuffle_rows(arr):
            idxs = np.argsort(np.random.uniform(size=arr.shape), axis=-1)
            return arr[np.arange(arr.shape[0])[:, None], idxs]

        # Split into training and holdout sets
        num_holdout = min(int(inputs.shape[0] * holdout_ratio), max_logging)
        # Shuffle np.arange(inputs.shape[0])
        permutation = np.random.permutation(inputs.shape[0])
        inputs, holdout_inputs = \
            inputs[permutation[num_holdout:]], inputs[permutation[:num_holdout]]
        targets, holdout_targets = \
            targets[permutation[num_holdout:]], targets[permutation[:num_holdout]]
        holdout_inputs = np.tile(holdout_inputs[None], [self.num_nets, 1, 1])
        holdout_targets = np.tile(holdout_targets[None], [self.num_nets, 1, 1])

        with self.sess.as_default():
            self.scaler.fit(inputs)

        # Bootstrap indices in the range [0, inputs.shape[0]), one row per
        # ensemble member: shape [ensemble_size, train_sample_size].
        idxs = np.random.randint(inputs.shape[0], size=[self.num_nets, inputs.shape[0]])
        if hide_progress:
            epoch_range = range(epochs)
        else:
            epoch_range = trange(epochs, unit="epoch(s)", desc="Network training")

        for _ in epoch_range:
            for batch_num in range(int(np.ceil(idxs.shape[-1] / batch_size))):
                batch_idxs = idxs[:, batch_num * batch_size:(batch_num + 1) * batch_size]
                self.sess.run(self.train_op,
                              feed_dict={
                                  self.sy_train_in: inputs[batch_idxs],
                                  self.sy_train_targ: targets[batch_idxs]
                              })
            idxs = shuffle_rows(idxs)
            if not hide_progress:
                if holdout_ratio < 1e-12:
                    epoch_range.set_postfix({
                        "Training loss(es)":
                            self.sess.run(self.mse_loss,
                                          feed_dict={
                                              self.sy_train_in: inputs[idxs[:, :max_logging]],
                                              self.sy_train_targ: targets[idxs[:, :max_logging]]
                                          })
                    })
                else:
                    epoch_range.set_postfix({
                        "Training loss(es)":
                            self.sess.run(self.mse_loss,
                                          feed_dict={
                                              self.sy_train_in: inputs[idxs[:, :max_logging]],
                                              self.sy_train_targ: targets[idxs[:, :max_logging]]
                                          }),
                        "Holdout loss(es)":
                            self.sess.run(self.mse_loss,
                                          feed_dict={
                                              self.sy_train_in: holdout_inputs,
                                              self.sy_train_targ: holdout_targets
                                          })
                    })

    def predict(self, inputs, factored=False, *args, **kwargs):
        """Returns the distribution predicted by the model for each input vector in inputs.
        Behavior is affected by the dimensionality of inputs and factored as follows:

        inputs is 2D, factored=True: Each row is treated as an input vector.
            Returns a mean of shape [ensemble_size, batch_size, output_dim] and a variance of
            shape [ensemble_size, batch_size, output_dim], where N(mean[i, j, :], diag(var[i, j, :]))
            is the output distribution predicted by the ith model in the ensemble on input vector j.

        inputs is 2D, factored=False: Each row is treated as an input vector.
            Returns a mean of shape [batch_size, output_dim] and a variance of shape
            [batch_size, output_dim], where aggregation is performed as described in the paper.

        inputs is 3D, factored=True/False: Each row in the last dimension is treated as an
            input vector. Returns a mean of shape [ensemble_size, batch_size, output_dim] and a
            variance of shape [ensemble_size, batch_size, output_dim], where
            N(mean[i, j, :], diag(var[i, j, :])) is the output distribution predicted by the ith
            model in the ensemble on input vector [i, j].

        Arguments:
            inputs (np.ndarray): An array of input vectors in rows. See above for behavior.
            factored (bool): See above for behavior.
        """
        if len(inputs.shape) == 2:
            if factored:
                return self.sess.run(
                    [self.sy_pred_mean2d_fac, self.sy_pred_var2d_fac],
                    feed_dict={self.sy_pred_in2d: inputs})
            else:
                return self.sess.run(
                    [self.sy_pred_mean2d, self.sy_pred_var2d],
                    feed_dict={self.sy_pred_in2d: inputs})
        else:
            return self.sess.run(
                [self.sy_pred_mean3d_fac, self.sy_pred_var3d_fac],
                feed_dict={self.sy_pred_in3d: inputs})

    def create_prediction_tensors(self, inputs, factored=False, *args, **kwargs):
        """See predict() above for documentation.
        """
        factored_mean, factored_variance = self._compile_outputs(inputs)
        if inputs.shape.ndims == 2 and not factored:
            # DS: collapse the ensemble into a single Gaussian.
            # [ensemble_size, nparticle, dO] -> [nparticle, dO]
            mean = tf.reduce_mean(factored_mean, axis=0)
            variance = tf.reduce_mean(tf.square(factored_mean - mean), axis=0) + \
                tf.reduce_mean(factored_variance, axis=0)
            return mean, variance
        # TS1, TSInf: keep per-network predictions.
        # [ensemble_size, nparticle / ensemble_size, dO]
        return factored_mean, factored_variance

    def save(self, savedir=None):
        """Saves all information required to recreate this model in two files in savedir
        (or self.model_dir if savedir is None), one containing the model structure and the
        other containing all variables in the network.

        savedir (str): (Optional) Path to which files will be saved. If not provided,
            self.model_dir (the directory provided at initialization) will be used.
        """
        if not self.finalized:
            raise RuntimeError("Cannot save a model that has not been finalized.")

        model_dir = self.model_dir if savedir is None else savedir

        # Write structure to file
        with open(os.path.join(model_dir, "%s.nns" % self.name), "w+") as f:
            for layer in self.layers[:-1]:
                f.write("%s\n" % repr(layer))
            last_layer_copy = self.layers[-1].copy()
            last_layer_copy.set_activation(self.end_act_name)
            last_layer_copy.set_output_dim(last_layer_copy.get_output_dim() // 2)
            f.write("%s\n" % repr(last_layer_copy))

        # Save network parameters (including scalers) in a .mat file
        var_vals = {}
        for i, var_val in enumerate(self.sess.run(self.nonoptvars + self.optvars)):
            var_vals[str(i)] = var_val
        savemat(os.path.join(model_dir, "%s.mat" % self.name), var_vals)

    def _load_structure(self):
        """Uses the saved structure in self.model_dir with the name of this network to
        initialize the structure of this network.
        """
        structure = []
        with open(os.path.join(self.model_dir, "%s.nns" % self.name), "r") as f:
            for line in f:
                kwargs = {
                    key: val
                    for (key, val) in
                    [argval.split("=") for argval in line[3:-2].split(", ")]
                }
                kwargs["input_dim"] = int(kwargs["input_dim"])
                kwargs["output_dim"] = int(kwargs["output_dim"])
                kwargs["weight_decay"] = None if kwargs["weight_decay"] == "None" \
                    else float(kwargs["weight_decay"])
                kwargs["activation"] = None if kwargs["activation"] == "None" \
                    else kwargs["activation"][1:-1]
                kwargs["ensemble_size"] = int(kwargs["ensemble_size"])
                structure.append(FC(**kwargs))
        self.layers = structure

    #######################
    # Compilation methods #
    #######################

    def _compile_outputs(self, inputs, ret_log_var=False):
        """Compiles the output of the network at the given inputs.

        If inputs is 2D, returns a 3D tensor where output[i] is the output of the ith network
        in the ensemble. If inputs is 3D, returns a 3D tensor where output[i] is the output of
        the ith network on the ith input matrix.

        Arguments:
            inputs: (tf.Tensor) A tensor representing the inputs to the network
            ret_log_var: (bool) If True, returns the log variance instead of the variance.

        Returns: (tf.Tensors) The mean and variance/log variance predictions at inputs for
            each network in the ensemble.
        """
        dim_output = self.layers[-1].get_output_dim()
        cur_out = self.scaler.transform(inputs)
        for layer in self.layers:
            cur_out = layer.compute_output_tensor(cur_out)

        mean = cur_out[:, :, :dim_output // 2]
        if self.end_act is not None:
            mean = self.end_act(mean)

        # Soft upper bound; note that gradients flow through max_logvar here. Equivalent to
        # exp(logvar') = exp(max_logvar) * exp(logvar) / (exp(max_logvar) + exp(logvar)).
        logvar = self.max_logvar - tf.nn.softplus(
            self.max_logvar - cur_out[:, :, dim_output // 2:])
        # Soft lower bound; equivalent to exp(logvar') = exp(logvar) + exp(min_logvar).
        logvar = self.min_logvar + tf.nn.softplus(logvar - self.min_logvar)

        if ret_log_var:
            return mean, logvar
        else:
            return mean, tf.exp(logvar)

    def _compile_losses(self, inputs, targets, inc_var_loss=True):
        """Helper method for compiling the loss function.

        The loss function is obtained from the log likelihood, assuming that the output
        distribution is Gaussian, with both mean and (diagonal) covariance matrix being
        determined by network outputs.

        Arguments:
            inputs: (tf.Tensor) A tensor representing the input batch
            targets: (tf.Tensor) The desired targets for each input vector in inputs.
            inc_var_loss: (bool) If True, includes log variance loss.

        Returns: (tf.Tensor) A tensor representing the loss on the input arguments.
        """
        mean, log_var = self._compile_outputs(inputs, ret_log_var=True)
        inv_var = tf.exp(-log_var)

        if inc_var_loss:
            # Negative Gaussian log-likelihood (up to constants).
            mse_losses = tf.reduce_mean(
                tf.reduce_mean(tf.square(mean - targets) * inv_var, axis=-1), axis=-1)
            var_losses = tf.reduce_mean(tf.reduce_mean(log_var, axis=-1), axis=-1)
            total_losses = mse_losses + var_losses
        else:
            total_losses = tf.reduce_mean(
                tf.reduce_mean(tf.square(mean - targets), axis=-1), axis=-1)

        return total_losses
class NN:
    """Neural network models which cannot capture aleatoric uncertainty (but possibly
    epistemic uncertainty with ensembling).
    """

    def __init__(self, params):
        """Initializes a class instance.

        Arguments:
            params (DotMap): A dotmap of model parameters.
                .name (str): Model name, used for logging/use in variable scopes.
                    Warning: Models with the same name will overwrite each other.
                .num_networks (int): (optional) The number of networks in the ensemble.
                    Defaults to 1. Ignored if the model is being loaded.
                .model_dir (str/None): (optional) Path to the directory from which the model
                    will be loaded, and to which it is saved by default. Defaults to None.
                .load_model (bool): (optional) If True, the model will be loaded from the model
                    directory, assuming that the files were generated by a model of the same
                    name. Defaults to False.
                .sess (tf.Session/None): The session that this model will use.
                    If None, creates a session with its own associated graph. Defaults to None.
        """
        self.name = get_required_argument(params, 'name', 'Must provide name.')
        self.model_dir = params.get('model_dir', None)

        if params.get('sess', None) is None:
            config = tf.ConfigProto()
            # config.gpu_options.allow_growth = True
            self._sess = tf.Session(config=config)
        else:
            self._sess = params.get('sess')

        # Instance variables
        self.finalized = False
        self.layers, self.decays, self.optvars, self.nonoptvars = [], [], [], []
        self.scaler = None

        # Training objects
        self.optimizer = None
        self.sy_train_in, self.sy_train_targ = None, None
        self.train_op, self.mse_loss = None, None

        # Prediction objects
        self.sy_pred_in2d, self.sy_pred_mean2d_fac = None, None
        self.sy_pred_mean2d, self.sy_pred_var2d = None, None
        self.sy_pred_in3d, self.sy_pred_mean3d_fac = None, None

        if params.get('load_model', False):
            if self.model_dir is None:
                raise ValueError("Cannot load model without providing model directory.")
            self._load_structure()
            self.num_nets, self.model_loaded = self.layers[0].get_ensemble_size(), True
            print("Model loaded from %s." % self.model_dir)
        else:
            self.num_nets = params.get('num_networks', 1)
            self.model_loaded = False

        if self.num_nets == 1:
            print("Created a neural network without variance predictions.")
        else:
            print("Created an ensemble of %d neural networks without variance predictions."
                  % self.num_nets)

    @property
    def is_probabilistic(self):
        return self.num_nets > 1

    @property
    def is_tf_model(self):
        return True

    @property
    def sess(self):
        return self._sess

    ###################################
    # Network Structure Setup Methods #
    ###################################

    def add(self, layer):
        """Adds a new layer to the network.

        Arguments:
            layer: (layer) The new layer to be added to the network.
                If this is the first layer, the input dimension of the layer must be set.

        Returns: None.
        """
        if self.finalized:
            raise RuntimeError("Cannot modify network structure after finalizing.")
        if len(self.layers) == 0 and layer.get_input_dim() is None:
            raise ValueError("Must set input dimension for the first layer.")
        if self.model_loaded:
            raise RuntimeError("Cannot add layers to a loaded model.")

        layer.set_ensemble_size(self.num_nets)
        if len(self.layers) > 0:
            layer.set_input_dim(self.layers[-1].get_output_dim())
        self.layers.append(layer.copy())

    def pop(self):
        """Removes and returns the most recently added layer to the network.

        Returns: (layer) The removed layer.
        """
        if len(self.layers) == 0:
            raise RuntimeError("Network is empty.")
        if self.finalized:
            raise RuntimeError("Cannot modify network structure after finalizing.")
        if self.model_loaded:
            raise RuntimeError("Cannot remove layers from a loaded model.")

        return self.layers.pop()

    def finalize(self, optimizer, optimizer_args=None, suffix="", *args, **kwargs):
        """Finalizes the network.

        Arguments:
            optimizer: (tf.train.Optimizer) An optimizer class from those available at
                tf.train.Optimizer.
            optimizer_args: (dict) A dictionary of arguments for the __init__ method of the
                chosen optimizer.
            suffix: (str) An optional suffix appended to variable scope names, allowing
                several instances to coexist in one graph.

        Returns: None
        """
        if len(self.layers) == 0:
            raise RuntimeError("Cannot finalize an empty network.")
        if self.finalized:
            raise RuntimeError("Can only finalize a network once.")

        optimizer_args = {} if optimizer_args is None else optimizer_args
        self.optimizer = optimizer(**optimizer_args)

        # Construct all variables.
        with self.sess.as_default():
            with tf.variable_scope(self.name):
                self.scaler = TensorStandardScaler(self.layers[0].get_input_dim(), suffix)
                for i, layer in enumerate(self.layers):
                    with tf.variable_scope(("Layer%i" + suffix) % i):
                        layer.construct_vars()
                        self.decays.extend(layer.get_decays())
                        self.optvars.extend(layer.get_vars())
        self.nonoptvars.extend(self.scaler.get_vars())

        # Set up training
        with tf.variable_scope(self.name):
            self.optimizer = optimizer(**optimizer_args)
            self.sy_train_in = tf.placeholder(
                dtype=tf.float32,
                shape=[self.num_nets, None, self.layers[0].get_input_dim()],
                name="training_inputs")
            self.sy_train_targ = tf.placeholder(
                dtype=tf.float32,
                shape=[self.num_nets, None, self.layers[-1].get_output_dim()],
                name="training_targets")
            train_loss = tf.reduce_sum(
                self._compile_losses(self.sy_train_in, self.sy_train_targ))
            train_loss += tf.add_n(self.decays)
            self.mse_loss = self._compile_losses(self.sy_train_in, self.sy_train_targ)
            self.train_op = self.optimizer.minimize(train_loss, var_list=self.optvars)

        # Initialize all variables
        self.sess.run(
            tf.variables_initializer(self.optvars + self.nonoptvars +
                                     self.optimizer.variables()))

        # Set up prediction
        with tf.variable_scope(self.name):
            self.sy_pred_in2d = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.layers[0].get_input_dim()],
                name="2D_training_inputs")
            self.sy_pred_mean2d_fac = \
                self.create_prediction_tensors(self.sy_pred_in2d, factored=True)[0]
            self.sy_pred_mean2d = tf.reduce_mean(self.sy_pred_mean2d_fac, axis=0)
            self.sy_pred_var2d = tf.reduce_mean(
                tf.square(self.sy_pred_mean2d_fac - self.sy_pred_mean2d), axis=0)

            self.sy_pred_in3d = tf.placeholder(
                dtype=tf.float32,
                shape=[self.num_nets, None, self.layers[0].get_input_dim()],
                name="3D_training_inputs")
            self.sy_pred_mean3d_fac = \
                self.create_prediction_tensors(self.sy_pred_in3d, factored=True)[0]

        # Load model if needed
        if self.model_loaded:
            with self.sess.as_default():
                params_dict = loadmat(os.path.join(self.model_dir, "%s.mat" % self.name))
                all_vars = self.nonoptvars + self.optvars
                for i, var in enumerate(all_vars):
                    var.load(params_dict[str(i)])

        self.finalized = True
    #################
    # Model Methods #
    #################

    def train(self, inputs, targets,
              batch_size=32, epochs=100,
              hide_progress=False, holdout_ratio=0.0, max_logging=5000):
        """Trains/Continues network training. See BNN.train above for argument documentation."""
        def shuffle_rows(arr):
            idxs = np.argsort(np.random.uniform(size=arr.shape), axis=-1)
            return arr[np.arange(arr.shape[0])[:, None], idxs]

        # Split into training and holdout sets
        num_holdout = min(int(inputs.shape[0] * holdout_ratio), max_logging)
        permutation = np.random.permutation(inputs.shape[0])
        inputs, holdout_inputs = \
            inputs[permutation[num_holdout:]], inputs[permutation[:num_holdout]]
        targets, holdout_targets = \
            targets[permutation[num_holdout:]], targets[permutation[:num_holdout]]
        holdout_inputs = np.tile(holdout_inputs[None], [self.num_nets, 1, 1])
        holdout_targets = np.tile(holdout_targets[None], [self.num_nets, 1, 1])

        with self.sess.as_default():
            self.scaler.fit(inputs)

        idxs = np.random.randint(inputs.shape[0], size=[self.num_nets, inputs.shape[0]])
        if hide_progress:
            epoch_range = range(epochs)
        else:
            epoch_range = trange(epochs, unit="epoch(s)", desc="Network training")

        for _ in epoch_range:
            for batch_num in range(int(np.ceil(idxs.shape[-1] / batch_size))):
                batch_idxs = idxs[:, batch_num * batch_size:(batch_num + 1) * batch_size]
                self.sess.run(self.train_op,
                              feed_dict={
                                  self.sy_train_in: inputs[batch_idxs],
                                  self.sy_train_targ: targets[batch_idxs]
                              })
            idxs = shuffle_rows(idxs)
            if not hide_progress:
                if holdout_ratio < 1e-12:
                    epoch_range.set_postfix({
                        "Current loss(es)":
                            self.sess.run(self.mse_loss,
                                          feed_dict={
                                              self.sy_train_in: inputs[idxs[:, :max_logging]],
                                              self.sy_train_targ: targets[idxs[:, :max_logging]]
                                          })
                    })
                else:
                    epoch_range.set_postfix({
                        "Current loss(es)":
                            self.sess.run(self.mse_loss,
                                          feed_dict={
                                              self.sy_train_in: inputs[idxs[:, :max_logging]],
                                              self.sy_train_targ: targets[idxs[:, :max_logging]]
                                          }),
                        "Holdout loss(es)":
                            self.sess.run(self.mse_loss,
                                          feed_dict={
                                              self.sy_train_in: holdout_inputs,
                                              self.sy_train_targ: holdout_targets
                                          })
                    })

    def predict(self, inputs, factored=False, *args, **kwargs):
        """Returns the prediction made by the model for each input vector in inputs.
        Behavior is affected by the dimensionality of inputs and factored as follows:

        inputs is 2D, factored=True: Each row is treated as an input vector.
            Returns a mean of shape [ensemble_size, batch_size, output_dim] and None in place
            of the variance, since this model does not predict aleatoric variance.

        inputs is 2D, factored=False: Each row is treated as an input vector.
            Returns a mean of shape [batch_size, output_dim] and a variance of shape
            [batch_size, output_dim] measuring the spread of the ensemble means.

        inputs is 3D, factored=True/False: Each row in the last dimension is treated as an
            input vector. Returns a mean of shape [ensemble_size, batch_size, output_dim] and
            None in place of the variance, where mean[i, j, :] is the prediction of the ith
            model in the ensemble on input vector [i, j].

        Arguments:
            inputs (np.ndarray): An array of input vectors in rows. See above for behavior.
            factored (bool): See above for behavior.
        """
        if len(inputs.shape) == 2:
            if factored:
                mean = self.sess.run(self.sy_pred_mean2d_fac,
                                     feed_dict={self.sy_pred_in2d: inputs})
                return mean, None
            else:
                return self.sess.run([self.sy_pred_mean2d, self.sy_pred_var2d],
                                     feed_dict={self.sy_pred_in2d: inputs})
        else:
            mean = self.sess.run(self.sy_pred_mean3d_fac,
                                 feed_dict={self.sy_pred_in3d: inputs})
            return mean, None

    def create_prediction_tensors(self, inputs, factored=False, *args, **kwargs):
        """See predict() above for documentation.
        """
        factored_mean = self._compile_outputs(inputs)
        if inputs.shape.ndims == 2 and not factored:
            mean = tf.reduce_mean(factored_mean, axis=0)
            variance = tf.reduce_mean(tf.square(factored_mean - mean), axis=0)
            return mean, variance
        return factored_mean, None

    def save(self, savedir=None):
        """Saves all information required to recreate this model in two files in savedir
        (or self.model_dir if savedir is None), one containing the model structure and the
        other containing all variables in the network.

        savedir (str): (Optional) Path to which files will be saved. If not provided,
            self.model_dir (the directory provided at initialization) will be used.
        """
        if not self.finalized:
            raise RuntimeError("Cannot save a model that has not been finalized.")

        model_dir = self.model_dir if savedir is None else savedir

        # Write structure to file
        with open(os.path.join(model_dir, "%s.nns" % self.name), "w+") as f:
            for layer in self.layers:
                f.write("%s\n" % repr(layer))

        # Save network parameters (including scalers) in a .mat file
        var_vals = {}
        for i, var_val in enumerate(self.sess.run(self.nonoptvars + self.optvars)):
            var_vals[str(i)] = var_val
        savemat(os.path.join(model_dir, "%s.mat" % self.name), var_vals)

    def _load_structure(self):
        """Uses the saved structure in self.model_dir with the name of this network to
        initialize the structure of this network.
        """
        structure = []
        with open(os.path.join(self.model_dir, "%s.nns" % self.name), "r") as f:
            for line in f:
                kwargs = {
                    key: val
                    for (key, val) in
                    [argval.split("=") for argval in line[3:-2].split(", ")]
                }
                kwargs["input_dim"] = int(kwargs["input_dim"])
                kwargs["output_dim"] = int(kwargs["output_dim"])
                kwargs["weight_decay"] = None if kwargs["weight_decay"] == "None" \
                    else float(kwargs["weight_decay"])
                kwargs["activation"] = None if kwargs["activation"] == "None" \
                    else kwargs["activation"][1:-1]
                kwargs["ensemble_size"] = int(kwargs["ensemble_size"])
                structure.append(FC(**kwargs))
        self.layers = structure

    #######################
    # Compilation methods #
    #######################

    def _compile_outputs(self, inputs):
        """Compiles the mean output of each network in the ensemble at the given inputs."""
        cur_out = self.scaler.transform(inputs)
        for layer in self.layers:
            cur_out = layer.compute_output_tensor(cur_out)
        return cur_out

    def _compile_losses(self, inputs, targets):
        """Compiles the (halved) mean squared error for each network in the ensemble."""
        mean = self._compile_outputs(inputs)
        return tf.reduce_mean(
            tf.reduce_mean(tf.square(mean - targets) / 2, axis=-1), axis=-1)
# NOTE: this redefinition of BNN extends the class of the same name above with
# CDF recalibration of the predicted output distributions.
class BNN:
    """Neural network models which model aleatoric uncertainty (and possibly
    epistemic uncertainty with ensembling).
    """

    def __init__(self, params):
        """Initializes a class instance.

        Arguments:
            params (DotMap): A dotmap of model parameters.
                .name (str): Model name, used for logging/use in variable scopes.
                    Warning: Models with the same name will overwrite each other.
                .num_networks (int): (optional) The number of networks in the ensemble.
                    Defaults to 1. Ignored if the model is being loaded.
                .model_dir (str/None): (optional) Path to the directory from which the model
                    will be loaded, and to which it is saved by default. Defaults to None.
                .load_model (bool): (optional) If True, the model will be loaded from the model
                    directory, assuming that the files were generated by a model of the same
                    name. Defaults to False.
                .sess (tf.Session/None): The session that this model will use.
                    If None, creates a session with its own associated graph. Defaults to None.
        """
        self.name = get_required_argument(params, 'name', 'Must provide name.')
        self.model_dir = params.get('model_dir', None)

        if params.get('sess', None) is None:
            config = tf.ConfigProto()
            # config.gpu_options.allow_growth = True
            self._sess = tf.Session(config=config)
        else:
            self._sess = params.get('sess')

        # Instance variables
        self.finalized = False
        self.layers, self.max_logvar, self.min_logvar = [], None, None
        self.decays, self.optvars, self.nonoptvars = [], [], []
        self.end_act, self.end_act_name = None, None
        self.scaler = None

        # Training objects
        self.optimizer = None
        self.sy_train_in, self.sy_train_targ = None, None
        self.train_op, self.mse_loss = None, None

        # Prediction objects
        self.sy_pred_in2d, self.sy_pred_mean2d_fac, self.sy_pred_var2d_fac = None, None, None
        self.sy_pred_mean2d, self.sy_pred_var2d = None, None
        self.sy_pred_in3d, self.sy_pred_mean3d_fac, self.sy_pred_var3d_fac = None, None, None

        if params.get('load_model', False):
            if self.model_dir is None:
                raise ValueError("Cannot load model without providing model directory.")
            self._load_structure()
            self.num_nets, self.model_loaded = self.layers[0].get_ensemble_size(), True
            print("Model loaded from %s." % self.model_dir)
        else:
            self.num_nets = params.get('num_networks', 1)
            self.model_loaded = False

        if self.num_nets == 1:
            print("Created a neural network with variance predictions.")
        else:
            print("Created an ensemble of %d neural networks with variance predictions."
                  % self.num_nets)

    @property
    def is_probabilistic(self):
        return True

    @property
    def is_tf_model(self):
        return True

    @property
    def sess(self):
        return self._sess

    ###################################
    # Network Structure Setup Methods #
    ###################################

    def add(self, layer):
        """Adds a new layer to the network.

        Arguments:
            layer: (layer) The new layer to be added to the network.
                If this is the first layer, the input dimension of the layer must be set.

        Returns: None.
        """
        if self.finalized:
            raise RuntimeError("Cannot modify network structure after finalizing.")
        if len(self.layers) == 0 and layer.get_input_dim() is None:
            raise ValueError("Must set input dimension for the first layer.")
        if self.model_loaded:
            raise RuntimeError("Cannot add layers to a loaded model.")

        layer.set_ensemble_size(self.num_nets)
        if len(self.layers) > 0:
            layer.set_input_dim(self.layers[-1].get_output_dim())
        self.layers.append(layer.copy())

    def pop(self):
        """Removes and returns the most recently added layer to the network.

        Returns: (layer) The removed layer.
        """
        if len(self.layers) == 0:
            raise RuntimeError("Network is empty.")
        if self.finalized:
            raise RuntimeError("Cannot modify network structure after finalizing.")
        if self.model_loaded:
            raise RuntimeError("Cannot remove layers from a loaded model.")

        return self.layers.pop()

    def finalize(self, optimizer, optimizer_args=None, *args, **kwargs):
        """Finalizes the network.

        Arguments:
            optimizer: (tf.train.Optimizer) An optimizer class from those available at
                tf.train.Optimizer.
            optimizer_args: (dict) A dictionary of arguments for the __init__ method of the
                chosen optimizer.

        Returns: None
        """
        if len(self.layers) == 0:
            raise RuntimeError("Cannot finalize an empty network.")
        if self.finalized:
            raise RuntimeError("Can only finalize a network once.")

        optimizer_args = {} if optimizer_args is None else optimizer_args
        self.optimizer = optimizer(**optimizer_args)

        out_dim = self.layers[-1].get_output_dim()

        # Add variance output.
        self.layers[-1].set_output_dim(2 * out_dim)

        # Remove last activation to isolate variance from activation function.
        self.end_act = self.layers[-1].get_activation()
        self.end_act_name = self.layers[-1].get_activation(as_func=False)
        self.layers[-1].unset_activation()

        self.recalibrator = RecalibrationLayer(out_dim)
        self.cal_vars = self.recalibrator.get_vars()

        # Construct all variables.
        with self.sess.as_default():
            with tf.variable_scope(self.name):
                self.scaler = TensorStandardScaler(self.layers[0].get_input_dim())
                self.max_logvar = tf.Variable(
                    np.ones([1, self.layers[-1].get_output_dim() // 2]) / 2.,
                    dtype=tf.float32, name="max_log_var")
                self.min_logvar = tf.Variable(
                    -np.ones([1, self.layers[-1].get_output_dim() // 2]) * 10.,
                    dtype=tf.float32, name="min_log_var")
                for i, layer in enumerate(self.layers):
                    with tf.variable_scope("Layer%i" % i):
                        layer.construct_vars()
                        self.decays.extend(layer.get_decays())
                        self.optvars.extend(layer.get_vars())
        self.optvars.extend([self.max_logvar, self.min_logvar])
        self.nonoptvars.extend(self.scaler.get_vars())

        # Set up training
        with tf.variable_scope(self.name):
            self.optimizer = optimizer(**optimizer_args)
            self.sy_train_in = tf.placeholder(
                dtype=tf.float32,
                shape=[self.num_nets, None, self.layers[0].get_input_dim()],
                name="training_inputs")
            self.sy_train_targ = tf.placeholder(
                dtype=tf.float32,
                shape=[self.num_nets, None, self.layers[-1].get_output_dim() // 2],
                name="training_targets")
            train_loss = tf.reduce_sum(
                self._compile_losses(self.sy_train_in, self.sy_train_targ, inc_var_loss=True))
            train_loss += tf.add_n(self.decays)
            # Regularization: keep max_logvar from growing too far beyond the training
            # distribution and min_logvar from dropping too far below it.
            train_loss += 0.01 * tf.reduce_sum(self.max_logvar) - \
                0.01 * tf.reduce_sum(self.min_logvar)
            self.mse_loss = self._compile_losses(self.sy_train_in, self.sy_train_targ,
                                                 inc_var_loss=False)
            self.train_op = self.optimizer.minimize(train_loss, var_list=self.optvars)

        # Set up calibration training: the recalibrator maps predicted CDF values to
        # empirical ones, trained with a sigmoid cross-entropy loss on its logits.
        with tf.variable_scope('calibration'):
            self.sy_cdf_in = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.recalibrator.get_output_dim()],
                name="training_inputs_cdf")
            self.sy_cdf_true = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.recalibrator.get_output_dim()],
                name="training_targets_cdf")
            self.cal_optimizer = tf.train.AdamOptimizer(learning_rate=5e-2)
            cdf_pred = self.recalibrator(self.sy_cdf_in, activation=False)
            cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=self.sy_cdf_true, logits=cdf_pred)
            self.cal_loss = tf.reduce_mean(tf.reduce_mean(cross_entropy, axis=-1), axis=-1)
            self.cal_train_op = self.cal_optimizer.minimize(self.cal_loss,
                                                            var_list=self.cal_vars)

        # Initialize all variables
        self.sess.run(
            tf.variables_initializer(self.optvars + self.nonoptvars +
                                     self.optimizer.variables() + self.cal_vars +
                                     self.cal_optimizer.variables()))

        # Set up prediction
        with tf.variable_scope(self.name):
            self.sy_pred_in2d = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.layers[0].get_input_dim()],
                name="2D_training_inputs")
            self.sy_pred_mean2d_fac, self.sy_pred_var2d_fac = \
                self.create_prediction_tensors(self.sy_pred_in2d, factored=True)
            self.sy_pred_mean2d = tf.reduce_mean(self.sy_pred_mean2d_fac, axis=0)
            self.sy_pred_var2d = tf.reduce_mean(self.sy_pred_var2d_fac, axis=0) + \
                tf.reduce_mean(tf.square(self.sy_pred_mean2d_fac - self.sy_pred_mean2d), axis=0)

            self.sy_pred_in3d = tf.placeholder(
                dtype=tf.float32,
                shape=[self.num_nets, None, self.layers[0].get_input_dim()],
                name="3D_training_inputs")
            self.sy_pred_mean3d_fac, self.sy_pred_var3d_fac = \
                self.create_prediction_tensors(self.sy_pred_in3d, factored=True)

        # Load model if needed
        if self.model_loaded:
            with self.sess.as_default():
                params_dict = loadmat(os.path.join(self.model_dir, "%s.mat" % self.name))
                all_vars = self.nonoptvars + self.optvars + self.cal_vars
                for i, var in enumerate(all_vars):
                    var.load(params_dict[str(i)])

        self.finalized = True
    #################
    # Model Methods #
    #################

    def calibrate(self, inputs, targets,
                  batch_size=32, epochs=200,
                  hide_progress=False, holdout_ratio=0.0, max_logging=5000):
        """Calibrates the network post-training.

        Arguments:
            inputs (np.ndarray): Network inputs in the training dataset in rows.
            targets (np.ndarray): Network target outputs in the training dataset in rows
                corresponding to the rows in inputs.
            batch_size (int): The minibatch size to be used for training.
            epochs (int): Number of epochs (full network passes) that will be done.
            hide_progress (bool): If True, hides the progress bar shown at the beginning
                of training.

        Returns: None
        """
        with self.sess.as_default():
            self.scaler.fit(inputs)

        all_mus, all_vars = self.predict(inputs)
        all_ys = targets

        # For each output dimension, pair each predicted CDF value with its empirical
        # frequency; these pairs are the recalibrator's training data.
        train_x = np.zeros_like(all_ys)
        train_y = np.zeros_like(all_ys)
        for d in range(all_mus.shape[1]):
            mu = all_mus[:, d]
            var = all_vars[:, d]
            ys = all_ys[:, d]
            cdf_pred = norm.cdf(ys, loc=mu, scale=np.sqrt(var))
            cdf_true = np.array(
                [np.sum(cdf_pred < p) / len(cdf_pred) for p in cdf_pred])
            train_x[:, d] = cdf_pred
            train_y[:, d] = cdf_true

        if hide_progress:
            epoch_range = range(epochs)
        else:
            epoch_range = trange(epochs, unit="epoch(s)", desc="Calibration training")

        def iterate_minibatches(inp, targs, batchsize, shuffle=True):
            assert inp.shape[0] == targs.shape[0]
            indices = np.arange(inp.shape[0])
            if shuffle:
                np.random.shuffle(indices)
            last_idx = 0
            for curr_idx in range(0, inp.shape[0] - batchsize + 1, batchsize):
                curr_batch = indices[curr_idx:curr_idx + batchsize]
                last_idx = curr_idx + batchsize
                yield inp[curr_batch], targs[curr_batch]
            if inp.shape[0] % batchsize != 0:
                last_batch = indices[last_idx:]
                yield inp[last_batch], targs[last_batch]

        for _ in epoch_range:
            for x_batch, y_batch in iterate_minibatches(train_x, train_y, batch_size):
                self.sess.run(self.cal_train_op,
                              feed_dict={
                                  self.sy_cdf_in: x_batch,
                                  self.sy_cdf_true: y_batch
                              })
            if not hide_progress:
                epoch_range.set_postfix({
                    "Training loss(es)":
                        self.sess.run(self.cal_loss,
                                      feed_dict={
                                          self.sy_cdf_in: train_x,
                                          self.sy_cdf_true: train_y
                                      })
                })

    def save_calibration_info(self, inputs, targets, save_dir, calibrate=True):
        all_mus, all_vars = self.predict(inputs)
        all_ys = targets

        all_cdfs_pred = norm.cdf(all_ys, loc=all_mus, scale=np.sqrt(all_vars))
        all_cdfs_pred_cal = self.sess.run(
            self.recalibrator(all_cdfs_pred)) if calibrate else []

        # Save calibration logs in a .mat file
        save_vals = {
            'all_mus': all_mus,
            'all_vars': all_vars,
            'all_ys': targets,
            'all_cdfs_pred': all_cdfs_pred,
            'all_cdfs_pred_cal': all_cdfs_pred_cal
        }
        savemat(os.path.join(save_dir, "calib_logs.mat"), save_vals)

    def plot_calibration(self, inputs, targets, save_dir):
        all_mus, all_vars = self.predict(inputs)
        all_ys = targets
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

        all_cdfs_pred = norm.cdf(all_ys, loc=all_mus, scale=np.sqrt(all_vars))
        all_cdfs_pred_cal = self.sess.run(self.recalibrator(all_cdfs_pred))

        for d in range(all_mus.shape[1]):
            cdf_pred = all_cdfs_pred[:, d]
            cdf_pred_cal = all_cdfs_pred_cal[:, d]

            cal_ps = np.linspace(0, 1, num=30)
            cdf_emp = [np.sum(cdf_pred < p) / len(cdf_pred) for p in cal_ps]
            cdf_emp_cal = [np.sum(cdf_pred_cal < p) / len(cdf_pred_cal) for p in cal_ps]

            plt.close('all')
            plt.figure()
            ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
            ax2 = plt.subplot2grid((3, 1), (2, 0))

            ax1.set_title('Calibration Curve BNN (dim={}, t={})'.format(d, timestamp))
            ax1.set_xlabel('Expected confidence level')
            ax1.set_ylabel('Observed confidence level')
            ax1.plot(cal_ps, cdf_emp, "s-", label='Uncalibrated')
            ax1.plot(cal_ps, cdf_emp_cal, "s-", label='Calibrated')
            ax1.plot(cal_ps, cal_ps, alpha=0.6, color='gray')
            ax1.legend()

            ax2.hist(cdf_pred, range=(0, 1), bins=10, label='Uncalibrated',
                     histtype="step", lw=2)
            ax2.hist(cdf_pred_cal, range=(0, 1), bins=10, label='Calibrated',
                     histtype="step", lw=2)
            ax2.set_xlabel("Probability of predicted value")
            ax2.set_ylabel("Count")
            ax2.legend(loc="upper left", ncol=2)

            plt.tight_layout()
            print('Saving dim={}'.format(d))
            plt.savefig(os.path.join(save_dir, 'cal_{}_dim_{}.png'.format(timestamp, d)))
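    # -----------------------------------------------------------------------
    # How calibrate() builds its training data (illustrative): for each output
    # dimension it evaluates the model's CDF at every observed target, then
    # pairs each value with its empirical frequency, e.g.
    #
    #     cdf_pred = norm.cdf(ys, loc=mu, scale=np.sqrt(var))
    #     cdf_true = np.array([np.mean(cdf_pred < p) for p in cdf_pred])
    #
    # A perfectly calibrated model has cdf_true ~= cdf_pred, so the
    # recalibrator is trained (with the sigmoid cross-entropy loss set up in
    # finalize()) to map predicted CDF values onto their empirical frequencies.
    # -----------------------------------------------------------------------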
    def sample_predictions(self, means, var, calibrate=True):
        """Samples from the predicted output distributions.

        The input shape of means and var is [N, d], where N is the batch size and d is the
        dimensionality of the state space.
        """
        if not calibrate:
            return means + tf.random_normal(
                shape=tf.shape(means), mean=0, stddev=1) * tf.sqrt(var)
        # Inverse-transform sampling through the recalibrator: draw uniform
        # probabilities, map them through the inverse recalibration function, and
        # evaluate the Gaussian quantile function at the result.
        ps = tf.random.uniform(shape=means.shape)
        ps = self.recalibrator.inv_call(ps, activation=True)
        ps = tf.clip_by_value(ps, 1e-6, 1 - 1e-6)
        dist = tfp.distributions.Normal(loc=means, scale=tf.sqrt(var))
        ret = dist.quantile(ps)
        return ret

    def train(self, inputs, targets,
              batch_size=32, epochs=100,
              hide_progress=False, holdout_ratio=0.0, max_logging=5000):
        """Trains/Continues network training.

        Arguments:
            inputs (np.ndarray): Network inputs in the training dataset in rows.
            targets (np.ndarray): Network target outputs in the training dataset in rows
                corresponding to the rows in inputs.
            batch_size (int): The minibatch size to be used for training.
            epochs (int): Number of epochs (full network passes) that will be done.
            hide_progress (bool): If True, hides the progress bar shown at the beginning
                of training.
            holdout_ratio (float): Fraction of the training data withheld for logging a
                holdout loss.
            max_logging (int): Maximum number of points used when computing logged losses.

        Returns: None
        """
        def shuffle_rows(arr):
            idxs = np.argsort(np.random.uniform(size=arr.shape), axis=-1)
            return arr[np.arange(arr.shape[0])[:, None], idxs]

        # Split into training and holdout sets
        num_holdout = min(int(inputs.shape[0] * holdout_ratio), max_logging)
        permutation = np.random.permutation(inputs.shape[0])
        inputs, holdout_inputs = \
            inputs[permutation[num_holdout:]], inputs[permutation[:num_holdout]]
        targets, holdout_targets = \
            targets[permutation[num_holdout:]], targets[permutation[:num_holdout]]
        holdout_inputs = np.tile(holdout_inputs[None], [self.num_nets, 1, 1])
        holdout_targets = np.tile(holdout_targets[None], [self.num_nets, 1, 1])

        with self.sess.as_default():
            self.scaler.fit(inputs)

        idxs = np.random.randint(inputs.shape[0], size=[self.num_nets, inputs.shape[0]])
        if hide_progress:
            epoch_range = range(epochs)
        else:
            epoch_range = trange(epochs, unit="epoch(s)", desc="Network training")

        for _ in epoch_range:
            for batch_num in range(int(np.ceil(idxs.shape[-1] / batch_size))):
                batch_idxs = idxs[:, batch_num * batch_size:(batch_num + 1) * batch_size]
                self.sess.run(self.train_op,
                              feed_dict={
                                  self.sy_train_in: inputs[batch_idxs],
                                  self.sy_train_targ: targets[batch_idxs]
                              })
            idxs = shuffle_rows(idxs)
            if not hide_progress:
                if holdout_ratio < 1e-12:
                    epoch_range.set_postfix({
                        "Training loss(es)":
                            self.sess.run(self.mse_loss,
                                          feed_dict={
                                              self.sy_train_in: inputs[idxs[:, :max_logging]],
                                              self.sy_train_targ: targets[idxs[:, :max_logging]]
                                          })
                    })
                else:
                    epoch_range.set_postfix({
                        "Training loss(es)":
                            self.sess.run(self.mse_loss,
                                          feed_dict={
                                              self.sy_train_in: inputs[idxs[:, :max_logging]],
                                              self.sy_train_targ: targets[idxs[:, :max_logging]]
                                          }),
                        "Holdout loss(es)":
                            self.sess.run(self.mse_loss,
                                          feed_dict={
                                              self.sy_train_in: holdout_inputs,
                                              self.sy_train_targ: holdout_targets
                                          })
                    })

    def predict(self, inputs, factored=False, *args, **kwargs):
        """Returns the distribution predicted by the model for each input vector in inputs.
        Behavior is affected by the dimensionality of inputs and factored as follows:

        inputs is 2D, factored=True: Each row is treated as an input vector.
            Returns a mean of shape [ensemble_size, batch_size, output_dim] and a variance of
            shape [ensemble_size, batch_size, output_dim], where N(mean[i, j, :], diag(var[i, j, :]))
            is the output distribution predicted by the ith model in the ensemble on input vector j.

        inputs is 2D, factored=False: Each row is treated as an input vector.
            Returns a mean of shape [batch_size, output_dim] and a variance of shape
            [batch_size, output_dim], where aggregation is performed as described in the paper.

        inputs is 3D, factored=True/False: Each row in the last dimension is treated as an
            input vector. Returns a mean of shape [ensemble_size, batch_size, output_dim] and a
            variance of shape [ensemble_size, batch_size, output_dim], where
            N(mean[i, j, :], diag(var[i, j, :])) is the output distribution predicted by the ith
            model in the ensemble on input vector [i, j].

        Arguments:
            inputs (np.ndarray): An array of input vectors in rows. See above for behavior.
            factored (bool): See above for behavior.
        """
        if len(inputs.shape) == 2:
            if factored:
                return self.sess.run(
                    [self.sy_pred_mean2d_fac, self.sy_pred_var2d_fac],
                    feed_dict={self.sy_pred_in2d: inputs})
            else:
                return self.sess.run(
                    [self.sy_pred_mean2d, self.sy_pred_var2d],
                    feed_dict={self.sy_pred_in2d: inputs})
        else:
            return self.sess.run(
                [self.sy_pred_mean3d_fac, self.sy_pred_var3d_fac],
                feed_dict={self.sy_pred_in3d: inputs})

    def create_prediction_tensors(self, inputs, factored=False, *args, **kwargs):
        """See predict() above for documentation.
        """
        factored_mean, factored_variance = self._compile_outputs(inputs)
        if inputs.shape.ndims == 2 and not factored:
            # Collapse the ensemble into a single Gaussian by moment matching.
            mean = tf.reduce_mean(factored_mean, axis=0)
            variance = tf.reduce_mean(tf.square(factored_mean - mean), axis=0) + \
                tf.reduce_mean(factored_variance, axis=0)
            return mean, variance
        return factored_mean, factored_variance

    def save(self, savedir=None):
        """Saves all information required to recreate this model in two files in savedir
        (or self.model_dir if savedir is None), one containing the model structure and the
        other containing all variables in the network.

        savedir (str): (Optional) Path to which files will be saved. If not provided,
            self.model_dir (the directory provided at initialization) will be used.
        """
        if not self.finalized:
            raise RuntimeError("Cannot save a model that has not been finalized.")

        model_dir = self.model_dir if savedir is None else savedir

        # Write structure to file
        with open(os.path.join(model_dir, "%s.nns" % self.name), "w+") as f:
            for layer in self.layers[:-1]:
                f.write("%s\n" % repr(layer))
            last_layer_copy = self.layers[-1].copy()
            last_layer_copy.set_activation(self.end_act_name)
            last_layer_copy.set_output_dim(last_layer_copy.get_output_dim() // 2)
            f.write("%s\n" % repr(last_layer_copy))

        # Save network parameters (including scalers and calibration variables) in a .mat file
        var_vals = {}
        for i, var_val in enumerate(
                self.sess.run(self.nonoptvars + self.optvars + self.cal_vars)):
            var_vals[str(i)] = var_val
        savemat(os.path.join(model_dir, "%s.mat" % self.name), var_vals)

    def _load_structure(self):
        """Uses the saved structure in self.model_dir with the name of this network to
        initialize the structure of this network.
        """
        structure = []
        with open(os.path.join(self.model_dir, "%s.nns" % self.name), "r") as f:
            for line in f:
                kwargs = {
                    key: val
                    for (key, val) in
                    [argval.split("=") for argval in line[3:-2].split(", ")]
                }
                kwargs["input_dim"] = int(kwargs["input_dim"])
                kwargs["output_dim"] = int(kwargs["output_dim"])
                kwargs["weight_decay"] = None if kwargs["weight_decay"] == "None" \
                    else float(kwargs["weight_decay"])
                kwargs["activation"] = None if kwargs["activation"] == "None" \
                    else kwargs["activation"][1:-1]
                kwargs["ensemble_size"] = int(kwargs["ensemble_size"])
                structure.append(FC(**kwargs))
        self.layers = structure

    #######################
    # Compilation methods #
    #######################

    def _compile_outputs(self, inputs, ret_log_var=False):
        """Compiles the output of the network at the given inputs.

        If inputs is 2D, returns a 3D tensor where output[i] is the output of the ith network
        in the ensemble. If inputs is 3D, returns a 3D tensor where output[i] is the output of
        the ith network on the ith input matrix.

        Arguments:
            inputs: (tf.Tensor) A tensor representing the inputs to the network
            ret_log_var: (bool) If True, returns the log variance instead of the variance.

        Returns: (tf.Tensors) The mean and variance/log variance predictions at inputs for
            each network in the ensemble.
        """
        dim_output = self.layers[-1].get_output_dim()
        cur_out = self.scaler.transform(inputs)
        for layer in self.layers:
            cur_out = layer.compute_output_tensor(cur_out)

        mean = cur_out[:, :, :dim_output // 2]
        if self.end_act is not None:
            mean = self.end_act(mean)

        # Soft upper and lower bounds on the log variance (see the BNN class above).
        logvar = self.max_logvar - tf.nn.softplus(
            self.max_logvar - cur_out[:, :, dim_output // 2:])
        logvar = self.min_logvar + tf.nn.softplus(logvar - self.min_logvar)

        if ret_log_var:
            return mean, logvar
        else:
            return mean, tf.exp(logvar)

    def _compile_losses(self, inputs, targets, inc_var_loss=True):
        """Helper method for compiling the loss function.

        The loss function is obtained from the log likelihood, assuming that the output
        distribution is Gaussian, with both mean and (diagonal) covariance matrix being
        determined by network outputs.

        Arguments:
            inputs: (tf.Tensor) A tensor representing the input batch
            targets: (tf.Tensor) The desired targets for each input vector in inputs.
            inc_var_loss: (bool) If True, includes log variance loss.

        Returns: (tf.Tensor) A tensor representing the loss on the input arguments.
        """
        mean, log_var = self._compile_outputs(inputs, ret_log_var=True)
        inv_var = tf.exp(-log_var)

        if inc_var_loss:
            # Negative Gaussian log-likelihood (up to constants).
            mse_losses = tf.reduce_mean(
                tf.reduce_mean(tf.square(mean - targets) * inv_var, axis=-1), axis=-1)
            var_losses = tf.reduce_mean(tf.reduce_mean(log_var, axis=-1), axis=-1)
            total_losses = mse_losses + var_losses
        else:
            total_losses = tf.reduce_mean(
                tf.reduce_mean(tf.square(mean - targets), axis=-1), axis=-1)

        return total_losses
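# ---------------------------------------------------------------------------
# Calibration workflow sketch (illustrative; the data-array names below are
# placeholders): after ordinary training, fit the recalibrator on held-out
# data, then sample through the inverse recalibration map.
#
#     model.train(train_in, train_targ, epochs=100)
#     model.calibrate(holdout_in, holdout_targ, batch_size=32)
#     mean, var = model.predict(test_in)  # np.ndarrays
#     # sample_predictions operates on tf.Tensors inside a graph:
#     samples = model.sample_predictions(tf_mean, tf_var, calibrate=True)
#     model.plot_calibration(holdout_in, holdout_targ, save_dir=".")
# ---------------------------------------------------------------------------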