def _build(self, prediction_model, drop_one_logit=False):
    """Build the mean-squared-error loss and its logging panels.

    Args:
        prediction_model: object exposing ``y`` (targets) and ``logits``
            (predictions); both are combined with TensorFlow ops.
        drop_one_logit: accepted for interface compatibility; it is not
            read anywhere in this method.

    Returns:
        The tuple ``(loss, loss_per_sample, nodes_to_log,
        names_of_nodes_to_log, filenames_to_log_to)``. The last three
        entries are whatever ``create_panels_lists`` returns for the
        single "mse" plot declared below.
    """
    targets = prediction_model.y
    predictions = prediction_model.logits

    # Squared error averaged along axis 1, then reduced to a scalar.
    squared_error = tf.square(targets - predictions)
    loss_per_sample = tf.reduce_mean(squared_error, axis=1)
    loss = tf.reduce_mean(loss_per_sample)

    # First panel will be at screen during training
    mse_plot = {
        'nodes': [loss],
        'names': ["mse"],
        'output': {
            'fileName': "mse"
        }
    }
    list_of_vpanels_of_plots = [[mse_plot]]

    panel_lists = create_panels_lists(list_of_vpanels_of_plots)
    nodes_to_log, names_of_nodes_to_log, filenames_to_log_to = panel_lists

    return (loss, loss_per_sample, nodes_to_log, names_of_nodes_to_log,
            filenames_to_log_to)
def _build(self, model, drop_one_logit=False):
    """Build the classification loss (cross-entropy or alpha-divergence) plus KL.

    The data term depends on ``self._alpha_parameter``:

    * ``None``: sparse softmax cross-entropy on the tiled labels, one
      value per (sample, example) pair.
    * ``0``: the alpha -> 0 limit of the alpha-divergence estimator, i.e.
      the mean over samples of the per-example cross-entropy. Without
      this branch ``-1. / alpha`` raises ``ZeroDivisionError``.
    * anything else:
      ``-1/alpha * (logsumexp_k(alpha * log p_k) - log(n_samples))``.

    Args:
        model: object exposing ``y``, ``kl_losses``,
            ``dataset.n_samples_train``, ``n_samples_ph`` and
            ``prediction_distr`` (with ``logits`` and ``probs``).
        drop_one_logit: accepted for interface compatibility; not read here.

    Returns:
        The tuple ``(loss, loss_per_sample, nodes_to_log,
        names_of_nodes_to_log, filenames_to_log_to)`` where
        ``loss = mean(loss_per_sample) + total_KL`` and the last three
        entries come from ``create_panels_lists``.

    Raises:
        Exception: if ``self._multiclass_metrics`` is truthy, because the
            logging panels for those metrics have not been wired up yet.
    """
    y = model.y
    shaper = tf.shape(y)

    kl_losses = model.kl_losses
    total_KL = tf.reduce_sum(kl_losses) / model.dataset.n_samples_train

    alpha = self._alpha_parameter
    n_samples = model.n_samples_ph

    logits = model.prediction_distr.logits
    probs = model.prediction_distr.probs
    n_labels = logits.shape[1]

    # Labels repeated n_samples times so they can be compared with logits.
    # NOTE(review): assumes logits hold n_samples consecutive copies of the
    # batch, in the same order as tf.tile produces -- confirm against model.
    y_tile = tf.tile(y, [n_samples])
    y_true = tf.one_hot(y_tile, n_labels)

    def sample_log_likelihood(y_true, logits):
        """Return the (n_samples, batch) log-probability of the true label."""
        sample_shape = (n_samples, shaper[0], n_labels)
        logits_reshaped = tf.reshape(logits, sample_shape)
        y_true_reshaped = tf.cast(tf.reshape(y_true, sample_shape),
                                  tf.float32)
        # Log-softmax computed by hand: shift by the max for numerical
        # stability, then subtract the log-partition function.
        shifted = logits_reshaped - tf.reduce_max(
            logits_reshaped, axis=2, keepdims=True)
        log_softmax = shifted - tf.reduce_logsumexp(
            shifted, axis=2, keepdims=True)
        # The one-hot mask selects the log-probability of the true class.
        return tf.reduce_sum(tf.multiply(y_true_reshaped, log_softmax), -1)

    def alphaloss(y_true, logits):
        """Return the per-example alpha-divergence loss over the samples."""
        log_likelihood = sample_log_likelihood(y_true, logits)
        if alpha == 0:
            # Limit of the expression below for alpha -> 0; avoids the
            # division by zero and matches the regression loss' alpha=0 case.
            return -tf.reduce_mean(log_likelihood, axis=0)
        return -1. / alpha * (
            tf.reduce_logsumexp(alpha * log_likelihood, 0) -
            tf.log(tf.cast(n_samples, tf.float32)))

    if alpha is None:
        loss_per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.cast(y_tile, tf.int32), logits=logits)
    else:
        loss_per_sample = alphaloss(y_true, logits)

    ce = tf.reduce_mean(loss_per_sample)
    loss = ce + total_KL

    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(logits, axis=1),
                         tf.cast(y_tile, dtype=tf.int64)),
                dtype=tf.float32))

    # First panel will be at screen during training
    list_of_vpanels_of_plots = [
        [
            {
                'nodes': [ce],
                'names': ["ce"],
                'output': {
                    'fileName': "ce"
                }
            },
            {
                'nodes': [total_KL],
                'names': ["kl"],
                'output': {
                    'fileName': "kl"
                }
            },
            {
                'nodes': [accuracy],
                'names': ["accuracy"],
                'output': {
                    'fileName': "accuracy"
                }
            },
        ],
        [
            {
                'nodes': [1 - accuracy],
                'names': ["error"],
                'output': {
                    'fileName': "error",
                    "logscale-y": 1
                }
            },
        ]
    ]

    if self._multiclass_metrics:
        y_pred = tf.one_hot(tf.argmax(logits, axis=1), n_labels)
        y_true = tf.one_hot(y_tile, n_labels)
        f1_micro, f1_macro, f1_weighted = tf_f1_score(y_true, y_pred)
        auc, auc_update = tf.metrics.auc(
            labels=tf.cast(y_true, dtype=tf.float32),
            predictions=probs)
        # The metrics above are not plotted yet. The intended panels were
        # [auc_update] -> "auc" and [f1_micro, f1_macro, f1_weighted] ->
        # "f1_score"; add them to list_of_vpanels_of_plots before removing
        # this guard.
        raise Exception("set panels correctly here first!")

    nodes_to_log, names_of_nodes_to_log, filenames_to_log_to = create_panels_lists(
        list_of_vpanels_of_plots)

    return loss, loss_per_sample, nodes_to_log, names_of_nodes_to_log, filenames_to_log_to
def _build(self, model):  # , drop_one_logit=False):
    """Build the regression loss (NLL or alpha-divergence) plus KL.

    With ``self._use_alpha`` set, the targets are tiled ``n_samples``
    times and the per-example loss is estimated over the samples:

    * ``self._alpha_parameter != 0``:
      ``-1/alpha * (logsumexp_k(alpha * log p_k) - log(n_samples))``,
      i.e. ``-1/alpha * log(mean_k(p_k ** alpha))``. The ``log(n_samples)``
      term makes this a log-mean-exp rather than a log-sum-exp, so it is
      continuous with the ``alpha == 0`` case below.
    * ``self._alpha_parameter == 0``: mean over samples of ``-log p_k``.

    Without ``self._use_alpha`` the loss is simply ``-log_prob(y)``.

    Args:
        model: object exposing ``n_samples_ph``, ``y``, ``prediction_distr``
            (with ``log_prob``), ``kl_losses``, ``dataset.n_samples_train``
            and ``prediction_mean``.

    Returns:
        The tuple ``(loss, loss_per_sample, nodes_to_log,
        names_of_nodes_to_log, filenames_to_log_to)`` where
        ``loss = mean(loss_per_sample) + total_KL`` and the last three
        entries come from ``create_panels_lists``.
    """
    n_samples = model.n_samples_ph
    y = model.y
    shaper = tf.shape(y)
    distr = model.prediction_distr

    if self._use_alpha:
        # Both alpha branches need the same tiled targets and log-probs.
        y_tile = tf.tile(y, [n_samples, 1])
        log_prob = distr.log_prob(y_tile)
        loss_core = -log_prob

        if self._alpha_parameter != 0:
            scaled_log_prob = tf.reshape(
                tf.scalar_mul(self._alpha_parameter, log_prob),
                (n_samples, shaper[0]))
            log_sum_exp = tf.reduce_logsumexp(scaled_log_prob, axis=0)
            # Turn the sum over samples into a mean (log-mean-exp).
            log_mean_exp = log_sum_exp - tf.log(
                tf.cast(n_samples, log_sum_exp.dtype))
            loss_per_sample = tf.scalar_mul(-1. / self._alpha_parameter,
                                            log_mean_exp)
        else:
            neg_log_prob = tf.reshape(loss_core, (n_samples, shaper[0]))
            loss_per_sample = tf.reduce_mean(neg_log_prob, axis=0)
    else:
        loss_per_sample = -distr.log_prob(y)
        loss_core = loss_per_sample

    nll = tf.reduce_mean(loss_per_sample, name="nll")

    kl_losses = model.kl_losses
    total_KL = tf.reduce_sum(kl_losses) / model.dataset.n_samples_train
    loss = nll + total_KL

    # Not plotted at the moment, but kept so the named "nll_core" node stays
    # in the graph.
    nll_core = tf.reduce_mean(loss_core, name="nll_core")

    # Author's note: for a Bayesian network the individual kl_losses could be
    # logged as well ("total_KL", "all_KLs"); kl_losses is an empty list for
    # non-Bayesian predictions.

    means = model.prediction_mean
    # The reshape/average is applied unconditionally (the `if self._use_alpha`
    # guard was commented out by the author).
    # NOTE(review): this presumably needs prediction_mean to hold
    # n_samples * batch rows even when _use_alpha is False -- confirm.
    means = tf.reshape(means, (n_samples, shaper[0], shaper[1]))
    means = tf.reduce_mean(means, axis=0)

    # Squared error summed along axis 1, then averaged to a scalar.
    mse_per_sample = tf.reduce_sum(tf.square(y - means), axis=1)
    mse = tf.reduce_mean(mse_per_sample)

    # First panel will be at screen during training
    list_of_vpanels_of_plots = [
        [
            {
                'nodes': [loss],
                'names': ["loss"],
                'output': {'fileName': "loss"}
            },
            {
                'nodes': [nll],
                'names': ["NLL"],
                'output': {'fileName': "negloglikelihood"}
            },
            {
                'nodes': [mse],
                'names': ["mse"],
                'output': {'fileName': "mse"}
            }
        ]
    ]

    nodes_to_log, names_of_nodes_to_log, filenames_to_log_to = create_panels_lists(
        list_of_vpanels_of_plots)

    return loss, loss_per_sample, nodes_to_log, names_of_nodes_to_log, filenames_to_log_to
def _build(self, vae):
    """Build the VQ-VAE training cost and its logging panels.

    The cost is ``kl_loss + vq_loss + rec_nll``, where ``kl_loss`` is
    ``warm_up * self.beta * KL`` when ``self._use_kl`` is set and ``0.``
    otherwise.

    Args:
        vae: object exposing ``_prior``, ``_approximate_posterior``,
            ``net_losses`` (with keys ``"perplexity"`` and ``"vq_loss"``),
            ``_model_visible`` and ``x_target``.

    Returns:
        The tuple ``(cost, nodes_to_log, names_of_nodes_to_log,
        filenames_to_log_to)``; the last three entries come from
        ``create_panels_lists``.
    """
    prior = vae._prior
    posterior = vae._approximate_posterior

    net_losses = vae.net_losses
    perplexity = net_losses["perplexity"]
    vq_loss = net_losses["vq_loss"]

    visible_model = vae._model_visible
    x_target = vae.x_target
    n_z_samples = 1

    warm_up = self.get_warm_up_coefficient(self._warm_up_method, vae)
    rec_nll = self.reconstruction_loss(x_target, n_z_samples, visible_model,
                                       vae)
    KL = self.latent_loss(posterior, prior)

    # use kl only if specified, original VQ-VAE paper doesn't use
    if self._use_kl:
        kl_loss = warm_up * self.beta * KL
    else:
        kl_loss = 0.

    cost = kl_loss + vq_loss + rec_nll

    # Product of every dimension of x_target except the first one.
    n_dims = tf.cast(tf.reduce_prod(tf.shape(x_target)[1:]), tf.float32)

    # TODO I think this formula is still not correct
    # check https://www.reddit.com/r/MachineLearning/comments/56m5o2/discussion_calculation_of_bitsdims/
    nll_per_dim = rec_nll / n_dims
    bits_dim = (nll_per_dim + tf.log(256.0 / 2.0)) / tf.log(2.0)

    def plot(node, label, file_name):
        """Describe a single-curve plot written to `file_name`."""
        return {
            'nodes': [node],
            'names': [label],
            'output': {
                'fileName': file_name
            }
        }

    # First panel will be at screen during training
    screen_panel = [
        plot(cost, "loss", "loss"),
        plot(bits_dim, "bd", "bits_dim"),
    ]
    details_panel = [
        plot(KL, "KL", "kl"),
        plot(rec_nll, "RL", "rl"),
        plot(perplexity, "perplexity", "perplexity"),
    ]
    list_of_vpanels_of_plots = [screen_panel, details_panel]

    panel_lists = create_panels_lists(list_of_vpanels_of_plots)
    nodes_to_log, names_of_nodes_to_log, filenames_to_log_to = panel_lists

    return cost, nodes_to_log, names_of_nodes_to_log, filenames_to_log_to
def _build(self, prediction_model):  # , drop_one_logit=False):
    """Build the sparse softmax cross-entropy loss and its logging panels.

    When ``self._nbp`` is truthy the per-sample loss is routed through an
    identity op with a custom gradient (see ``nbp`` below).

    Args:
        prediction_model: object exposing ``y`` (integer labels, cast to
            int32 here) and ``logits``.

    Returns:
        The tuple ``(loss, loss_per_sample, nodes_to_log,
        names_of_nodes_to_log, filenames_to_log_to)`` where ``loss`` is the
        mean of ``loss_per_sample`` and the last three entries come from
        ``create_panels_lists``.

    Raises:
        Exception: if ``self._multiclass_metrics`` is truthy, because the
            logging panels for those metrics have not been wired up yet.
    """
    y = prediction_model.y
    logits = prediction_model.logits
    n_labels = logits.shape[1]

    # do not delete yet (Luigi)
    # if drop_one_logit:
    #     n = logits.get_shape().as_list()[1]
    # else:

    loss_per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.cast(y, tf.int32), logits=logits)

    if self._nbp:

        @tf.custom_gradient
        def nbp(x):
            """Identity in the forward pass; rescaled gradient backwards."""

            def grad(dy):
                # Softmax probabilities, treated as constants for the
                # backward pass.
                I_theta = tf.stop_gradient(tf.nn.softmax(logits))
                # The debugger breakpoint that used to sit here was removed:
                # it stopped every run as soon as gradients were built.
                # NOTE(review): x is the per-sample loss, so dy presumably
                # has one entry per example while I_theta has one row per
                # example and one column per label -- confirm tf.matmul
                # accepts these shapes. Earlier attempts used
                # tf.linalg.inv(I_theta) instead of I_theta.
                return tf.matmul(I_theta, dy)

            return tf.identity(x), grad

        loss_per_sample = nbp(loss_per_sample)

    loss = tf.reduce_mean(loss_per_sample)

    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(logits, axis=1),
                         tf.cast(y, dtype=tf.int64)),
                dtype=tf.float32))

    # First panel will be at screen during training
    list_of_vpanels_of_plots = [
        [
            {
                'nodes': [loss],
                'names': ["ce"],
                'output': {
                    'fileName': "ce"
                }
            },
            {
                'nodes': [accuracy],
                'names': ["accuracy"],
                'output': {
                    'fileName': "accuracy"
                }
            },
        ],
        [
            {
                'nodes': [1 - accuracy],
                'names': ["error"],
                'output': {
                    'fileName': "error",
                    "logscale-y": 1
                }
            },
        ]
    ]

    if self._multiclass_metrics:
        y_pred = tf.one_hot(tf.argmax(logits, axis=1), n_labels)
        y_true = tf.one_hot(y, n_labels)
        f1_micro, f1_macro, f1_weighted = tf_f1_score(y_true, y_pred)
        auc, auc_update = tf.metrics.auc(
            labels=tf.cast(y_true, dtype=tf.float32),
            predictions=tf.nn.softmax(logits))
        # The metrics above are not plotted yet. The intended panels were
        # [auc_update] -> "auc" and [f1_micro, f1_macro, f1_weighted] ->
        # "f1_score"; add them to list_of_vpanels_of_plots before removing
        # this guard.
        raise Exception("set panels correctly here first!")

    nodes_to_log, names_of_nodes_to_log, filenames_to_log_to = create_panels_lists(
        list_of_vpanels_of_plots)

    return loss, loss_per_sample, nodes_to_log, names_of_nodes_to_log, filenames_to_log_to