def call(self, x, mask=None):
    if K.backend() == 'theano':
        return (K.softplus(K.pattern_broadcast(self.beta, self.param_broadcast) * x)
                * K.pattern_broadcast(self.alpha, self.param_broadcast))
    else:
        return K.softplus(self.beta * x) * self.alpha
def call(self, inputs, **kwargs):
    kernel_sigma = K.softplus(self.kernel_rho)
    kernel = self.kernel_mu + kernel_sigma * K.random_normal(self.kernel_mu.shape)

    bias_sigma = K.softplus(self.bias_rho)
    bias = self.bias_mu + bias_sigma * K.random_normal(self.bias_mu.shape)

    self.add_loss(self.kl_loss(kernel, self.kernel_mu, kernel_sigma) +
                  self.kl_loss(bias, self.bias_mu, bias_sigma))

    return self.activation(K.dot(inputs, kernel) + bias)
def call(self, x):
    # Construct the pairwise distance matrix
    D = pairwise_dists(x, x, epsilon=self.epsilon)

    # get the max intra-class distance for each sample
    max_pos = [
        K.max(K.tf.slice(D, begin=[i * self.k, i * self.k], size=[self.k, self.k]), axis=1)
        for i in range(self.p)
    ]
    max_pos = K.concatenate(max_pos, axis=0)

    # get the min inter-class distance for each sample
    min_neg = []
    for i in range(self.p):
        left = K.tf.slice(D, begin=[i * self.k, 0], size=[self.k, i * self.k])
        right = K.tf.slice(D, begin=[i * self.k, (i + 1) * self.k],
                           size=[self.k, (self.p - i - 1) * self.k])
        min_neg.append(K.min(K.concatenate([left, right], axis=1), axis=1))
    min_neg = K.concatenate(min_neg, axis=0)

    if self.use_softplus:
        return K.mean(K.softplus(self.margin + max_pos - min_neg))
    else:
        return K.mean(K.relu(self.margin + max_pos - min_neg))
def custom_loss(y_true, y_pred):
    coefs = [7.718, 2.1184316, 1.7462137, 2.7549687, 4.7066404, 7.6163553, 11.723778]
    pt_true = y_true[2]
    # log-cosh of the error: d + softplus(-2d) - log(2) == log(cosh(d))
    loss_total = (y_pred - y_true + K.softplus(-2. * (y_pred - y_true)) - K.log(2.))
    # Weight the per-sample loss by a pt-dependent coefficient
    loss = K.switch(
        tf.math.logical_and(tf.greater(pt_true, 3), tf.less(pt_true, 4)),
        tf.math.multiply(loss_total, coefs[0]),
        K.switch(
            tf.less(pt_true, 5), tf.math.multiply(loss_total, coefs[1]),
            K.switch(
                tf.less(pt_true, 6), tf.math.multiply(loss_total, coefs[2]),
                K.switch(
                    tf.less(pt_true, 7), tf.math.multiply(loss_total, coefs[3]),
                    K.switch(
                        tf.less(pt_true, 8), tf.math.multiply(loss_total, coefs[4]),
                        K.switch(
                            tf.less(pt_true, 9), tf.math.multiply(loss_total, coefs[5]),
                            K.switch(tf.less(pt_true, 10),
                                     tf.math.multiply(loss_total, coefs[6]),
                                     loss_total)))))))
    loss = K.mean(loss)
    return loss
def word2vec_loss(y_true, y_pred):
    # y_true is the label (0 or 1); y_pred is the dot-product score.
    # Map label 0 -> +1.0 and label 1 -> -1.0 so the loss becomes
    # softplus(-score) for positive pairs and softplus(score) for negative pairs.
    a = (K.cast(y_true, dtype='float32') * 2 - 1.0) * (-1.0)
    return K.softplus(a * y_pred)
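# --- Hedged side note (added; not part of the original snippet) ---
# The loss above reduces to binary cross-entropy with logits: for label 1 it is
# softplus(-score) == -log(sigmoid(score)), and for label 0 it is
# softplus(score) == -log(1 - sigmoid(score)). Quick NumPy check of those identities:
import numpy as np

def _sigmoid(s):
    return 1.0 / (1.0 + np.exp(-s))

_s = 1.7  # an arbitrary dot-product score
assert np.isclose(np.logaddexp(0.0, -_s), -np.log(_sigmoid(_s)))       # label = 1 case
assert np.isclose(np.logaddexp(0.0, _s), -np.log(1.0 - _sigmoid(_s)))  # label = 0 case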
def mish(x, fast=False):
    if fast:
        # faster but requires extra storage
        y = K.exp(-x)
        z = 1 + 2 * y
        return x * z / (z + 2 * y * y)
    # return x * tf.math.tanh(tf.math.softplus(x))
    return x * K.tanh(K.softplus(x))
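# --- Hedged sanity check (added; not from the original source) ---
# The "fast" branch above relies on the algebraic identity
#   tanh(softplus(x)) = (1 + 2*exp(-x)) / (1 + 2*exp(-x) + 2*exp(-2x)),
# so both branches should agree up to floating point. NumPy verification:
import numpy as np

def _mish_reference(x):
    return x * np.tanh(np.log1p(np.exp(x)))  # x * tanh(softplus(x))

def _mish_fast(x):
    y = np.exp(-x)
    z = 1 + 2 * y
    return x * z / (z + 2 * y * y)

_xs = np.linspace(-5.0, 5.0, 11)
assert np.allclose(_mish_reference(_xs), _mish_fast(_xs))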
def discriminate_real(y_output, batch_size=batch_size):
    # logD(x) = logZ(x) - log(Z(x) + 1) where Z(x) = sum_{k=1}^K exp(l_k(x))
    log_zx = K.logsumexp(y_output, axis=1)
    log_dx = log_zx - K.softplus(log_zx)
    dx = K.sum(K.exp(log_dx)) / batch_size
    loss = -K.sum(log_dx) / batch_size
    return loss, dx
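# --- Hedged side note (added; my reading of why softplus appears above) ---
# For Z > 0, log(Z + 1) == softplus(log Z), which is how the snippet turns
# log D(x) = log Z(x) - log(Z(x) + 1) into log_zx - softplus(log_zx). NumPy check:
import numpy as np

_z = np.array([0.1, 1.0, 10.0, 1e4])
assert np.allclose(np.log(_z + 1.0), np.logaddexp(0.0, np.log(_z)))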
def _get_weight_vector(self, M, w, head):
    cur = 0
    # split everything out
    k = head[:, cur:self.M]
    cur += self.M
    b = head[:, cur]
    cur += 1
    g = head[:, cur]
    cur += 1
    s = head[:, cur:cur + self.num_shift]
    cur += self.num_shift
    t = head[:, cur]

    # do the activations of the head
    # ref: https://blog.wtf.sg/2014/11/09/neural-turing-machines-implementation-hell/
    b = K.exp(b)
    g = K.sigmoid(g)
    s = K.softmax(s)
    t = K.softplus(t) + 1

    # DEBUG-ing purpose:
    # for _ in ['M', 'w', 'k', 'b', 'g', 's', 't']:
    #     print(_, eval(_))

    weight = _get_weight(M, w, k, b, g, s, t)
    return weight
def neg_log_likelihood(y_true, y_pred):
    y_true = y_true[:, 0]
    mean = y_pred[:, 0]
    variance = K.softplus(y_pred[:, 1]) + 1e-6
    log_variance = K.log(variance)
    return (0.5 * K.mean(log_variance, axis=-1)
            + 0.5 * K.mean(K.square(y_true - mean) / variance, axis=-1)
            + 0.5 * K.log(2 * np.pi))
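# --- Hedged cross-check (added; not part of the original loss) ---
# The expression above matches the per-sample negative log-likelihood of a Gaussian,
#   -log N(y | mu, var) = 0.5*log(var) + 0.5*(y - mu)^2 / var + 0.5*log(2*pi),
# averaged over the batch. Scalar NumPy verification of that formula:
import numpy as np

_y, _mu, _var = 1.3, 1.0, 0.25
_nll = 0.5 * np.log(_var) + 0.5 * (_y - _mu) ** 2 / _var + 0.5 * np.log(2 * np.pi)
_pdf = np.exp(-0.5 * (_y - _mu) ** 2 / _var) / np.sqrt(2 * np.pi * _var)
assert np.isclose(_nll, -np.log(_pdf))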
def dense(x, w, b, act):
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)

    act = act.lower().strip()
    if act == 'softmax':
        x = K.softmax(x)
    elif act == 'elu':
        x = K.elu(x)
    elif act == 'gelu':
        x = 0.5 * x * (1 + K.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * K.pow(x, 3))))
    elif act == 'selu':
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        x = scale * K.elu(x, alpha)
    elif act == 'softplus':
        x = K.softplus(x)
    elif act == 'softsign':
        x = K.softsign(x)
    elif act == 'relu':
        x = K.relu(x)
    elif act == 'leaky_relu':
        x = K.relu(x, alpha=0.01)
    elif act == 'tanh':
        x = K.tanh(x)
    elif act == 'sigmoid':
        x = K.sigmoid(x)
    elif act == 'hard_sigmoid':
        x = K.hard_sigmoid(x)
    return x
def pinball_loss(tau, y, q, alpha=0.01, smooth_loss=1, kappa=0, margin=0):
    error = (y - q)
    diff = q[:, 1:] - q[:, :-1]

    if smooth_loss == 0:
        # pinball function
        quantile_loss = K.mean(K.maximum(tau * error, (tau - 1) * error))
    elif smooth_loss == 1:
        # smooth pinball function
        quantile_loss = K.mean(tau * error + alpha * K.softplus(-error / alpha))
    elif smooth_loss == 2:
        # huber norm approximation
        epsilon = 2 ** -8
        # if K.abs(error) > epsilon:
        #     u = K.abs(error) - epsilon / 2
        # else:
        #     u = (error ** 2) / (2 * epsilon)
        logic = K.cast((K.abs(error) > epsilon), dtype='float64')
        u = (K.abs(error) - epsilon / 2) * logic + ((error ** 2) / (2 * epsilon)) * (1 - logic)
        quantile_loss = K.mean(K.maximum(tau * u, (tau - 1) * u))

    # penalty = -kappa * K.mean(alpha2 * K.softplus(-diff / alpha2))
    # penalty = K.mean(K.maximum(tf.Variable(tf.zeros([1], dtype=tf.float64)), margin - diff)) * kappa
    penalty = kappa * K.mean(
        tf.square(K.maximum(tf.Variable(tf.zeros([1], dtype=tf.float64)), margin - diff)))

    return quantile_loss + penalty
def activate(ab):
    a = k.exp(ab[:, 0])
    b = k.softplus(ab[:, 1])

    a = k.reshape(a, (k.shape(a)[0], 1))
    b = k.reshape(b, (k.shape(b)[0], 1))

    return k.concatenate((a, b), axis=1)
def softplus(x):
    """
    Softplus activation function.

    >>> round(softplus(0), 1)
    0.7
    """
    return K.eval(K.softplus(K.variable(x))).tolist()
def call(self, x):
    if not self.afixed:
        aloc = K.softplus(self.a - 4) * x
    else:
        aloc = self.a * x
    if self.useb:
        aloc += self.b
    return aloc
def neg_log_likelihood(truth_n, pred_nx2):
    truth_n = truth_n[:, 0]
    mean_n = pred_nx2[:, 0]
    var_n = K.softplus(pred_nx2[:, 1]) + 1e-6
    logvar_n = K.log(var_n)
    nll_n = (0.5 * K.mean(logvar_n, axis=-1)
             + 0.5 * K.mean(K.square(truth_n - mean_n) / var_n, axis=-1)
             + 0.5 * K.log(2 * np.pi))
    return nll_n
def getOptmizer(self):
    adamOptmizer = Adam(lr=self.learning_rate)

    state = K.placeholder(shape=(None, 28))
    nextState = K.placeholder(shape=(None, 28))
    actionProb = K.placeholder(shape=(None, 200))
    state_d = K.placeholder(shape=(None, 28))
    nextState_d = K.placeholder(shape=(None, 28))
    actionProb_d = K.placeholder(shape=(None, 200))
    gamma = K.variable(self.gamma)

    stateValues = K.function([self.actor.input], self.actor.output)
    rewardValue = K.function([self.rewardNetwork.input], self.rewardNetwork.output)

    reward = rewardValue(state)
    stateValue = stateValues(state)
    nextStateValue = stateValues(nextState)

    reward_d = rewardValue(state_d)
    stateValue_d = stateValues(state_d)
    nextStateValue_d = stateValues(nextState_d)

    logits = reward + gamma * nextStateValue - stateValue - actionProb
    logits_d = reward_d + gamma * nextStateValue_d - stateValue_d - actionProb_d
    loss = K.mean(K.softplus(-(logits))) + K.mean(K.softplus((logits_d)))

    updatesOnline = adamOptmizer.get_updates(self.actor.trainable_weights, [], loss)
    updatesReward = adamOptmizer.get_updates(self.rewardNetwork.trainable_weights, [], loss)

    self.updateOnline = K.function(
        [state, nextState, actionProb, state_d, nextState_d, actionProb_d],
        loss, updates=updatesOnline)
    self.updateReward = K.function(
        [state, nextState, actionProb, state_d, nextState_d, actionProb_d],
        loss, updates=updatesReward)
def kokon_loss(y_true, y):
    dim0 = K.shape(y)[0]
    loss = tf.fill(tf.stack([dim0]), 0.0)
    for i in range(18):
        for j in range(18):
        # for j in range(i + 1, 18):
            softplus = K.softplus(y[:, j] - y[:, i])
            tanh = K.tanh(y_true[:, j] - y_true[:, i])
            loss = loss + K.clip(18 + softplus * tanh, 0.0, 18.0)
    loss = K.reshape(loss, [-1, 1]) / (18 * 18)
    return loss
def call(self, x):
    z, log_alpha, log_beta = x
    log_alpha = K.clip(log_alpha, -64, 64)
    log_beta = K.clip(log_beta, -64, 64)
    alpha = K.exp(log_alpha)
    beta = K.exp(log_beta)
    a = K.softplus(self.a - 4)
    loss = (-alpha * log_beta
            + (alpha + z / a) * tf.log(1 + beta)
            - tf.lgamma(alpha + z / a)
            + tf.lgamma(alpha)
            + tf.lgamma(z / a + 1))
    posterior_mean = (a * alpha + z) / (beta + 1)
    return K.concatenate([loss, posterior_mean], axis=-1)
def D_logistic_simplegp(x_real_score, x_fake_score, x_real, x_fake,
                        r1_gamma=10.0, r2_gamma=0.0):
    d_loss = K.mean(K.softplus(x_fake_score) - K.softplus(x_real_score))

    if r1_gamma != 0.0:
        with K.name_scope('R1Penalty'):
            r1_grads = K.gradients(x_real_score, [x_real])[0]
            r1_grads_norms = K.sqrt(K.sum(r1_grads ** 2, axis=[1, 2, 3]) + 1e-8)
        d_loss += r1_grads_norms * (r1_gamma * 0.5)

    if r2_gamma != 0.0:
        with K.name_scope('R2Penalty'):
            r2_grads = K.gradients(x_fake_score, [x_fake])[0]
            r2_grads_norms = K.sqrt(K.sum(r2_grads ** 2, axis=[1, 2, 3]) + 1e-8)
        d_loss += r2_grads_norms * (r2_gamma * 0.5)

    return d_loss
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.learning_rate
    if self.initial_decay > 0:
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t)))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_' + str(i))
          for (i, p) in enumerate(params)]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_' + str(i))
          for (i, p) in enumerate(params)]
    if self.amsgrad:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='vhat_' + str(i))
                 for (i, p) in enumerate(params)]
    else:
        vhats = [K.zeros(1, name='vhat_' + str(i)) for i in range(len(params))]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        if self.amsgrad:
            vhat_t = K.maximum(vhat, v_t)
            p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
            self.updates.append(K.update(vhat, vhat_t))
        else:
            # Softplus in place of the usual (sqrt(v_t) + epsilon) Adam denominator
            p_t = p - lr_t * m_t / K.softplus(K.sqrt(v_t) + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def triplet_loss(_, y_pred):
    '''
    Assume: y_pred shape is (batch_size, 2)
    '''
    margin = K.constant(triplet_margin)
    subtraction = K.constant([1, -1], shape=(2, 1))
    diff = K.dot(K.square(y_pred), subtraction)
    # loss = K.maximum(K.constant(0), margin + diff)
    loss = K.softplus(diff)
    return loss
def transform_z0(args):
    z0, w, u, b = args
    b2 = K.squeeze(b, 1)
    beta = K.sum(tf.multiply(w, z0), 1)
    # change u2 so that the transformation z0 -> z1 is invertible
    alpha = K.sum(tf.multiply(w, u), 1)
    diag1 = tf.diag(K.softplus(alpha) - 1 - alpha)
    u2 = u + K.dot(diag1, w) / K.sum(K.square(w) + 1e-7)
    diag2 = tf.diag(K.tanh(beta + b2))
    # generate z1
    z1 = z0 + K.dot(diag2, u2)
    return z1
def activate(ab_pred):
    '''
    Keras doesn't support applying different activation functions to the
    individual neurons. Thankfully, a custom activation function takes care
    of this...
    '''
    from keras import backend as K

    a = K.exp(ab_pred[:, 0])
    b = K.softplus(ab_pred[:, 1])

    a = K.reshape(a, (K.shape(a)[0], 1))
    b = K.reshape(b, (K.shape(b)[0], 1))

    return K.concatenate((a, b), axis=1)
def logdet_loss(args):
    z0, w, u, b = args
    b2 = K.squeeze(b, 1)
    beta = K.sum(tf.multiply(w, z0), 1)  # <w|z0>
    linear_trans = beta + b2             # <w|z0> + b

    # change u2 so that the transformation z0 -> z1 is invertible
    alpha = K.sum(tf.multiply(w, u), 1)
    diag1 = tf.diag(K.softplus(alpha) - 1 - alpha)
    u2 = u + K.dot(diag1, w) / K.sum(K.square(w) + 1e-7)
    gamma = K.sum(tf.multiply(w, u2), 1)

    logdet = K.log(K.abs(1 + (1 - K.square(K.tanh(linear_trans))) * gamma) + 1e-6)
    return logdet
def get_triplet_batch_hard_loss(batch_size, margin):
    if margin == 'soft':
        print("Using soft-margin in batch-hard loss")
        final_loss_tensor = lambda hard_pos, hard_neg: K.softplus(hard_pos - hard_neg)
    else:
        try:
            margin = float(margin)
            print("Using hard-margin of {} in batch-hard loss".format(margin))
            final_loss_tensor = lambda hard_pos, hard_neg: K.maximum(
                hard_pos - hard_neg + margin, 0)
        except ValueError:
            raise util.ScrnaException('Batch hard margin must be a real number or "soft"!')

    def triplet_batch_hard_loss(y_true, y_pred):
        # y_pred is the embedding, y_true is the IDs (labels) of the samples (not 1-hot encoded)
        # They are mini-batched. If batch_size is B, and embedding dimension is D, shapes are:
        #   y_true: (B,)
        #   y_pred: (B, D)

        # Get all-pairs distances
        y_true = K.sum(y_true, axis=1)
        diffs = K.expand_dims(y_pred, axis=1) - K.expand_dims(y_pred, axis=0)
        dist_mat = K.sqrt(K.sum(K.square(diffs), axis=-1) + K.epsilon())
        same_identity_mask = K.equal(K.expand_dims(y_true, axis=1),
                                     K.expand_dims(y_true, axis=0))
        # TODO: make this backend-agnostic somehow
        negative_mask = T.bitwise_not(same_identity_mask)
        # XOR ensures that a sample is never paired with itself
        positive_mask = T.bitwise_xor(same_identity_mask, K.eye(batch_size, dtype='bool'))
        # print(K.int_shape(y_true))
        # print(K.int_shape(y_pred))
        # positive_mask = T.bitwise_xor(same_identity_mask, T.eye(K.int_shape(y_true)[0]))

        furthest_positive = K.max(dist_mat * positive_mask, axis=1)
        # closest_negative = K.min(dist_mat*negative_mask + np.inf*same_identity_mask, axis=1)
        closest_negative = K.min(dist_mat * negative_mask + 1e6 * same_identity_mask, axis=1)

        loss = final_loss_tensor(furthest_positive, closest_negative)
        return loss

    return triplet_batch_hard_loss
def pinball_loss(tau, y, q, alpha):
    """ Smooth Pinball loss function.

    Arguments:
        tau (ndarray) - quantile levels
        y (ndarray) - time series observations
        q (ndarray) - quantile predictions
        alpha (float) - smoothing rate

    Returns:
        quantile_loss (tensor) - loss
    """
    error = (y - q)
    quantile_loss = K.mean(tau * error + alpha * K.softplus(-error / alpha))
    return quantile_loss
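# --- Hedged numerical note (added; my reading of the smooth pinball term) ---
# tau*e + alpha*softplus(-e/alpha) upper-bounds the standard pinball loss
# max(tau*e, (tau - 1)*e) and converges to it as alpha -> 0. Quick NumPy check:
import numpy as np

def _softplus(x):
    return np.logaddexp(0.0, x)  # numerically stable log(1 + exp(x))

_tau = 0.9
_e = np.array([-0.5, -0.1, 0.0, 0.1, 0.5])
_pinball = np.maximum(_tau * _e, (_tau - 1) * _e)
for _alpha in (0.1, 0.01, 0.001):
    _smooth = _tau * _e + _alpha * _softplus(-_e / _alpha)
    assert np.all(_smooth >= _pinball - 1e-12)
    assert np.allclose(_smooth, _pinball, atol=max(_alpha, 1e-3))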
def univariate_gaussian(true, pred):
    """ Generic, rank-agnostic univariate gaussian function

    Returns results of eq # 24 of http://arxiv.org/abs/1308.0850
    :param true: truth values with at least [mu]
    :param pred: values predicted with at least [mu, sigma]
    :return: probability density function
    """
    x = true[..., 0]
    mu = pred[..., 0]
    sigma = pred[..., 1]

    norm = K.log(1 + K.abs(x - mu))  # needs log of norm to counter large mu diffs
    variance = K.softplus(K.square(sigma))

    # z -> 0 if sigma
    z = K.exp(-K.square(K.abs(norm)) / (2 * variance) + epsilon())

    # pdf -> 0 if sigma is very large or z -> 0; NaN if variance -> 0
    pdf = z / K.sqrt((2 * np.pi * variance) + epsilon())
    return pdf
def call(self, x, mask=None):
    a_scaler, b_scaler = tf.unstack(x, 2, -1)

    a_scaler = K.tanh(2 * a_scaler)
    b_scaler = K.tanh(2 * b_scaler)

    a = self.a_bias + (self.a_scale * a_scaler)
    b = self.b_bias + (self.b_scale * b_scaler)

    a = K.exp(a)
    b = K.softplus(b)

    a = K.clip(a, .8, 100.)
    b = K.clip(b, .8, 10.)
    b = K.pow(b, -1.) * (a + 2.) * 2.

    x = K.stack([a, b], axis=-1)
    return x
def __call__(self,
             y_true: plaidml.tile.Value,
             y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
    """ Call the LogCosh loss function.

    Parameters
    ----------
    y_true: :class:`plaidml.tile.Value`
        The ground truth value
    y_pred: :class:`plaidml.tile.Value`
        The predicted value

    Returns
    -------
    :class:`plaidml.tile.Value`
        The loss value
    """
    diff = y_pred - y_true
    loss = diff + K.softplus(-2. * diff) - K.log(K.constant(2., dtype="float32"))
    return K.mean(loss, axis=-1)
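# --- Hedged identity check (added; not part of the plaidml class above) ---
# diff + softplus(-2*diff) - log(2) is a numerically stable rewrite of log(cosh(diff)),
# which is presumably why the loss is written this way instead of calling cosh directly.
import numpy as np

_d = np.linspace(-3.0, 3.0, 13)
_stable = _d + np.logaddexp(0.0, -2.0 * _d) - np.log(2.0)
assert np.allclose(_stable, np.log(np.cosh(_d)))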
def mean_var_blindspot_network(input_shape):
    # create input layer
    inputs = Input(input_shape)

    # run blindspot network
    x = blindspot_network(inputs)

    mean = Conv2D(1, 1, name='mean')(x)
    var = Conv2D(1, 1, name='var')(x)
    scale = Lambda(lambda x: K.softplus(x) + 1e-3)(var)

    # create model
    model = Model(inputs=inputs, outputs=mean)

    # create loss function
    loss = mean_var_loss(inputs, mean, var)
    model.add_loss(loss)

    return model
from keras import backend as K
from keras.regularizers import ActivityRegularizer
import numpy as np

dummy_loss_val = K.variable(0.0)

softminus = lambda x: x - K.softplus(x)


# Dummy loss function which simply returns 0
# This is because we will be training the network using regularizers.
def dummy_loss(y_true, y_pred):
    return dummy_loss_val


def psnr(y_true, y_pred):
    assert y_true.shape == y_pred.shape, "Cannot calculate PSNR. Input shapes not same." \
        " y_true shape = %s, y_pred shape = %s" % (str(y_true.shape), str(y_pred.shape))
    return -10. * np.log10(np.mean(np.square(y_pred - y_true)))


def PSNRLoss(y_true, y_pred):
    """
    PSNR is Peak Signal to Noise Ratio, which is similar to mean squared error.

    It can be calculated as
    PSNR = 20 * log10(MAXp) - 10 * log10(MSE)

    When providing an unscaled input, MAXp = 255. Therefore 20 * log10(255) == 48.1308036087.
    However, since we are scaling our input, MAXp = 1. Therefore 20 * log10(1) = 0.
    Thus we remove that component completely and only compute the remaining MSE component.
def call(self, x, mask=None):
    from keras import backend as K
    j = K.softplus((x - 1) / self.sigma) * self.sigma
    v = self.amplitude / (self.tau_ref + self.tau_rc * K.log(1 + 1 / j))
    return K.switch(j > 0, v, 0)