def dense(x, w, b, act):
    """Apply a fully connected transform followed by an optional activation.

    Computes ``K.dot(x, w)``, adds the bias ``b`` when one is supplied and
    then applies the activation selected by ``act``.

    # Arguments
        x: Input tensor.
        w: Kernel tensor, combined with `x` through `K.dot`.
        b: Bias tensor handed to `K.bias_add`; `None` or `False` means
            "no bias".
        act: Activation name. Matching is case-insensitive and ignores
            surrounding whitespace. Recognised names are 'softmax', 'elu',
            'gelu', 'selu', 'softplus', 'softsign', 'relu', 'leaky_relu',
            'tanh', 'sigmoid' and 'hard_sigmoid'. `None` or any other name
            leaves the output linear.

    # Returns
        The (optionally activated) output tensor.
    """
    x = K.dot(x, w)

    # Check for presence explicitly: truth-testing a tensor or array either
    # raises or depends on its values, not on whether a bias was given.
    if b is not None and b is not False:
        x = K.bias_add(x, b)

    # Normalise the name once instead of in every branch.
    name = '' if act is None else act.lower().strip()

    if name in ('softmax', 'elu', 'softplus', 'softsign', 'relu',
                'tanh', 'sigmoid', 'hard_sigmoid'):
        # These names coincide with the backend function of the same name.
        # The attribute is looked up lazily so only the requested function
        # has to exist on the backend.
        return getattr(K, name)(x)

    if name == 'gelu':
        # tanh-based approximation of GELU.
        inner = np.sqrt(2 / np.pi) * (x + 0.044715 * K.pow(x, 3))
        return 0.5 * x * (1 + K.tanh(inner))

    if name == 'selu':
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * K.elu(x, alpha)

    if name == 'leaky_relu':
        return K.relu(x, alpha=0.01)

    # Unknown (or missing) activation: fall through to linear output.
    return x
def map_fn(i):
    """Return the negated ELU of the loss change caused by one noise draw.

    Draws ``num_classes`` samples from ``dist``, adds their transpose to
    ``pred`` and recomputes the categorical cross-entropy (from logits)
    against ``true``. The difference to ``undistorted_loss`` is passed
    through ELU and negated. All of those names come from the enclosing
    scope; the argument ``i`` is not used.
    """
    noise = K.transpose(dist.sample(num_classes))
    noisy_loss = K.categorical_crossentropy(
        pred + noise, true, from_logits=True)
    return -K.elu(undistorted_loss - noisy_loss)
def el(x, eps=None):
    """Shifted exponential linear unit: ``elu(x) + 1 + eps``.

    `eps` falls back to 0.0 when it is not supplied.
    """
    offset = 0.0 if eps is None else eps
    return K.elu(x) + 1. + offset
def selu(x):
    """Scaled Exponential Linear Unit (Klambauer et al., 2017).

    # Arguments
        x: A tensor or variable to compute the activation function for.

    # Returns
        ``scale * elu(x, alpha)`` using the fixed constants defined below.

    # References
        - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
    """
    selu_alpha = 1.6732632423543772848170429916717
    selu_scale = 1.0507009873554804934193349852946
    activated = K.elu(x, selu_alpha)
    return selu_scale * activated
def elu_loss(y_true, y_pred):
    """ELU loss: the ELU of a gap between two prediction scores.

    Over the last axis, the sum of ``y_true * y_pred`` is subtracted from
    the maximum of ``y_pred - y_true``; the result is passed through ELU.

    # Arguments
        y_true: tensor of true targets.
        y_pred: tensor of predicted targets.

    # Returns
        Tensor reduced over the last axis of the inputs.
    """
    matched_score = K.sum(y_true * y_pred, axis=-1)
    largest_excess = K.max(y_pred - y_true, axis=-1)
    gap = largest_excess - matched_score
    return K.elu(gap)
def elu(x, alpha=1.0):
    """Exponential Linear Unit (ELU).

    For `alpha > 0` the activation is `x` when `x > 0` and
    `alpha * (exp(x) - 1)` when `x < 0`.

    `alpha` sets the negative value the unit saturates to for strongly
    negative inputs. Because ELUs produce negative outputs, mean
    activations are pushed towards zero, which reduces the vanishing
    gradient effect and brings the gradient closer to the natural gradient.
    In the saturated region the derivative is small, so less variation is
    propagated to the next layer.

    Example Usage:

    >>> import tensorflow as tf
    >>> model = tf.keras.Sequential()
    >>> model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='elu',
    ...           input_shape=(28, 28, 1)))
    >>> model.add(tf.keras.layers.MaxPooling2D((2, 2)))
    >>> model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='elu'))

    Args:
        x: Input tensor.
        alpha: A scalar, slope of the negative section; controls the
            saturation value for negative net inputs.

    Returns:
        The ELU activation: `x` if `x > 0` and `alpha * (exp(x) - 1)`
        if `x < 0`.

    Reference:
        [Fast and Accurate Deep Network Learning by Exponential Linear Units
        (ELUs) (Clevert et al, 2016)](https://arxiv.org/abs/1511.07289)
    """
    activated = backend.elu(x, alpha)
    return activated
def call(self, inputs, mask=None):
    """Compute the concatenated mixture parameters for `inputs`.

    Three independent affine transforms of `inputs` produce the mu, sigma
    and pi blocks, which are concatenated in that order. `mask` is accepted
    but not used.

    # Arguments
        inputs: Input tensor, multiplied with each kernel through `K.dot`.
        mask: Unused.

    # Returns
        The output of a `Concatenate` layer named "mdn_outputs" applied to
        ``[mu, sigma, pi]``.
    """
    # Dense transform for mu (mean) of the gaussians.
    mu_output = K.dot(inputs, self.mu_kernel)
    mu_output = K.bias_add(mu_output, self.mu_bias,
                           data_format='channels_last')

    # Dense transform for sigma of the gaussians.
    sigma_output = K.dot(inputs, self.sigma_kernel)
    sigma_output = K.bias_add(sigma_output, self.sigma_bias,
                              data_format='channels_last')
    # Avoid NaNs: elu(x) is bounded below by -1, so adding 1 + epsilon keeps
    # sigma strictly positive.
    sigma_output = K.elu(sigma_output) + 1 + K.epsilon()

    # Dense transform for pi (amplitude) of the gaussians.
    pi_output = K.dot(inputs, self.pi_kernel)
    pi_output = K.bias_add(pi_output, self.pi_bias,
                           data_format='channels_last')

    # `name` is a constructor argument of the layer. Passing it to the layer
    # call would forward it to Concatenate.call(), which does not accept it.
    output = Concatenate(name="mdn_outputs")(
        [mu_output, sigma_output, pi_output])
    return output
def call(self, idx):
    """Return KL divergences for the index pairs in `idx`.

    The first column of `idx` selects source rows of ``self.embedding`` and
    the remaining columns select target rows. The pair is passed through
    `logarithmic_map` and then `parallel_transport` (towards
    ``self.mu_zero``); both helpers are defined elsewhere. Sigmas are
    gathered with the full `idx` and mapped through ``elu(.) + 1``. The
    result of `kullback_leibler_divergence` is returned with its last axis
    squeezed.
    """
    sources = tf.gather(self.embedding, idx[:, :1])
    targets = tf.gather(self.embedding, idx[:, 1:])

    tangent = logarithmic_map(sources, targets)
    tangent_at_mu_zero = parallel_transport(sources, self.mu_zero, tangent)

    raw_sigmas = tf.gather(self.sigmas, idx)
    positive_sigmas = K.elu(raw_sigmas) + 1.

    divergences = kullback_leibler_divergence(
        mus=tangent_at_mu_zero, sigmas=positive_sigmas)
    return K.squeeze(divergences, axis=-1)
def elu(x, alpha=1.0):
    """
    Exponential Linear Unit, evaluated immediately through the backend.
    See: https://arxiv.org/abs/1511.07289v1

    Scalar reference definition::

        x                        if x >= 0
        alpha * (exp(x) - 1.0)   otherwise

    The input is wrapped in a backend variable, activated, evaluated and
    converted with ``.tolist()``.

    >>> elu(0.0)
    0.0
    >>> elu(1.0)
    1.0
    >>> elu(0.5, alpha=0.3)
    0.5
    >>> round(elu(-1), 1)
    -0.6
    """
    as_variable = K.variable(x)
    activated = K.elu(as_variable, alpha)
    return K.eval(activated).tolist()
def elu(x, alpha=1.0):
    """
    Compute the ELU activation of `x` and return the evaluated result.
    See: https://arxiv.org/abs/1511.07289v1

    Equivalent scalar rule: return `x` when ``x >= 0``, otherwise
    ``alpha * (math.exp(x) - 1.0)``.

    >>> elu(0.0)
    0.0
    >>> elu(1.0)
    1.0
    >>> elu(0.5, alpha=0.3)
    0.5
    >>> round(elu(-1), 1)
    -0.6
    """
    evaluated = K.eval(K.elu(K.variable(x), alpha))
    return evaluated.tolist()
def build_headnet(N, features, embedding_dim, num_negative_samples,
                  num_hidden=128, identity_variance=False):
    """Build the HEADNet embedder and its compiled training model.

    # Arguments
        N: Size of the `Embedding` vocabulary used when `features` is None.
        features: Node attribute matrix, or `None` to train without
            attributes. Only ``features.shape[1]`` is read here.
        embedding_dim: Output width of the mean and sigma heads.
        num_negative_samples: Negative samples per positive; the training
            input has ``1 + num_negative_samples`` rows of pairs.
        num_hidden: Width of the shared hidden transform.
        identity_variance: When true, the sigma head is created
            non-trainable and wrapped in `K.stop_gradient`.

    # Returns
        Tuple ``(embedder_model, trainable_model)``.
    """
    use_attributes = features is not None

    if use_attributes:
        # HEADNet with attributes
        print("training using node attributes")
        node_input = Input((features.shape[1], ),
                           name="attributed_input_layer")
        # No activation on the Dense itself; relu is applied just below.
        hidden = Dense(
            num_hidden,
            kernel_regularizer=regularizers.l2(reg),
            bias_regularizer=regularizers.l2(reg),
            name="euclidean_transform",
        )(node_input)
    else:
        print("training without using attributes")
        node_input = Input((1, ), name="unattributed_input_layer")
        hidden = Embedding(N, num_hidden)(node_input)

    hidden = Activation("relu")(hidden)

    # Mean head: linear projection followed by exp_map_0.
    pre_hyperboloid = Dense(
        embedding_dim,
        kernel_regularizer=regularizers.l2(reg),
        bias_regularizer=regularizers.l2(reg),
        name="dense_to_hyperboloid",
    )(hidden)
    to_hyperboloid = Lambda(exp_map_0, name="to_hyperboloid")(pre_hyperboloid)

    # Sigma head: elu(x) + 1 activation with a zero-initialised kernel.
    sigma_head = Dense(
        embedding_dim,
        activation=lambda x: K.elu(x) + 1.,
        kernel_initializer="zeros",
        kernel_regularizer=regularizers.l2(reg),
        bias_regularizer=regularizers.l2(reg),
        name="dense_to_sigma",
        trainable=not identity_variance,
    )(hidden)
    if identity_variance:
        sigma_head = Lambda(K.stop_gradient,
                            name="variance_stop_gradient")(sigma_head)

    embedder_model = Model(node_input,
                           [to_hyperboloid, sigma_head],
                           name="embedder_model")

    # Training input: (1 + negatives) rows of pairs, with a trailing
    # feature axis only in the attributed case.
    if use_attributes:
        sample_shape = (1 + num_negative_samples, 2, features.shape[1], )
        sample_name = "trainable_input_attributed"
    else:
        sample_shape = (1 + num_negative_samples, 2, )
        sample_name = "trainable_input_non_attributed"
    trainable_input = Input(sample_shape, name=sample_name)

    mus, sigmas = embedder_model(trainable_input)
    assert len(mus.shape) == len(sigmas.shape) == 4

    tangent_mus = Lambda(map_to_tangent_space_mu_zero,
                         name="to_tangent_space_mu_zero")(mus)
    divergences = Lambda(kullback_leibler_divergence,
                         name="kullback_leibler_layer")([tangent_mus, sigmas])

    trainable_model = Model(trainable_input, divergences,
                            name="trainable_model")

    optimizer = AdamOptimizer(1e-3, )
    trainable_model.compile(
        optimizer=optimizer,
        loss=asym_hyperbolic_loss,
        target_tensors=[
            tf.placeholder(dtype=tf.int64, shape=(None, 1)),
        ])

    return embedder_model, trainable_model
def elu(x, alpha=1.0):
    """Forward `x` and `alpha` to the backend's ``K.elu``."""
    activated = K.elu(x, alpha)
    return activated
def elu_modif(x, a=1.):
    """Modified ELU activation: ``elu(x, alpha=a) + 1 + 1e-15``."""
    tiny = 1e-15
    # Add the terms left to right: folding 1. and the tiny constant together
    # first would round the constant away.
    shifted = K.elu(x, alpha=a) + 1.
    return shifted + tiny
def call(self, inputs, **kwargs):
    """Blend ELU and softplus of `inputs`, weighted by ``self.lmbd``.

    Returns ``lmbd * elu(inputs) + (1 - lmbd) * softplus(inputs)``.
    Extra keyword arguments are accepted and ignored.
    """
    elu_term = self.lmbd * K.elu(inputs)
    softplus_term = (1 - self.lmbd) * K.softplus(inputs)
    return elu_term + softplus_term
def _elu(x):
    """ELU of `x` with alpha fixed at 1.0."""
    unit_alpha = 1.0
    return K.elu(x, unit_alpha)
def elu_plus_one_plus_epsilon(x):
    """ELU shifted up by one plus the backend epsilon.

    The small extra term helps prevent NaN in the loss.
    """
    shifted = K.elu(x) + 1
    return shifted + K.epsilon()
def elu(x, alpha=0.05):
    """ELU via ``K.elu`` with a default `alpha` of 0.05."""
    result = K.elu(x, alpha)
    return result
def call(self, inputs):
    """Apply a stride-1 1D convolution, add the bias and activate with ELU.

    Uses ``self.kernel``, ``self.bias`` and ``self.padding``.
    """
    convolved = K.conv1d(inputs, self.kernel,
                         strides=1, padding=self.padding)
    biased = K.bias_add(convolved, self.bias)
    return K.elu(biased)
def newacti(x, alpha=1.618):
    """ELU via ``K.elu`` with a default `alpha` of 1.618."""
    activated = K.elu(x, alpha)
    return activated
def selu(x):
    """Scaled ELU built from ``tf.where`` and the unit-alpha ``K.elu``.

    Returns ``scale * x`` where ``x >= 0`` and ``scale * alpha * elu(x)``
    elsewhere, with the fixed constants below.
    """
    selu_alpha = 1.6732632423543772848170429916717
    selu_scale = 1.0507009873554804934193349852946

    is_non_negative = x >= 0.0
    negative_branch = selu_alpha * K.elu(x)
    selected = tf.where(is_non_negative, x, negative_branch)
    return selu_scale * selected
def zens(x, alpha=m.exp(-1)):
    """ELU via ``k.elu``; the default `alpha` is ``m.exp(-1)``.

    The default is evaluated once, when the function is defined.
    """
    activated = k.elu(x, alpha)
    return activated
def elu(x, alpha=1.0):
    """
    Exponential Linear Unit, evaluated and converted with ``.tolist()``.
    """
    symbolic = K.elu(K.variable(x), alpha)
    values = K.eval(symbolic)
    return values.tolist()
def call(self, inputs):
    """Apply ``backend.elu`` to `inputs` using ``self.alpha``."""
    activated = backend.elu(inputs, self.alpha)
    return activated
def call(self, inputs):
    """Apply ``K.elu`` to `inputs` using ``self.alpha``."""
    activated = K.elu(inputs, self.alpha)
    return activated
def call(self, inputs):
    """Run `self.step_do` over `inputs` in both time directions.

    A forward pass and a pass over the time-reversed inputs are each run
    through `K.rnn`, starting from six copies of the same zero state. When
    `self.attention` is truthy, every timestep of each pass is re-weighted
    by the attention block below. The backward result is reversed along
    axis 1 and concatenated with the forward result.

    Side effect: when attention is enabled, `self.attention1_1` and
    `self.attention1_2` are (re)assigned as row slices of `self.attention1`.
    """
    # Zero initial state with one row per batch element and `self.units`
    # columns; the same tensor is reused for all six state slots.
    s = K.zeros((K.shape(inputs)[0],self.units))
    init_states = [s,s,s,s,s,s]
    # Element [1] of the K.rnn result is kept (the per-timestep outputs in
    # the Keras backend API).
    outputs = K.rnn(self.step_do, inputs, init_states)[1]
    if self.attention:
        # Split attention1 by rows: the first `units` rows are applied to the
        # hidden output, the remaining rows to the raw step input.
        self.attention1_1 = self.attention1[:self.units,:]
        self.attention1_2 = self.attention1[self.units:,:]
        # Python-level loop over timesteps; requires a static inputs.shape[1].
        for i in range(inputs.shape[1]):
            step_in = inputs[:,i,:]
            h = outputs[:,i,:]
            h_atten=K.elu(K.dot(h,self.attention1_1) ) ##################tanh
            h_atten=(K.dot(h_atten,self.attention2))
            h_b=K.tanh(K.dot(step_in,self.attention1_2)+ self.biase1) ##################tanh
            h_b=(K.dot(h_b,self.attention2_2))
            # Gate the hidden output with the attention score and add the
            # input-driven term.
            h_atten = K.elu(h_atten*h + h_b)
            # Accumulate the per-step results by repeated concatenation
            # (K.concatenate's default axis).
            if i ==0:
                output_atten = h_atten
            else:
                output_atten = K.concatenate([output_atten,h_atten])
        # Restore the (timesteps, units) layout per sample.
        outputs = Reshape((inputs.shape[1],self.units))(output_atten)
    # Backward direction: same step function over the time-reversed inputs.
    init_states2 = [s,s,s,s,s,s]
    input2 = K.reverse(inputs,axes=1)
    outputs2 = K.rnn(self.step_do, input2, init_states2)[1]
    if self.attention:
        self.attention1_1 = self.attention1[:self.units,:]
        self.attention1_2 = self.attention1[self.units:,:]
        for i in range(inputs.shape[1]):
            # NOTE(review): step_in is taken from the un-reversed `inputs`
            # while `h` comes from the reversed pass -- confirm this time
            # alignment is intended.
            step_in = inputs[:,i,:]
            h = outputs2[:,i,:]
            h_atten=K.elu(K.dot(h,self.attention1_1) ) ##################0
            h_atten=(K.dot(h_atten,self.attention2))
            h_b=K.tanh(K.dot(step_in,self.attention1_2)+ self.biase1) ##################1
            h_b=(K.dot(h_b,self.attention2_2))
            h_atten = K.elu(h_atten*h + h_b)
            if i ==0:
                output_atten = h_atten
            else:
                output_atten = K.concatenate([output_atten,h_atten])
        outputs2 = Reshape((inputs.shape[1],self.units))(output_atten)
    # Flip the backward results back along axis 1 before joining them with
    # the forward results.
    outputs2 = K.reverse(outputs2,axes=1)
    outputs = (K.concatenate([outputs,outputs2]))
    # The string below is a disabled earlier variant that applied attention
    # to the concatenated (2 * units) output; it has no runtime effect.
    '''
    if self.attention:
        self.attention1_1 = self.attention1[:2*self.units,:]
        self.attention1_2 = self.attention1[2*self.units:,:]
        for i in range(inputs.shape[1]):
            step_in = inputs[:,i,:]
            h = outputs[:,i,:]
            h_atten=K.relu(K.dot(h,self.attention1_1) + 0*self.biase1) ##################0
            h_atten=(K.dot(h_atten,self.attention2))
            h_b=K.relu(K.dot(step_in,self.attention1_2)+0*self.biase2) ##################1
            h_b=(K.dot(h_b,self.attention2_2))
            h_atten = K.tanh(1*h_atten*h + 1*h_b)
            if i ==0:
                output_atten = h_atten
            else:
                output_atten = K.concatenate([output_atten,h_atten])
        outputs = Reshape((inputs.shape[1],2*self.units))(output_atten)
    '''
    return outputs
def call(self, inputs, **kwargs):
    """Blend ELU with a shifted softplus, weighted by ``self.lmbd``.

    Returns ``lmbd * elu(inputs) + (1 - lmbd) * (softplus(inputs) - alpha)``
    where `lmbd` and `alpha` are read from `self`. Extra keyword arguments
    are accepted and ignored.
    """
    elu_term = self.lmbd * K.elu(inputs)
    complement = 1 - self.lmbd
    softplus_term = complement * (K.softplus(inputs) - self.alpha)
    return elu_term + softplus_term
def cs_elu(x):
    """ELU of `x` scaled by the constant 0.577."""
    factor = .577
    return K.elu(x) * factor
def NewElu(x, alpha=1.0):
    """ELU shifted by `alpha` plus a tiny constant.

    Returns ``elu(x, alpha) + alpha + 1e-15``.
    """
    # Add the terms left to right so the tiny constant is applied last.
    shifted = K.elu(x, alpha) + alpha * 1.0
    return shifted + 1e-15
def newacti(x, alpha=m.exp(-1)):
    """ELU via ``K.elu``; the default `alpha` is ``m.exp(-1)``.

    The default is evaluated once, when the function is defined.
    """
    activated = K.elu(x, alpha)
    return activated
def BatchActivate(x):
    """Batch-normalise `x`, then apply ``elu(.) + 1`` through a Lambda layer."""
    normalized = BatchNormalization()(x)
    shifted_elu = Lambda(lambda t: K.elu(t) + 1)
    return shifted_elu(normalized)