def base_net(input_shape, summary=False):
    """Build a convolutional feature extractor that reads one slice of the input.

    The batched input is rescaled, then only index ``-1`` along axis 1 is kept
    (presumably the most recent frame of a stack -- TODO confirm with callers)
    before three convolution stages and a dense projection.

    Args:
        input_shape: Shape of a single sample, passed straight to ``Input``.
        summary: When truthy, print the Keras model summary before returning.

    Returns:
        A Keras ``Model`` whose output is a 512-unit dense layer with tanh
        activation.
    """

    def last_image(tensor):
        # Keep only the final entry along axis 1.
        return tensor[:, -1, :]

    net_input = Input(shape=input_shape, dtype='float32')

    # Divide by 255 and shift by -0.5, then select the last slice.
    scaled = Lambda(lambda pixels: pixels / 255.0 - 0.5)(net_input)
    frame = Lambda(last_image)(scaled)

    # Stage 1: 8x8 convolution, 4x4 max pooling, tanh.
    features = Conv2D(32, (8, 8), activation=None)(frame)
    features = MaxPooling2D(pool_size=(4, 4), strides=None, padding='same')(features)
    features = tanh(features)

    # Stage 2: 4x4 convolution, 2x2 max pooling, tanh.
    features = Conv2D(64, (4, 4), activation=None)(features)
    features = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')(features)
    features = tanh(features)

    # Stage 3: 4x4 convolution and tanh, no pooling.
    features = Conv2D(64, (4, 4), activation=None)(features)
    features = tanh(features)

    embedding = Dense(512, activation='tanh')(Flatten()(features))

    model = Model(inputs=net_input, outputs=embedding)
    if summary:
        model.summary()
    return model
def step(self, x, states):
    """Run one decoding step of the attention cell.

    Args:
        x: Step input handed in by the recurrent driver. It is not read here;
            attention is computed over ``self.x_seq`` instead.
        states: Pair ``(ytm, stm)`` holding the previous output and the
            previous hidden state.

    Returns:
        ``(output, [yt, st])``. ``output`` is the attention weights when
        ``self.return_probabilities`` is truthy, otherwise the softmax
        output ``yt``. ``[yt, st]`` are the states for the next step.
    """
    prev_output, prev_hidden = states

    # Repeat the previous hidden state once per timestep and project it.
    hidden_over_time = K.repeat(prev_hidden, self.timesteps)
    projected_hidden = K.dot(hidden_over_time, self.W_a)

    # Attention energies, exponentiated and normalised over axis 1.
    energies = K.dot(
        activations.tanh(projected_hidden + self._uxpb),
        K.expand_dims(self.V_a))
    attention = K.exp(energies)
    normaliser = K.repeat(K.sum(attention, axis=1), self.timesteps)
    attention = attention / normaliser

    # Context vector: attention-weighted combination of the input sequence.
    context = K.squeeze(K.batch_dot(attention, self.x_seq, axes=1), axis=1)

    # Reset gate.
    reset_gate = activations.sigmoid(
        K.dot(prev_output, self.W_r)
        + K.dot(prev_hidden, self.U_r)
        + K.dot(context, self.C_r)
        + self.b_r)

    # Update gate.
    update_gate = activations.sigmoid(
        K.dot(prev_output, self.W_z)
        + K.dot(prev_hidden, self.U_z)
        + K.dot(context, self.C_z)
        + self.b_z)

    # Candidate hidden state; the reset gate scales the previous hidden state.
    candidate = activations.tanh(
        K.dot(prev_output, self.W_p)
        + K.dot((reset_gate * prev_hidden), self.U_p)
        + K.dot(context, self.C_p)
        + self.b_p)

    # Blend previous and candidate hidden states with the update gate.
    new_hidden = (1 - update_gate) * prev_hidden + update_gate * candidate

    # The output distribution is computed from the *previous* hidden state.
    new_output = activations.softmax(
        K.dot(prev_output, self.W_o)
        + K.dot(prev_hidden, self.U_o)
        + K.dot(context, self.C_o)
        + self.b_o)

    next_states = [new_output, new_hidden]
    if self.return_probabilities:
        return attention, next_states
    return new_output, next_states
def loss_func(y_true, y_pred):
    """Combine a Gram-matrix distance with a mean-value distance for ``y_pred``.

    ``gram_true`` and ``y_true_mean`` are not defined in this block; they are
    resolved from the surrounding scope. ``y_true`` is accepted to satisfy the
    ``(y_true, y_pred)`` signature but is not read.

    Returns:
        Squared Gram difference summed over the last two axes, plus ten times
        the squared mean difference summed over the last axis.
    """
    lmse = local_mean_square_error(y_pred)

    # Squash the local error into a bounded response.
    response = (1.01 - tanh(lmse * 10) - tanh(lmse) / 100) / 1.01

    # Product of the sizes of axes -2, -3 and -4, used to normalise the Gram matrix.
    dims = tf.shape(response)
    element_count = dims[-2] * dims[-3] * dims[-4]
    gram_pred = gram_matrix(response) / tf.cast(element_count, tf.float32)

    y_pred_mean = K.mean(y_pred, axis=[-4, -3, -2])

    gram_term = K.sum(K.square(gram_pred - gram_true), axis=[-2, -1])
    mean_term = K.sum(K.square(y_pred_mean - y_true_mean), axis=[-1]) * 10
    return gram_term + mean_term
def call(self, inputs, training=None, mask=None):
    """Map ``inputs`` to the parameters of an action distribution.

    ``training`` and ``mask`` are accepted but not read.

    Returns:
        Dict with keys ``'mu'`` and ``'sigma'``. ``mu`` is the first
        ``self.action_dimension`` columns of the network output. ``sigma`` is
        ``tanh(1)`` everywhere when ``self.fix_sigma`` is truthy, otherwise
        ``exp(tanh(.))`` of the remaining columns.
    """
    head = self.dout(self.d2(self.d1(inputs)))

    split_at = self.action_dimension
    mu = head[:, :split_at]

    if self.fix_sigma:
        sigma = tanh(tf.ones_like(mu, dtype=tf.float32))
    else:
        sigma = tf.exp(tanh(head[:, split_at:]))

    return {'mu': mu, 'sigma': sigma}
def CreateGenerator(DropOut, Alpha, InputShape=(512, 512, 3)):
    """Assemble the generator: eight ``ConvLayer`` stages, then eight ``ConvTransLayer`` stages.

    Args:
        DropOut: Passed as ``DropOutRate`` to the second, third and fourth
            ``ConvTransLayer`` stages.
        Alpha: Forwarded unchanged to every ``ConvLayer`` / ``ConvTransLayer``.
        InputShape: Shape handed to ``Input``.

    Returns:
        A Keras ``Model`` from the input to ``tanh`` of the last stage.
    """
    GeneratorInput = Input(InputShape)
    base_filters = 16

    # Down path: the first stage disables batch normalisation.
    encoder = [
        ConvLayer(GeneratorInput, NumFilters=base_filters,
                  BatchNormalizationON=False, Alpha=Alpha)
    ]
    for multiplier in (2, 4, 8, 8, 8, 8, 8):
        encoder.append(
            ConvLayer(encoder[-1], NumFilters=base_filters * multiplier, Alpha=Alpha))

    # Up path, stage 1: starts from the deepest stage, no ``convOut``.
    decoded = ConvTransLayer(encoder[7], base_filters * 8, Alpha=Alpha)

    # Stages 2-4: paired with encoder stages 7, 6, 5 and given the dropout rate.
    # ``convOut`` presumably supplies a skip connection -- confirm in ConvTransLayer.
    for skip in (encoder[6], encoder[5], encoder[4]):
        decoded = ConvTransLayer(decoded, base_filters * 8, convOut=skip,
                                 DropOutRate=DropOut, Alpha=Alpha)

    # Stages 5-7: shrinking filter counts, paired with encoder stages 4, 3, 2.
    for filters, skip in ((base_filters * 4, encoder[3]),
                          (base_filters * 2, encoder[2]),
                          (base_filters, encoder[1])):
        decoded = ConvTransLayer(decoded, filters, convOut=skip, Alpha=Alpha)

    # Stage 8: three output filters, no activation and no batch normalisation.
    decoded = ConvTransLayer(decoded, 3, convOut=encoder[0], Activation=False,
                             BatchNormalizationON=False, Alpha=Alpha)

    GenOut = tanh(decoded)
    return Model(inputs=GeneratorInput, outputs=GenOut)
def call(self, x, mask=None):
    """Pool ``x`` over axis 1 using learned attention weights.

    Args:
        x: Input tensor; it is contracted with ``self.W`` and summed over axis 1.
        mask: Optional mask multiplied into the exponentiated scores before
            they are normalised.

    Returns:
        The weighted sum over axis 1, or ``[weighted_sum, weights]`` when
        ``self.return_attention`` is truthy.
    """
    projected = tf.tensordot(x, self.W, axes=1)
    if self.bias:
        projected = projected + self.b
    hidden = activations.tanh(projected)

    scores = tf.tensordot(hidden, self.u, axes=1)
    weights = activations.exponential(scores)

    if mask is not None:
        # Mask after the exp; the division below re-normalises.
        weights = weights * tf.cast(mask, K.floatx())

    # Epsilon keeps the division finite when the sum is close to zero.
    total = tf.cast(K.sum(weights, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    weights = K.expand_dims(weights / total)

    pooled = K.sum(x * weights, axis=1)
    if self.return_attention:
        return [pooled, weights]
    return pooled
def get_mixture_coef(self, out_tensor):
    """Split ``out_tensor`` into mixture-density parameters.

    The original author cites eqns 18-23 of http://arxiv.org/abs/1308.0850.
    The first three channels of the last axis are pen-state logits; the next
    ``6 * num_mixture`` channels are six equal groups.

    Returns:
        ``[z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr, z_pen, z_pen_logits]``
        where the mixture weights and pen states are softmaxed, the sigmas are
        exponentiated and the correlation is passed through tanh.
    """
    num_mix = self.hps['num_mixture']
    pen_logits = out_tensor[:, :, 0:3]

    # Six consecutive groups of ``num_mix`` channels, starting at channel 3.
    groups = []
    for group_index in range(6):
        start = 3 + num_mix * group_index
        groups.append(out_tensor[:, :, start:start + num_mix])
    pi_logits, mu1, mu2, log_sigma1, log_sigma2, raw_corr = groups

    mix_weights = softmax(pi_logits)
    pen_probs = softmax(pen_logits)

    sigma1 = K.exp(log_sigma1)
    sigma2 = K.exp(log_sigma2)
    corr = tanh(raw_corr)  # squashed into (-1, 1)

    return [mix_weights, mu1, mu2, sigma1, sigma2, corr, pen_probs, pen_logits]
def GRU(dim, x):
    """Unroll a hand-written gated recurrent cell over axis 1 of ``x``.

    Args:
        dim: Width of every dense projection (and of the hidden state).
        x: Input tensor; it is unstacked along axis 1 and processed step by step.

    Returns:
        ``(h, H)``: the final hidden state and all hidden states stacked back
        along axis 1.
    """
    # Input projections carry a bias; recurrent projections do not.
    update_from_input = layers.Dense(dim)
    update_from_state = layers.Dense(dim, use_bias=False)
    reset_from_input = layers.Dense(dim)
    reset_from_state = layers.Dense(dim, use_bias=False)
    candidate_from_input = layers.Dense(dim)
    candidate_from_state = layers.Dense(dim, use_bias=False)

    steps = tf.unstack(x, axis=1)

    # Zero initial state shaped like one projected step.
    h = tf.zeros_like(candidate_from_input(steps[0]))

    history = []
    for step_input in steps:
        z = sigmoid(update_from_input(step_input) + update_from_state(h))
        r = sigmoid(reset_from_input(step_input) + reset_from_state(h))
        # The reset gate scales the already-projected hidden state.
        candidate = tanh(candidate_from_input(step_input) + candidate_from_state(h) * r)
        h = (1 - z) * h + z * candidate
        history.append(h)

    return h, tf.stack(history, axis=1)
def __call__(self, message_up, message_down):
    """Produce an action distribution and two outgoing messages.

    The two incoming messages are concatenated on the last axis and fed to
    both ``self.action_net`` and ``self.message_net``.

    Returns:
        ``(action, message_1, message_2)`` where ``action`` is a dict with keys
        ``'mu'`` and ``'sigma'``, and the messages are normalised column slices
        of the message network output.
    """
    joint = tf.concat([message_up, message_down], axis=-1)
    action_out = self.action_net(joint)
    message_out = self.message_net(joint)

    n_actions = self.action_dim
    action_mu = action_out[:, :n_actions]
    if self.fix_sigma:
        # Constant sigma of tanh(1) for every action component.
        action_sigma = tanh(tf.ones_like(action_mu, dtype=tf.float32))
    else:
        action_sigma = tf.exp(tanh(action_out[:, n_actions:]))

    # First message: all but the last ``message_dim`` columns.
    # Second message: everything from column ``message_dim`` onward.
    message_1 = self.normalize(message_out[:, :-self.message_dim])
    message_2 = self.normalize(message_out[:, self.message_dim:])

    return {'mu': action_mu, 'sigma': action_sigma}, message_1, message_2
def call(self, inputs, training=False):
    """Feed ``inputs`` forward through the layer stack.

    Every layer in ``self.layers_`` except the last is followed by ``relu``
    with ``alpha=self.leaky_alpha`` and then ``self.dropout``. The last
    layer's output is passed through tanh.
    """
    hidden = inputs
    for hidden_layer in self.layers_[:-1]:
        pre_activation = hidden_layer(hidden)
        activated = relu(pre_activation, alpha=self.leaky_alpha)
        hidden = self.dropout(activated, training)

    output_layer = self.layers_[-1]
    return tanh(output_layer(hidden))
def call(self, encoder_outputs, decoder_outputs, mask=None):
    """Compute additive attention weights over axis 1 of the encoder outputs.

    Scores are ``V(tanh(W1(encoder_outputs) + W2(decoder_outputs)))`` and are
    normalised with a softmax over axis 1.

    Args:
        encoder_outputs: Tensor projected by ``self.W1``.
        decoder_outputs: Tensor projected by ``self.W2``; it must broadcast
            against the projected encoder outputs.
        mask: Optional additive-mask indicator that must broadcast against the
            scores. Assumed to be 1 at positions to suppress and 0 elsewhere
            -- confirm with callers.

    Returns:
        Attention weights reshaped to the first two dimensions of the softmax
        output.
    """
    w1_e = self.W1(encoder_outputs)
    w2_d = self.W2(decoder_outputs)
    scores = self.V(activations.tanh(w1_e + w2_d))

    if mask is not None:
        # A large negative offset drives masked positions to ~0 after the
        # softmax. The previous factor of -1e-9 left the scores unchanged, so
        # the mask had no effect.
        scores += (mask * -1e9)

    attention_weights = activations.softmax(scores, axis=1)
    att_shape = tf.shape(attention_weights)
    return tf.reshape(attention_weights, (att_shape[0], att_shape[1]))
def call(self, query, value):
    """Attend over each entry of ``value`` with a shared query.

    Args:
        query: Query tensor; an axis of size 1 is inserted at position 1.
        value: Iterable of encodings (one per agent, judging by the original
            variable names).

    Returns:
        The per-entry context vectors stacked along axis 1.
    """
    query = K.expand_dims(query, 1)

    contexts = []
    for agent_encode in value:
        scores = self.V(tanh(self.W5(agent_encode) + self.W6(query)))
        weights = self.softmax(scores)
        # Weighted sum of the encoding over axis 1.
        pooled = Lambda(lambda x: tf.reduce_sum(x, axis=1))(weights * agent_encode)
        contexts.append(pooled)

    return K.stack(contexts, axis=1)
def call(self, inputs, training=False):
    """Feed ``inputs`` forward with sigmoid hidden activations.

    Every layer in ``self.layers_`` except the last is followed by a sigmoid.
    The last layer's output goes through tanh when ``self.decoder`` is truthy
    and through sigmoid otherwise. ``training`` is accepted but not read.
    """
    hidden = inputs
    for hidden_layer in self.layers_[:-1]:
        hidden = sigmoid(hidden_layer(hidden))

    logits = self.layers_[-1](hidden)
    final_activation = tanh if self.decoder else sigmoid
    return final_activation(logits)
def get_initial_state(self, inputs):
    """Build the initial ``[y0, s0]`` states from ``inputs``.

    ``s0`` is ``tanh`` of the first timestep multiplied by ``self.W_s``.
    ``y0`` is an all-zero tensor with one row per sample and
    ``self.output_dim`` columns, derived from ``inputs`` so that it follows
    the batch size.
    """
    first_step = inputs[:, 0]
    s0 = activations.tanh(K.dot(first_step, self.W_s))

    # Collapse zeros over axes 1 and 2 to get one zero per sample, then widen.
    zero_per_sample = K.sum(K.zeros_like(inputs), axis=(1, 2))
    y0 = K.tile(K.expand_dims(zero_per_sample), [1, self.output_dim])

    return [y0, s0]
def propagate_gru(weight, inputs, states, units):
    """Recompute one GRU step from raw weights and return its internals.

    Uses ``hard_sigmoid`` for the gates and ``tanh`` for the candidate. The
    original notes assume bias is used, no dropout, and no ``reset_after``.

    Args:
        weight: Sequence ``[kernel, recurrent_kernel, bias]``; each is sliced
            into three ``units``-wide parts ordered update, reset, candidate.
        inputs: Input for this step (wrapped in ``K.variable``).
        states: Previous hidden state (wrapped in ``K.variable``).
        units: Width of each gate slice.

    Returns:
        Dict with keys ``'r'``, ``'z'``, ``'h'``, ``'hh'``: reset gate, update
        gate, new hidden state and candidate state.
    """
    kernel = K.variable(weight[0])
    recurrent_kernel = K.variable(weight[1])
    bias = K.variable(weight[2])

    # Column ranges of the three parts inside the fused weights.
    update_cols = slice(None, units)
    reset_cols = slice(units, units * 2)
    candidate_cols = slice(units * 2, None)

    inputs = K.variable(inputs)
    h_prev = K.variable(states)

    def input_projection(cols):
        # Input contribution of one part, with its slice of the bias added.
        return K.bias_add(K.dot(inputs, kernel[:, cols]), bias[cols])

    x_z = input_projection(update_cols)
    x_r = input_projection(reset_cols)
    x_h = input_projection(candidate_cols)

    z = hard_sigmoid(x_z + K.dot(h_prev, recurrent_kernel[:, update_cols]))
    r = hard_sigmoid(x_r + K.dot(h_prev, recurrent_kernel[:, reset_cols]))

    # The reset gate is applied to the state before the recurrent projection.
    hh = tanh(x_h + K.dot(r * h_prev, recurrent_kernel[:, candidate_cols]))

    # The update gate mixes previous and candidate state.
    h = z * h_prev + (1 - z) * hh

    return {'r': r, 'z': z, 'h': h, 'hh': hh}
def call(self, inputs):
    """Perform the forward pass of the actor network.

    Runs the parent model's ``call``, then a dense layer with batch
    normalisation and ReLU, then a tanh-squashed dense layer scaled by
    ``self.action_bound``.

    Returns:
        ``tanh(dense6(.)) * self.action_bound``.
    """
    d_4 = super(ActorModel, self).call(inputs)

    # Layer 5 from features.
    d_5_features = self.dense5_features(d_4)

    # Create the batch-norm layer once and reuse it. Building a fresh
    # ``BatchNormalization()`` on every call (as before) produced a new,
    # untracked layer per forward pass, so its moving statistics and
    # scale/offset never persisted or trained. Ideally this layer would be
    # created in ``__init__``.
    batch_norm = getattr(self, '_dense5_batch_norm', None)
    if batch_norm is None:
        batch_norm = BatchNormalization()
        self._dense5_batch_norm = batch_norm
    d_5_features = batch_norm(d_5_features)
    d_5_features = relu(d_5_features)

    # Layer 6: squash to (-1, 1), then scale to the action bound.
    d_6 = tanh(self.dense6(d_5_features))
    return tf.math.multiply(d_6, self.action_bound)
def call(self, input_tensor, training=False):
    """Predict a tanh-squashed value from ``input_tensor``.

    :param input_tensor: Per the original documentation, the output of the
        last residual convolutional block.
    :param training: Forwarded to the batch-normalisation layer. Per the
        original documentation, True normalises with the current batch's
        statistics and False uses the statistics learned during training.
    :return: Per the original documentation, the predicted value of this state
        (discounted sum of future rewards within a specific number of steps).
    """
    features = self.conv2d_layer(input_tensor)
    features = self.batch_norm_layer(features, training=training)
    flat = self.flatten_layer(tf.nn.relu(features))

    hidden = tf.nn.relu(self.dense_layer_1(flat))
    return tanh(self.dense_layer_2(hidden))
def call(self, x, training=False):
    """Run the network, wiring skip connections by layer name.

    For every layer except the last:
      * a name containing ``"MP"`` saves the layer's input on a stack and
        applies the layer;
      * a name containing ``"UP"`` applies the layer and adds the most
        recently saved tensor;
      * any other layer is wrapped as dropout -> layer -> relu -> dropout.

    The last layer's output is passed through tanh.
    """
    saved = []
    for layer in self.layers_[:-1]:
        layer_name = layer.name

        if "MP" in layer_name:
            saved.append(x)
            x = layer(x)
            continue

        if "UP" in layer_name:
            x = layer(x) + saved.pop()
            continue

        x = self.dropout(x, training)
        x = relu(layer(x), alpha=self.leaky_alpha)
        x = self.dropout(x, training)

    return tanh(self.layers_[-1](x))
def decoder(input, size1: Any = None, size2: Any = None, reuse: Any = None, name: Any = None):
    """Decode ``input`` through four ``double_conv`` stages into a tanh image.

    ``size1``, ``size2``, ``reuse`` and ``name`` are accepted but ignored.

    The original author's shape notes (unverified here) give the X/Y/F sizes
    as 32x32x? -> 64x64x128 -> 128x128x64 -> 128x128x32 -> 128x128x16 ->
    128x128x3.
    """
    # Touch the ignored parameters so they do not register as unused.
    _ = (size1, size2, reuse, name)

    stage = double_conv(input, 128, 2, transpose=True)
    stage = double_conv(stage, 64, 2)
    for filters in (32, 16):
        stage = double_conv(stage, filters)

    rgb = make_conv(stage, 3)
    return tanh(rgb)
def call(self, local_encoder_outputs):
    """Refine per-agent encodings over ``self.layer_num - 1`` context layers.

    The local encodings are appended to level 0 of ``self._contextual_encoder``.
    Each later level holds, per agent, a fusion of that agent's encoding with a
    pooled summary of the last timestep of every encoding on the level below.

    Returns:
        The last level of ``self._contextual_encoder``.
    """
    self._contextual_encoder[0].extend(local_encoder_outputs)

    for layer_index in range(self.layer_num - 1):
        current_level = self._contextual_encoder[layer_index]
        next_level = self._contextual_encoder[layer_index + 1]

        for agent_index in range(self.agents_num):
            # Last timestep of every encoding on this level, as a length-1 sequence.
            last_steps = [
                K.reshape(enc[:, -1, :], [self.batch_size, 1, self.encode_dim])
                for enc in current_level
            ]

            # Sum them, scale by 1 / (agents_num - 1), and repeat over time.
            pooled = Lambda(lambda x: tf.add_n(x))(last_steps)
            pooled = pooled / (self.agents_num - 1)
            pooled = K.tile(pooled, [1, local_encoder_outputs[0].shape[1], 1])

            fused = tanh(self.w3(current_level[agent_index]) + self.w4(pooled))
            next_level.append(self.dense(self.bi_lstm(fused)))

    return self._contextual_encoder[-1]
def generator(momentum=0.8):
    """Build the ``GAN_GEN`` generator model.

    Structure: a 9x9 head convolution, sixteen residual blocks, a trunk
    convolution added back to the head, two 2x upsampling stages, and a 9x9
    output convolution followed by tanh.

    Args:
        momentum: Momentum passed to every ``BatchNormalization`` layer.

    Returns:
        A Keras ``Model`` named ``"GAN_GEN"`` taking inputs of shape
        ``(None, None, 3)``.
    """
    lr_in = tf.keras.Input(shape=(None, None, 3))

    # Head: k9n64s1 followed by PReLU.
    head = Conv2D(filters=64, kernel_size=(9, 9), padding='SAME')(lr_in)
    head = PReLU(shared_axes=[1, 2])(head)

    # Sixteen residual blocks: conv-BN-PReLU-conv-BN plus identity.
    trunk = head
    for _ in range(16):
        branch = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME')(trunk)  # k3n64s1
        branch = BatchNormalization(momentum=momentum)(branch)
        branch = PReLU(shared_axes=[1, 2])(branch)
        branch = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME')(branch)  # k3n64s1
        branch = BatchNormalization(momentum=momentum)(branch)
        trunk = Add()([trunk, branch])

    # Close the trunk and add the long skip from the head.
    trunk = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME')(trunk)  # k3n64s1
    trunk = BatchNormalization(momentum=momentum)(trunk)
    hr_out = Add()([head, trunk])

    # Two upsampling stages: k3n256s1, 2x upsample, LeakyReLU.
    for _ in range(2):
        hr_out = Conv2D(filters=256, kernel_size=(3, 3), padding="SAME",
                        kernel_initializer=ICNR(GlorotUniform()))(hr_out)
        hr_out = UpSampling2D(size=2)(hr_out)
        hr_out = LeakyReLU(alpha=0.2)(hr_out)

    # Output: k9n3s1 squashed with tanh.
    hr_out = Conv2D(filters=3, kernel_size=(9, 9), padding="SAME")(hr_out)
    hr_out = tanh(hr_out)

    return Model(lr_in, hr_out, name="GAN_GEN")
def call(self, inputs, condition):
    """Compute a forward pass of the Wavenet layer.

    Shapes below are taken from the original documentation and are not
    verified by this block.

    Args:
        inputs (tf.Tensor): input data, shape=(batch size, sample length, 1)
        condition (tf.Tensor): upsampled conditional input from the encoder,
            shape=(batch size, sample length, encoding channels)

    Returns:
        (resid, skip) tuple of tf.Tensor objects.
        resid has shape (batch size, sample length, residual_channels);
        skip has shape (batch size, sample length, skip_channels).
    """
    # Sum the dilated convolution of the input and the conditioning projection.
    combined = self.dilated_conv(inputs) + self.conditional_conv(condition)

    # Split axis 2 in half: first half gates, second half carries the signal.
    gate, output = tf.split(combined, 2, axis=2)
    gated = tanh(output) * sigmoid(gate)

    skip = self.skip_conv(gated)
    resid = self.resid_conv(gated) + inputs
    return resid, skip
def calculate_alignment(self, hidden_repeated, input_seq_transformed):
    """Score alignment from the hidden state concatenated with the input sequence.

    ``hidden_repeated`` is concatenated with ``self._input_seq`` on the last
    axis, projected by ``self._query_weight``, squashed with tanh and scored
    by ``self._alignment_weight``. ``input_seq_transformed`` is accepted but
    not read by this variant.
    """
    joined = K.concatenate([hidden_repeated, self._input_seq], -1)
    projected = self._query_weight(joined)
    return self._alignment_weight(tanh(projected))
def call(self, input_tensor, mask=None):
    """Return ``tanh(input_tensor)``; ``mask`` is accepted but ignored."""
    activated = tanh(input_tensor)
    return activated
def call(self, inputs, encoder_states, encoder_inputs, init_step=False,
         prev_state=None, weighted=None):
    """Run one decoding step that mixes generation and copy probabilities.

    Shapes below come from the original author's notes.

    Args:
        inputs: ``[batch_size]`` indices of the next decoder input (y_(t-1)).
        encoder_states: ``[batch_size x seq x hidden*2]`` encoder hidden states.
        encoder_inputs: ``[batch_size x seq]`` indices fed to the encoder.
            Index 0 MUST be the padding index for the copy mask to work.
        init_step: When truthy, ``prev_state`` is derived from the last
            encoder state and ``weighted`` is reset to zeros.
        prev_state: Previous decoder state (s_(t-1)); ignored on ``init_step``.
        weighted: ``[batch_size x 1 x hidden*2]`` attention-weighted encoder
            state from the previous step; ignored on ``init_step``.

    Returns:
        ``(out, state, weighted)``: output scores of shape
        ``[batch_size x vocab+max_oovs]``, the new decoder state, and the
        attention-weighted encoder state for the next step.
    """
    batch_size = encoder_states.shape[0]
    seq_len = encoder_states.shape[1]
    vocab_size = self.vocab_size
    hidden_size = self.hidden_size

    inputs = tf.cast(inputs, tf.int32)
    encoder_inputs = tf.cast(encoder_inputs, tf.int32)
    assert batch_size == inputs.shape[0], 'encoder and decoder inputs must have same sequence batch size'

    if init_step:
        prev_state = self.Ws(encoder_states[:, -1])  # picks last state
        weighted = tf.zeros((batch_size, 1, hidden_size * 2))

    gru_input = tf.concat([tf.expand_dims(self.embed(inputs), 1), weighted], 2)
    state = self.gru(gru_input, initial_state=prev_state)
    # Explicit reshape instead of an axis-less squeeze, which would also drop
    # the batch axis when batch_size == 1.
    state = tf.reshape(state, (batch_size, hidden_size))

    # Generation scores over the vocabulary.
    score_g = self.Wo(state)

    # Copy scores: one per encoder position.
    score_c = tanh(self.Wc(tf.reshape(encoder_states, (-1, hidden_size * 2))))  # [batch_size*seq x hidden_size]
    score_c = tf.reshape(score_c, (batch_size, -1, hidden_size))  # [batch_size x seq x hidden_size]
    # Batched product gives [batch_size x seq x 1]; drop only that last axis so
    # batch_size == 1 or seq == 1 keep their dimensions.
    score_c = tf.squeeze(tf.matmul(score_c, tf.expand_dims(state, 2)), axis=2)  # [batch_size x seq]
    score_c = tanh(score_c)

    # Padded encoder positions get a large negative offset so the softmax
    # sends them close to zero.
    encoder_mask = tf.cast(tf.equal(encoder_inputs, 0), tf.float32) * (-1000)
    score_c = score_c + encoder_mask  # [batch_size x seq]

    # Joint softmax over generation and copy scores.
    score = tf.concat([score_g, score_c], 1)  # [batch_size x (vocab + seq)]
    probs = tf.nn.softmax(score)
    prob_g = probs[:, :vocab_size]  # [batch_size x vocab]
    prob_c = probs[:, vocab_size:]  # [batch_size x seq]

    if self.max_oovs > 0:
        # Reserve small-probability slots for out-of-vocabulary tokens.
        oovs = tf.zeros((batch_size, self.max_oovs), tf.float32) + 1e-5
        prob_g = tf.concat([prob_g, oovs], 1)

    # Scatter the copy probabilities onto the extended vocabulary.
    one_hot = tf.one_hot(encoder_inputs, depth=vocab_size + self.max_oovs, axis=-1)  # [batch_size x seq x vocab+max_oovs]
    prob_c_to_g = tf.matmul(tf.expand_dims(prob_c, 1), one_hot)  # [batch_size x 1 x vocab+max_oovs]
    prob_c_to_g = tf.squeeze(prob_c_to_g, axis=1)  # [batch_size x vocab+max_oovs]
    out = prob_g + prob_c_to_g  # [batch_size x vocab+max_oovs]

    # Per sample: did the current decoder input appear among the encoder inputs?
    repeat_inputs = tf.tile(tf.expand_dims(inputs, 1), multiples=(1, seq_len))
    idx_from_input = tf.equal(encoder_inputs, repeat_inputs)
    idx_from_input = tf.reduce_any(idx_from_input, axis=1)
    idx_from_input = tf.cast(idx_from_input, tf.float32)
    idx_from_input = tf.expand_dims(idx_from_input, 1)

    # Attention for the next step uses copy probabilities only where that holds.
    attn = prob_c * idx_from_input
    attn = tf.expand_dims(attn, 1)  # [batch_size x 1 x seq]
    weighted = tf.matmul(attn, encoder_states)

    return out, state, weighted
def __init__(self, in_channels, out_channels, depth, conv_num, wf, padding, norm_layer,
             up_mode='upsample', with_tanh=False, antialiasing=True, *args, **kwargs):
    """Create the layer groups of the U-Net.

    Args:
        in_channels: Stored on the instance; not otherwise used here.
        out_channels: Number of filters of the final convolution.
        depth: Number of down/up stages; must be positive. ``self.depth``
            stores ``depth - 1``.
        conv_num: Forwarded to every ``UNetConvBlock`` / ``UNetUpBlock``.
        wf: Base-2 exponent of the first stage's filter count (``2**wf``).
        padding: Forwarded to the conv and up blocks.
        norm_layer: Forwarded to the conv and up blocks.
        up_mode: Forwarded to every ``UNetUpBlock``.
        with_tanh: Append a non-trainable tanh ``Lambda`` after the last conv.
        antialiasing: Downsample with a stride-1 conv followed by
            ``BlurPool2D`` instead of a stride-2 conv.
        *args, **kwargs: Forwarded to the parent constructor.
    """
    super().__init__(*args, **kwargs)
    assert depth > 0

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.depth = depth - 1
    self.conv_num = conv_num
    self.wf = wf
    self.padding = padding
    self.norm_layer = norm_layer
    self.up_mode = up_mode
    self.with_tanh = with_tanh
    self.antialiasing = antialiasing

    first_layers = [
        ReflectionPadding2D(padding=3),
        Conv2D(2**wf, kernel_size=7, padding='valid', name='first'),
        LeakyReLU(alpha=0.2),
    ]

    use_blur_pool = antialiasing and depth > 0
    stage_channels = 2**wf
    down_sample = []
    down_path = []
    for level in range(depth):
        if use_blur_pool:
            # Stride-1 convolution, then blur-pool does the 2x reduction.
            reducer = [
                ReflectionPadding2D(padding=1),
                Conv2D(stage_channels, kernel_size=3, strides=1,
                       padding='valid', name='down_sample'),
                BatchNormalization(),
                LeakyReLU(alpha=0.2),
                BlurPool2D(stride=2),
            ]
        else:
            # A plain stride-2 convolution does the reduction.
            reducer = [
                ReflectionPadding2D(padding=1),
                Conv2D(stage_channels, kernel_size=4, strides=2,
                       padding='valid', name='down_sample'),
                BatchNormalization(),
                LeakyReLU(alpha=0.2),
            ]
        down_sample.append(reducer)

        # The filter count doubles at every level.
        stage_channels = 2**(wf + level + 1)
        down_path.append(
            UNetConvBlock(conv_num, stage_channels, padding, norm_layer, name='down_path'))

    up_path = [
        UNetUpBlock(conv_num, 2**(wf + level), up_mode, padding, norm_layer)
        for level in reversed(range(depth))
    ]

    last_layers = [
        ReflectionPadding2D(padding=1),
        Conv2D(out_channels, kernel_size=3, padding='valid', name='last'),
    ]
    if with_tanh:
        last_layers.append(Lambda(lambda x: tanh(x), trainable=False))

    self.inner_layers = [
        first_layers, down_path, down_sample, up_path, last_layers
    ]
# Activation function demo: plot sigmoid, tanh and ReLU over [-5, 5].
if __name__ == '__main__':
    # Each entry: (activation function, legend label, y-axis limits).
    #
    # sigmoid: also called the logistic function; usable for binary
    #   classification. Works fairly well when feature differences are complex
    #   or not especially large.
    # tanh: obtainable from sigmoid by shifting/scaling, but converges faster.
    #   It addresses sigmoid not being symmetric about the origin; it still
    #   saturates at both ends (gradient approaches zero).
    # relu: faster than sigmoid and tanh; the gradient does not saturate,
    #   which addresses vanishing gradients. Drawback: "fragile" in training,
    #   because the derivative is zero for negative inputs.
    demos = (
        (activations.sigmoid, 'sigmoid', (-1, 1)),
        (activations.tanh, 'tanh', (-1.2, 1.2)),
        (activations.relu, 'relu', (-0.5, 1.2)),
    )

    for activation, label, y_limits in demos:
        x = tf.linspace(-5., 5., 100)  # evenly spaced sample points
        x_ndarray = x.numpy()  # convert to ndarray for plotting
        y_ndarray = activation(x).numpy()

        plt.plot(x_ndarray, y_ndarray, c='red', label=label)  # line plot
        plt.ylim(y_limits)  # y-axis range
        plt.legend(loc='best')
        plt.show()
def tanh(x):
    """Apply ``activations.tanh`` to ``x`` and return the result."""
    activated = activations.tanh(x)
    return activated
def calculate_alignment(self, hidden_repeated, input_seq_transformed):
    """Score alignment from the sum of the hidden state and the transformed inputs.

    The two arguments are added, squashed with tanh and scored by
    ``self._alignment_weight``.
    """
    combined = hidden_repeated + input_seq_transformed
    return self._alignment_weight(tanh(combined))
y_train = to_categorical(y_train, num_classes = 10) x_test = tf.expand_dims(x_test, axis = 3) x_test = tf.image.resize(x_test, (32, 32), method = 'bilinear') x_test /= 255 y_test = to_categorical(y_test, num_classes = 10) model = Sequential() A = 1.7159 Atanh = lambda x: A * tanh(x) model.add( Conv2D( 6, (5, 5), strides = (1, 1), padding = 'valid', data_format = 'channels_last', input_shape = (32, 32, 1))) model.add( AveragePooling2D( pool_size = (2, 2), strides = (2, 2), padding = 'valid',