def channel_attention(input_xs, reduction_ratio):
    """Apply CBAM-style channel attention to a 4-D feature map.

    The input is squeezed with global max pooling and global average
    pooling. Both descriptors go through the same two Dense layers (shared
    weights), the results are summed, squashed with a sigmoid and used to
    rescale the channels of ``input_xs``.

    Args:
        input_xs: 4-D Keras tensor, e.g. (None, 50, 50, 64) in
            channels_last format.
        reduction_ratio: multiplier applied to the channel count to obtain
            the bottleneck width (e.g. 0.5 turns 64 channels into 32 units).

    Returns:
        A Keras tensor with the same shape as ``input_xs``.
    """
    # Honour the backend data format for both the channel lookup and the
    # final broadcast shape (the original reshaped as channels_last only).
    channels_first = k.image_data_format() == "channels_first"
    channel_axis = 1 if channels_first else 3
    channel = int(input_xs.shape[channel_axis])  # e.g. 64
    attention_shape = (channel, 1, 1) if channels_first else (1, 1, channel)

    # A small ratio must not truncate the bottleneck to zero units.
    hidden_units = max(1, int(channel * reduction_ratio))

    maxpool_channel = kl.GlobalMaxPooling2D()(input_xs)  # (None, 64)
    avgpool_channel = kl.GlobalAvgPool2D()(input_xs)  # (None, 64)

    # Shared MLP: the same two Dense layers are applied to both descriptors.
    dense_one = kl.Dense(units=hidden_units,
                         activation='relu',
                         kernel_initializer='he_normal',
                         use_bias=True,
                         bias_initializer='zeros')
    # NOTE(review): relu on the MLP output keeps the pre-sigmoid sum
    # non-negative, so every attention weight is >= 0.5. Kept as in the
    # original; confirm this is intended.
    dense_two = kl.Dense(units=channel,
                         activation='relu',
                         kernel_initializer='he_normal',
                         use_bias=True,
                         bias_initializer='zeros')

    mlp_2_max = dense_two(dense_one(maxpool_channel))  # (None, 64)
    mlp_2_avg = dense_two(dense_one(avgpool_channel))  # (None, 64)

    channel_attention_feature = kl.Add()([mlp_2_max, mlp_2_avg])
    channel_attention_feature = kl.Activation('sigmoid')(
        channel_attention_feature)
    # Reshape once, at the end, so the weights broadcast over the spatial
    # axes of input_xs.
    channel_attention_feature = kl.Reshape(attention_shape)(
        channel_attention_feature)

    multiply_channel_input = kl.Multiply()(
        [channel_attention_feature, input_xs])
    return multiply_channel_input
def deep_q_network(state_shape, action_size, learning_rate, hidden_neurons):
    """Create a Deep Q Network to emulate Q-learning.

    Builds a network with two hidden Dense layers. The Q-value output is
    multiplied element-wise by an action mask input, so the MSE loss only
    penalises the Q-values selected by the mask.

    Args:
        state_shape: a tuple of ints representing the observation space.
        action_size (int): the number of possible actions.
        learning_rate (float): the neural network's learning rate.
        hidden_neurons (int): the number of neurons to use per hidden layer.

    Returns:
        A compiled Keras model taking ``[state, action_mask]`` and
        returning the masked Q-values.
    """
    state_input = layers.Input(state_shape, name='frames')
    actions_input = layers.Input((action_size, ), name='mask')

    hidden_1 = layers.Dense(hidden_neurons, activation='relu')(state_input)
    hidden_2 = layers.Dense(hidden_neurons, activation='relu')(hidden_1)
    q_values = layers.Dense(action_size)(hidden_2)
    masked_q_values = layers.Multiply()([q_values, actions_input])

    model = models.Model(inputs=[state_input, actions_input],
                         outputs=masked_q_values)
    # `lr` is a deprecated alias; `learning_rate` is the supported keyword.
    optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    model.compile(loss='mse', optimizer=optimizer)
    return model
def atari_model():
    """Build and compile the masked-output convolutional DQN.

    The model takes a stack of frames plus an action mask and returns one
    Q-value per action, multiplied by the mask. It prints the model
    summary and compiles with RMSprop and ``huber_loss``.

    Returns:
        The compiled Keras model.
    """
    # The functional API needs explicit input tensors.
    frames_input = layers.Input(ATARI_SHAPE, name='frames')
    actions_input = layers.Input((ACTION_SIZE,), name='action_mask')

    # Frames are assumed to still be encoded in [0, 255]; rescale to [0, 1].
    to_unit_range = layers.Lambda(lambda frame: frame / 255.0,
                                  name='normalization')
    features = to_unit_range(frames_input)

    # "The first hidden layer convolves 16 8x8 filters with stride 4 with
    # the input image and applies a rectifier nonlinearity."
    features = layers.Conv2D(
        16, (8, 8), strides=(4, 4), activation='relu')(features)
    # "The second hidden layer convolves 32 4x4 filters with stride 2,
    # again followed by a rectifier nonlinearity."
    features = layers.Conv2D(
        32, (4, 4), strides=(2, 2), activation='relu')(features)
    features = layers.Flatten()(features)

    # "The final hidden layer is fully-connected and consists of 256
    # rectifier units."
    features = layers.Dense(256, activation='relu')(features)
    # "The output layer is a fully-connected linear layer with a single
    # output for each valid action."
    q_values = layers.Dense(ACTION_SIZE)(features)

    # Multiply by the mask so only the selected actions contribute.
    masked_q_values = layers.Multiply(name='QValue')(
        [q_values, actions_input])

    model = Model(inputs=[frames_input, actions_input],
                  outputs=masked_q_values)
    model.summary()

    # Huber loss is used instead of plain MSE so that weights change more
    # slowly: it behaves like MSE for small errors and like MAE for large
    # ones.
    optimizer = RMSprop(lr=FLAGS.learning_rate, rho=0.95, epsilon=0.01)
    model.compile(optimizer, loss=huber_loss)
    return model
def AttnGatingBlock(x, g, inter_shape):
    """Additive attention gate: rescale ``x`` using the gating signal ``g``.

    Both inputs are projected to ``inter_shape`` channels with 1x1
    convolutions, summed, passed through relu, and reduced to a single
    sigmoid attention map. The map is expanded to the channel count of
    ``x`` with ``expend_as``, multiplied with ``x``, and the product goes
    through a final 1x1 convolution and batch normalisation.

    Args:
        x: feature map to be gated. Its channel count is read from index 3
            of its shape, i.e. channels_last is assumed.
        g: gating signal. It is added to the projection of ``x``, so it
            presumably has the same spatial size -- TODO confirm against
            callers.
        inter_shape: number of filters in the intermediate projections.

    Returns:
        The gated, batch-normalised feature map with as many channels as
        ``x``.
    """
    shape_x = backend.int_shape(x)

    theta_x = layers.Conv2D(filters=inter_shape,
                            kernel_size=(1, 1),
                            padding='same')(x)
    phi_g = layers.Conv2D(filters=inter_shape,
                          kernel_size=(1, 1),
                          padding='same')(g)

    # Additive attention: sum the projections, then relu -> 1x1 conv ->
    # sigmoid.
    concat_xg = layers.Add()([phi_g, theta_x])
    act_xg = layers.Activation('relu')(concat_xg)
    psi = layers.Conv2D(filters=1, kernel_size=(1, 1),
                        padding='same')(act_xg)
    sigmoid_xg = layers.Activation('sigmoid')(psi)

    # Repeat the single-channel map across all channels of x before gating.
    upsample_psi = expend_as(sigmoid_xg, shape_x[3])
    y = layers.Multiply()([upsample_psi, x])

    result = layers.Conv2D(filters=shape_x[3],
                           kernel_size=(1, 1),
                           padding='same')(y)
    result_bn = layers.BatchNormalization()(result)

    # The original returned theta_x, which threw away everything computed
    # above; the gated output is the value callers need.
    return result_bn
def message_block(original_atom_state, original_bond_state, connectivity):
    """Run one graph-aware update of the atom and bond states.

    Both states are layer-normalised, the bond state is updated from its
    two endpoint atoms, messages are summed into the atoms, and each
    update is added back onto the corresponding un-normalised input.

    Args:
        original_atom_state: atom feature tensor.
        original_bond_state: bond feature tensor.
        connectivity: index tensor; ``[:, :, 1]`` selects the source atom
            and ``[:, :, 0]`` the target atom of each bond.

    Returns:
        Tuple ``(atom_state, bond_state)`` with the updated states.
    """
    normed_atoms = layers.LayerNormalization()(original_atom_state)
    normed_bonds = layers.LayerNormalization()(original_bond_state)

    # Look up the endpoint atoms of every bond.
    source_index = nfp.Slice(np.s_[:, :, 1])(connectivity)
    source_atom = nfp.Gather()([normed_atoms, source_index])
    target_index = nfp.Slice(np.s_[:, :, 0])(connectivity)
    target_atom = nfp.Gather()([normed_atoms, target_index])

    # Edge update network (residual around the bond state).
    edge_features = layers.Concatenate()(
        [source_atom, target_atom, normed_bonds])
    edge_features = layers.Dense(
        2*atom_features, activation='relu')(edge_features)
    bond_update = layers.Dense(atom_features)(edge_features)
    updated_bonds = layers.Add()([original_bond_state, bond_update])

    # Message function: projected source atom gated by the new bond state,
    # then summed over the bonds arriving at each target atom.
    projected_source = layers.Dense(atom_features)(source_atom)
    bond_messages = layers.Multiply()([projected_source, updated_bonds])
    reduce_index = nfp.Slice(np.s_[:, :, 0])(connectivity)
    atom_messages = nfp.Reduce(reduction='sum')(
        [bond_messages, reduce_index, normed_atoms])

    # State transition function (residual around the atom state).
    atom_messages = layers.Dense(
        atom_features, activation='relu')(atom_messages)
    atom_update = layers.Dense(atom_features)(atom_messages)
    updated_atoms = layers.Add()([original_atom_state, atom_update])

    return (updated_atoms, updated_bonds)
def block(inputs):
    """Squeeze-and-excitation gate applied to ``inputs``.

    Averages over ``spatial_dims``, runs a reduce convolution, Swish, an
    expand convolution and a sigmoid, then multiplies the result with the
    original input.
    """
    # Both 1x1 convolutions share everything except filter count and name.
    pointwise_kwargs = dict(
        kernel_size=[1, 1],
        strides=[1, 1],
        kernel_initializer=conv_kernel_initializer,
        padding='same',
        use_bias=True,
    )

    squeezed = layers.Lambda(
        lambda a: K.mean(a, axis=spatial_dims, keepdims=True))(inputs)

    reduced = layers.Conv2D(
        num_reduced_filters,
        name=block_name + 'se_reduce_conv2d',
        **pointwise_kwargs
    )(squeezed)
    reduced = Swish(name=block_name + 'se_swish')(reduced)

    expanded = layers.Conv2D(
        filters,
        name=block_name + 'se_expand_conv2d',
        **pointwise_kwargs
    )(reduced)
    gate = layers.Activation('sigmoid')(expanded)

    return layers.Multiply()([gate, inputs])
def call(self, x):
    """Rescale ``x`` by a gate computed from ``x`` itself.

    The gate is produced by chaining ``self.ga``, ``self.rs``, ``self.d1``
    and ``self.d2`` (squeeze, then excitation) and is multiplied
    element-wise with the input.
    """
    squeezed = self.rs(self.ga(x))  # squeeze
    gate = self.d2(self.d1(squeezed))  # excitation
    return tfkl.Multiply()([gate, x])  # scaling
def _se_block(inputs, filters, se_ratio, prefix):
    """Squeeze-and-excite block built from 1x1 convolutions.

    Args:
        inputs: feature map to rescale.
        filters: number of channels of ``inputs``.
        se_ratio: fraction of ``filters`` used for the bottleneck (passed
            through ``_depth``).
        prefix: string prepended to every layer name.

    Returns:
        ``inputs`` multiplied by the excitation weights.
    """
    scope = prefix + 'squeeze_excite/'

    pooled = layers.GlobalAveragePooling2D(name=scope + 'AvgPool')(inputs)

    # Restore two singleton spatial axes in the backend's data format.
    channels_first = backend.image_data_format() == 'channels_first'
    pooled_shape = (filters, 1, 1) if channels_first else (1, 1, filters)
    gate = layers.Reshape(pooled_shape)(pooled)

    gate = layers.Conv2D(_depth(filters * se_ratio),
                         kernel_size=1,
                         padding='same',
                         name=scope + 'Conv')(gate)
    gate = layers.ReLU(name=scope + 'Relu')(gate)
    gate = layers.Conv2D(filters,
                         kernel_size=1,
                         padding='same',
                         name=scope + 'Conv_1')(gate)
    gate = layers.Activation(hard_sigmoid)(gate)

    if backend.backend() == 'theano':
        # Theano needs the excitation weights to be marked broadcastable
        # explicitly.
        make_broadcastable = layers.Lambda(
            lambda br: backend.pattern_broadcast(
                br, [True, True, True, False]),
            output_shape=lambda input_shape: input_shape,
            name=scope + 'broadcast')
        gate = make_broadcastable(gate)

    return layers.Multiply(name=scope + 'Mul')([inputs, gate])
def _process_image(self, input_shape, cfg, name, embedding_length):
    """Build the goal-conditioned convolutional image encoder.

    Each entry of ``cfg.conv_layer_config`` is indexed as
    ``(filters, kernel_size, strides)``. A negative ``filters`` value
    selects a plain relu convolution with ``-filters`` filters. A
    non-negative value selects a FiLM layer: a linear convolution, batch
    normalisation without learned scale/offset, then a per-channel scale
    (gamma) and shift (beta) predicted from the goal embedding, followed
    by relu.

    Args:
        input_shape: shape of the image input (without the batch axis).
        cfg: config object exposing ``conv_layer_config``.
        name: currently unused; the model name is fixed to
            ``'vl_embedding'``. NOTE(review): confirm whether this was
            meant to name the model.
        embedding_length: length of the goal embedding vector (an int, or
            an already-formed shape tuple/list).

    Returns:
        A ``tf.keras.Model`` taking ``{'state_input', 'goal_embedding'}``
        and returning the final feature map.
    """
    # Input(shape=...) must receive a tuple; `(embedding_length)` is just
    # the bare int.
    if isinstance(embedding_length, (tuple, list)):
        embedding_shape = tuple(embedding_length)
    else:
        embedding_shape = (embedding_length,)

    inputs = layers.Input(shape=input_shape)
    goal_embedding = layers.Input(shape=embedding_shape)

    expand_dims = tf.keras.layers.Lambda(
        lambda args: tf.expand_dims(args[0], axis=args[1]))

    out = inputs
    # Use a distinct loop name: the original rebound the `cfg` parameter.
    for layer_cfg in cfg.conv_layer_config:
        filters = layer_cfg[0]
        kernel_size = layer_cfg[1]
        strides = layer_cfg[2]
        if filters < 0:
            conv = layers.Conv2D(filters=-filters,
                                 kernel_size=kernel_size,
                                 strides=strides,
                                 activation=tf.nn.relu,
                                 padding='SAME')
            out = conv(out)
        else:
            # FiLM: feature-wise modulation conditioned on the goal.
            conv = layers.Conv2D(filters=filters,
                                 kernel_size=kernel_size,
                                 strides=strides,
                                 activation=None,
                                 padding='SAME')
            out = conv(out)
            out = layers.BatchNormalization(center=False, scale=False)(out)
            gamma = layers.Dense(filters)(goal_embedding)
            beta = layers.Dense(filters)(goal_embedding)
            # Insert two axes at position 1 so gamma/beta broadcast over
            # the spatial axes.
            gamma = expand_dims((expand_dims((gamma, 1)), 1))
            beta = expand_dims((expand_dims((beta, 1)), 1))
            out = layers.Multiply()([out, gamma])
            out = layers.Add()([out, beta])
            out = layers.ReLU()(out)

    all_inputs = {'state_input': inputs, 'goal_embedding': goal_embedding}
    overall_layer = tf.keras.Model(name='vl_embedding',
                                   inputs=all_inputs,
                                   outputs=out)
    return overall_layer
def __init__(self,
             input_type='image_image',
             input_shape=(1, 128, 128),
             latent_dim=32,
             factor_type=None):
    """Create the layers of the GFN decoder.

    Args:
        input_type: string describing the input/output modalities; the
            reconstruction reshape is chosen by whether it contains
            'posture' and/or 'image'.
        input_shape: shape tuple; only index 1 is read here.
        latent_dim: width of every Dense layer and the split point of the
            ``xlayer``/``hlayer`` slicing Lambdas.
        factor_type: list of factor names. Defaults to ``['dense']``.
    """
    super(GFN_decoder, self).__init__(name='GFN_decoder')

    # Build a fresh list per instance instead of sharing one mutable
    # default object between all instances.
    if factor_type is None:
        factor_type = ['dense']

    self.input_type = input_type
    self.in_shape = input_shape
    self.latent_dim = latent_dim
    self.factor_type = factor_type

    # Split a rank-3 tensor along its last axis: the first `latent_dim`
    # entries go to xlayer, the remainder to hlayer.
    self.xlayer = layers.Lambda(lambda x: x[:, :, :self.latent_dim])
    self.hlayer = layers.Lambda(lambda x: x[:, :, self.latent_dim:])
    self.hflatten = layers.Flatten()
    self.xflatten = layers.Flatten()

    self.hlayer_1 = layers.Dense(self.latent_dim, activation='relu')
    self.hlayer_2 = layers.Dense(self.latent_dim, activation='relu')
    self.hlayer_3 = layers.Dense(self.latent_dim, activation='relu')
    self.hreshape = layers.Reshape((
        1,
        self.latent_dim,
    ))

    self.xlayer_1 = layers.Dense(self.latent_dim, activation='relu')
    self.xlayer_2 = layers.Dense(self.latent_dim, activation='relu')
    self.xlayer_3 = layers.Dense(self.latent_dim, activation='relu')
    self.xreshape = layers.Reshape((
        self.latent_dim,
        1,
    ))

    # Despite the attribute name this is an element-wise Multiply layer.
    self.matmul = layers.Multiply()

    # NOTE(review): the original built these three layers identically in
    # both the 'dense' and the non-'dense' branch, so the check on
    # factor_type had no effect and has been folded away. Confirm whether
    # another factor type was meant to differ.
    self.ylayer_1 = layers.Dense(self.latent_dim, activation='relu')
    self.ylayer_2 = layers.Dense(self.latent_dim, activation='relu')
    self.ylayer_3 = layers.Dense(self.latent_dim, activation='relu')

    posture_only = (('posture' in self.input_type)
                    and (not ('image' in self.input_type)))
    if posture_only:
        self.yrecon = layers.Reshape((
            1,
            self.in_shape[1],
        ))
    else:
        # NOTE(review): uses in_shape[1] twice, i.e. assumes a square
        # image -- confirm in_shape[2] was not intended.
        self.yrecon = layers.Reshape((
            1,
            self.in_shape[1] * self.in_shape[1],
        ))
def message_block(original_atom_state, original_bond_state, connectivity, i):
    """Run one message-passing step over the molecular graph.

    The bond state is updated from its two endpoint atoms, messages are
    summed into the atoms, and both updates are added residually to the
    incoming states.

    Args:
        original_atom_state: atom feature tensor.
        original_bond_state: bond feature tensor.
        connectivity: index tensor; ``[:, :, 1]`` selects the source atom
            and ``[:, :, 0]`` the target atom of each bond.
        i: block index, used only to name the concatenation layer.

    Returns:
        Tuple ``(atom_state, bond_state)`` with the updated states.
    """
    # Look up the endpoint atoms of every bond.
    source_index = nfp.Slice(np.s_[:, :, 1])(connectivity)
    source_atom = nfp.Gather()([original_atom_state, source_index])
    target_index = nfp.Slice(np.s_[:, :, 0])(connectivity)
    target_atom = nfp.Gather()([original_atom_state, target_index])

    # Edge update network.
    edge_features = layers.Concatenate(name='concat_{}'.format(i))(
        [source_atom, target_atom, original_bond_state])
    edge_features = layers.Dense(2 * embed_dimension,
                                 activation='relu')(edge_features)
    bond_update = layers.Dense(embed_dimension)(edge_features)
    updated_bonds = layers.Add()([original_bond_state, bond_update])

    # Message function: projected source atom gated by the new bond state,
    # summed over the bonds arriving at each target atom.
    projected_source = layers.Dense(embed_dimension)(source_atom)
    bond_messages = layers.Multiply()([projected_source, updated_bonds])
    reduce_index = nfp.Slice(np.s_[:, :, 0])(connectivity)
    atom_messages = nfp.Reduce(reduction='sum')(
        [bond_messages, reduce_index, original_atom_state])

    # State transition function.
    atom_messages = layers.Dense(embed_dimension,
                                 activation='relu')(atom_messages)
    atom_update = layers.Dense(embed_dimension)(atom_messages)
    updated_atoms = layers.Add()([original_atom_state, atom_update])

    return updated_atoms, updated_bonds
def channel_squeeze_excite_block(input, ratio=0.25):
    """Channel squeeze-and-excitation: rescale each channel of ``input``.

    Global average pooling gives one value per channel. A 1x1 relu
    convolution narrows it to ``round(filters * ratio)`` channels (at
    least 1), batch normalisation follows, and a 1x1 sigmoid convolution
    restores the channel count. The result multiplies the input.

    Args:
        input: 4-D Keras tensor in the backend's image data format.
        ratio: fraction of the channel count used for the bottleneck.

    Returns:
        Keras tensor with the same shape as ``input``.
    """
    init = input
    channels_first = K.image_data_format() == "channels_first"
    channel_axis = 1 if channels_first else -1

    # K.int_shape works on both standalone-Keras and tf.keras tensors,
    # unlike the private `_keras_shape` attribute.
    filters = K.int_shape(init)[channel_axis]

    # Keep the channel axis where the backend expects it, so the
    # convolutions below see `filters` channels in either data format
    # (the original reshaped as channels_last and permuted afterwards).
    cse_shape = (filters, 1, 1) if channels_first else (1, 1, filters)

    cse = layers.GlobalAveragePooling2D()(init)
    cse = layers.Reshape(cse_shape)(cse)

    # Never let the bottleneck collapse to zero filters.
    ratio_filters = max(1, int(np.round(filters * ratio)))

    cse = layers.Conv2D(
        ratio_filters,
        (1, 1),
        padding="same",
        activation="relu",
        kernel_initializer="he_normal",
        use_bias=False,
    )(cse)
    cse = layers.BatchNormalization(axis=channel_axis)(cse)
    cse = layers.Conv2D(
        filters,
        (1, 1),
        activation="sigmoid",
        kernel_initializer="he_normal",
        use_bias=False,
    )(cse)

    cse = layers.Multiply()([init, cse])
    return cse
def _build_vae(self):
    """Assemble, compile and return the encoder, decoder and full VAE.

    The encoder outputs pass through ``KLDivergenceLayer``. A latent
    sample is drawn as ``z_mu + exp(0.5 * z_log_var) * epsilon`` and
    decoded. The VAE is compiled with rmsprop and a summed binary
    cross-entropy reconstruction loss, and its summary is printed.

    Returns:
        Tuple ``(encoder, decoder, vae)``; the encoder maps
        ``self.inputs`` to ``z_mu``.
    """
    z_mu, z_log_var = self._build_encoder_layers()
    # The layer returns its inputs; the original comments state that it
    # adds the KL divergence term to the loss.
    z_mu, z_log_var = KLDivergenceLayer()([z_mu, z_log_var])
    z_sigma = KL.Lambda(lambda t: K.exp(0.5 * t))(z_log_var)

    # Standard-normal noise shaped like z_mu, exposed as a model input
    # backed by a random tensor.
    latent_shape = K.shape(z_mu)
    noise = K.random_normal(stddev=1.0,
                            shape=(latent_shape[0], latent_shape[1]))
    epsilon = KL.Input(tensor=noise)

    # Reparameterisation: z = mu + sigma * epsilon.
    scaled_noise = KL.Multiply()([z_sigma, epsilon])
    z = KL.Add()([z_mu, scaled_noise])

    decoder = self._build_decoder_layers()
    reconstruction = decoder(z)

    vae = Model(inputs=(self.inputs, epsilon), outputs=reconstruction)

    # Total loss = reconstruction loss (output vs. input) + KL divergence.
    def nll(y_true, y_pred):
        # Negative log likelihood as a Keras loss function.
        per_element = K.binary_crossentropy(y_true, y_pred)
        return K.sum(per_element, axis=-1)

    vae.compile(optimizer='rmsprop', loss=nll)
    vae.summary()

    # Also expose the encoder and decoder for plotting and predicting.
    encoder = Model(self.inputs, z_mu)
    return encoder, decoder, vae
def spectral_attention(filters, classes, x):
    """Spectral attention with an auxiliary softmax classification head.

    The feature map is globally average-pooled, passed through two
    convolutions (relu then sigmoid) whose kernel size depends on the
    depth of the incoming feature map, and multiplied back onto the
    input. A max-pooled, flattened copy of the attended map feeds a
    softmax classifier.

    Args:
        filters: number of incoming conv filters; must be 32, 64 or 128.
        classes: number of classes for the softmax layer.
        x: incoming Keras feature-map tensor.

    Returns:
        Tuple ``(attention_layers, output, class_pool)``: the attended
        feature map, the softmax output and the flattened pooled features
        that feed the softmax.

    Raises:
        ValueError: if ``filters`` is not one of the supported sizes.
    """
    # filters -> (label, attention kernel size, class-branch pool size);
    # labels 1, 2, 3 run from shallow to deep.
    settings = {
        32: (1, 3, (4, 4)),
        64: (2, 5, (2, 2)),
        128: (3, 7, (1, 1)),
    }
    # Validate before building any layer. The original formatted the
    # unbound `kernel_size` here, so it raised UnboundLocalError instead of
    # the intended ValueError.
    if filters not in settings:
        raise ValueError(
            "Unknown incoming kernel size {} for attention layers".format(
                filters))
    label, kernel_size, pool_size = settings[filters]

    # Global average pool
    attention_layers = layers.GlobalAveragePooling2D()(x)
    attention_layers = layers.Reshape((1, 1, filters))(attention_layers)

    # Weak attention with a filter size adapted to the feature-map depth.
    attention_layers = layers.Conv2D(filters, (kernel_size, kernel_size),
                                     padding="SAME",
                                     activation="relu")(attention_layers)
    attention_layers = layers.Conv2D(filters, (kernel_size, kernel_size),
                                     padding="SAME",
                                     activation="sigmoid")(attention_layers)

    # Element-wise multiplication of the attention with the incoming
    # feature map, broadcast over the spatial dimensions.
    attention_layers = layers.Multiply()([x, attention_layers])

    # Classification branch; the pool size depends on the layer depth.
    class_pool = layers.MaxPool2D(pool_size)(attention_layers)
    class_pool = layers.Flatten(
        name="spectral_pooling_filters_{}".format(filters))(class_pool)
    output = layers.Dense(
        classes,
        activation="softmax",
        name="spectral_attention_{}".format(label))(class_pool)

    return attention_layers, output, class_pool
def call(self, inputs):
    """Gate ``inputs`` with a vector computed by ``self.layers``.

    The input is fed through every layer in ``self.layers`` in order. The
    result is repeated ``w * h`` times, reshaped to ``(w, h, self.c)`` and
    multiplied element-wise with the input.
    """
    # Axes 1 and 2 of the input give the target spatial size.
    static_shape = K.int_shape(inputs)
    w, h = static_shape[1], static_shape[2]

    gate = inputs
    for layer in self.layers:
        gate = layer(gate)

    # NOTE(review): RepeatVector expects a 2-D input, so the stacked
    # layers presumably reduce to (batch, c) -- confirm.
    tiled = layers.RepeatVector(w * h)(gate)
    gate_map = layers.Reshape((w, h, self.c))(tiled)
    return layers.Multiply()([inputs, gate_map])
def __init__(self, input_shape, output_shape):
    """Create a tanh Dense layer, a sigmoid Dense layer and a Multiply.

    Args:
        input_shape: size of the input vector; wrapped into a 1-tuple for
            Keras.
        output_shape: number of units of both Dense layers.
    """
    # Presumably combined elsewhere as a gated activation (tanh branch
    # times sigmoid branch); the combining code is not shown here.
    dense_input_shape = (input_shape, )
    self._tanh = layers.Dense(output_shape,
                              activation='tanh',
                              input_shape=dense_input_shape)
    self._sigmoid = layers.Dense(output_shape,
                                 activation='sigmoid',
                                 input_shape=dense_input_shape)
    self._multiply = layers.Multiply()
def add_GLU(input):
    """Apply a gated linear unit along axis 3 of ``input``.

    The tensor is split into two equal halves along axis 3; the first
    half carries the values and the sigmoid of the second half gates
    them: ``half1 * sigmoid(half2)``.

    Args:
        input: Keras tensor whose axis-3 size is even.

    Returns:
        Keras tensor with half as many entries along axis 3.
    """
    # Split the input into two tensors.
    half1, half2 = layers.Lambda(
        lambda x: tf.split(x, num_or_size_splits=2, axis=3))(input)
    # Gate with the SECOND half. The original applied the sigmoid to half1
    # again, which left half2 unused.
    gate = layers.Activation("sigmoid")(half2)
    return layers.Multiply()([half1, gate])
def __init__(self, c, **kwargs):
    """Store ``c`` and create the four sub-layers.

    Args:
        c: number of units of both Dense layers.
        **kwargs: forwarded to the base-class constructor.
    """
    super().__init__(**kwargs)
    self.c = c

    first_dense = layers.Dense(c)
    second_dense = layers.Dense(c)
    multiply = layers.Multiply()
    add = layers.Add()
    self.layers = [first_dense, second_dense, multiply, add]
def func(inputs):
    """Weight ``inputs`` by softmax probabilities computed over axis 1.

    Axis 1 is moved to the end, a softmax Dense layer with as many units
    as axis 1 has entries is applied, the axes are moved back, and the
    probabilities are multiplied with the input.

    Returns:
        Tuple ``(outputs, probs)``.
    """
    rank = len(inputs.shape)

    # Permute indices are 1-based (the batch axis is excluded): send axis 1
    # to the last position.
    axis1_to_last = [*range(2, rank), 1]
    moved = kl.Permute(axis1_to_last, name='%s_permute0'%name)(inputs)

    scores = kl.Dense(inputs.shape[1],
                      activation='softmax',
                      name='%s_fc'%name)(moved)

    # Bring the last axis back to position 1.
    last_to_axis1 = [rank - 1, *range(1, rank - 1)]
    probs = kl.Permute(last_to_axis1, name='%s_permute1'%name)(scores)

    outputs = kl.Multiply(name='%s_out'%name)([inputs, probs])
    return outputs, probs
def Squeeze_Excitation_Module(input, filters, reduction_ratio, name="SE"):
    """Squeeze-and-excitation block built from Dense layers.

    Args:
        input: feature map to rescale.
        filters: number of units of the second Dense layer and size of
            the last axis of the gate.
        reduction_ratio: integer divisor giving the bottleneck width
            ``filters // reduction_ratio``.
        name: prefix for all layer names.

    Returns:
        ``input`` multiplied by the (1, 1, filters) excitation gate.
    """
    bottleneck_units = filters // reduction_ratio

    squeezed = layers.GlobalAvgPool2D(name=name+"_Squeeze")(input)
    excited = layers.Dense(bottleneck_units,
                           activation='relu',
                           name=name+"_Excitation_1")(squeezed)
    excited = layers.Dense(filters,
                           activation='sigmoid',
                           name=name+"_Excitation_2")(excited)
    gate = layers.Reshape([1, 1, filters], name=name+"_Reshape")(excited)
    return layers.Multiply(name=name+"_Multiply")([input, gate])
def _squeeze(self, x):
    """Rescale the channels of ``x`` with a squeeze-and-excite gate.

    The gate is global average pooling followed by a relu Dense and a
    hard-sigmoid Dense layer, both as wide as the last axis of ``x``.
    """
    channel = backend.int_shape(x)[-1]

    gate = layers.GlobalAveragePooling2D()(x)
    gate = layers.Dense(channel, activation="relu")(gate)
    gate = layers.Dense(channel, activation="hard_sigmoid")(gate)
    # Shape the gate as (1, 1, channel) for the multiplication with x.
    gate = layers.Reshape((1, 1, channel))(gate)

    return layers.Multiply()([x, gate])
def f(x):
    """Squeeze-and-excite gate using the dtype policy from ``config``.

    Pools ``x`` globally, narrows to a quarter of the channels, expands
    back with a sigmoid activation and multiplies the reshaped gate with
    ``x``.
    """
    policy = config.policy

    pooled = layers.GlobalAveragePooling2D(dtype=policy)(x)
    filters = int(pooled.shape[1])

    bottleneck = dense(filters // 4, config)(pooled)

    # The sigmoid is wrapped in a Lambda layer so that it can carry the
    # dtype policy.
    sigmoid = layers.Lambda(activations.sigmoid, dtype=policy)
    gate = dense(filters, config, activation=sigmoid)(bottleneck)
    gate = layers.Reshape((1, 1, -1), dtype=policy)(gate)

    return layers.Multiply(dtype=policy)([x, gate])
def call(self, x):
    """Highway-style mix of a transformed input and the input itself.

    Computes ``T * H(x) + (1 - T) * x`` where ``T`` is the sigmoid of
    ``self.dense_1(x)`` (optionally followed by dropout) and ``H(x)`` is
    ``self.dense_2(x)`` passed through ``self.activation``.
    """
    dim = K.int_shape(x)[-1]

    # Transform gate: how much of the transformed signal to let through.
    gate = layers.Activation("sigmoid")(self.dense_1(x))
    if self.transform_dropout:
        gate = layers.Dropout(self.transform_dropout)(gate)

    # Carry gate: the complement decides how much of x is fed forward.
    carry = layers.Lambda(lambda t: 1.0 - t, output_shape=(dim,))(gate)

    candidate = layers.Activation(self.activation)(self.dense_2(x))

    transformed_part = layers.Multiply()([gate, candidate])
    carried_part = layers.Multiply()([carry, x])
    return layers.Add()([transformed_part, carried_part])
def __init__(self,
             out_channel,
             strides=1,
             downsample=False,
             reduce_ratio=2,
             use_se_block=False,
             **kwargs):
    """Create the layers of the SE bottleneck block.

    :param out_channel: number of output channels of the inner
        convolutions; the shortcut and the last convolution use four
        times as many.
    :param strides: convolution stride of the shortcut and first
        convolution.
    :param downsample: whether to down-sample (stored for later use).
    :param reduce_ratio: divisor giving the width of the first SE dense
        layer.
    :param use_se_block: whether the SE branch is used (stored as
        ``self.se``).
    :param kwargs: extra keyword arguments (e.g. the layer name)
        forwarded to the base class.
    """
    super().__init__(**kwargs)
    self.downsample = downsample
    expanded_channel = out_channel * 4

    # Shortcut branch.
    self.shortcut_conv = layers.Conv2D(expanded_channel,
                                       kernel_size=1,
                                       strides=strides,
                                       use_bias=False)
    self.shortcut_bn = layers.BatchNormalization()

    # Main branch: 1x1 -> 3x3 -> 1x1 (expanding) convolutions, each with
    # its own batch normalisation.
    self.conv1 = layers.Conv2D(out_channel,
                               kernel_size=1,
                               strides=strides,
                               padding="SAME",
                               use_bias=False)
    self.bn1 = layers.BatchNormalization()
    self.conv2 = layers.Conv2D(out_channel,
                               kernel_size=3,
                               strides=1,
                               padding="SAME",
                               use_bias=False)
    self.bn2 = layers.BatchNormalization()
    self.conv3 = layers.Conv2D(expanded_channel,
                               kernel_size=1,
                               strides=1,
                               padding="SAME",
                               use_bias=False)
    self.bn3 = layers.BatchNormalization()

    self.relu = layers.ReLU()
    self.add = layers.Add()

    # Squeeze-and-excitation branch.
    # NOTE(review): these layers are sized with out_channel while conv3
    # emits out_channel * 4 channels -- confirm where the branch is
    # applied in call().
    self.se = use_se_block
    self.avg_pool = layers.GlobalAveragePooling2D()
    self.reshape = layers.Reshape((1, 1, out_channel))
    self.fc_1 = layers.Dense(out_channel // reduce_ratio,
                             activation='relu')
    self.fc_2 = layers.Dense(out_channel, activation='sigmoid')
    self.scale = layers.Multiply()
def SqueezeExcitationLayer(x_init, ratio=16):
    """Squeeze-and-excitation with dropout after each Dense layer.

    Args:
        x_init: feature map whose last axis holds the channels.
        ratio: integer divisor giving the bottleneck width
            ``channels // ratio`` (at least 1).

    Returns:
        ``x_init`` multiplied by the per-channel excitation weights.
    """
    channels = tf.keras.backend.int_shape(x_init)[-1]
    # `units` must be an integer: true division produced a float, and a
    # ratio larger than the channel count would produce zero units.
    bottleneck_units = max(1, channels // ratio)

    x = layers.GlobalAveragePooling2D()(x_init)
    x = layers.Dense(bottleneck_units, activation="relu")(x)  # Bottleneck
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(channels, activation="sigmoid")(x)
    x = layers.Dropout(0.5)(x)
    # NOTE(review): x is rank-2 here while x_init is rank-4; this relies
    # on the Multiply layer broadcasting lower-rank inputs -- confirm on
    # the Keras version in use.
    x = layers.Multiply()([x_init, x])
    return x
def VAE_2():
    """Build an MLP variational auto-encoder for flattened 28x28 inputs.

    The encoder maps the 784-vector to a 2-D ``z_mean`` and ``z_log_var``
    and draws a sample with the reparameterisation trick embedded
    directly in the graph. The decoder maps a 2-D latent back to 784
    sigmoid outputs. The KL term and a summed binary cross-entropy
    reconstruction term are attached to the model with ``add_loss``.

    Returns:
        Tuple ``(vae, encoder, decoder)``.
    """
    # `shape` must be a tuple; `(28 * 28)` is just the int 784.
    inputs = Input(shape=(28 * 28,), name='encoder_input')
    x = layers.Dense(128, activation='relu')(inputs)
    z_mean = layers.Dense(2, name='z_mean')(x)
    z_log_var = layers.Dense(2, name='z_log_var')(x)

    # Approach 1: embed the sampling directly in the model.
    # 1. Draw standard-normal noise shaped like z_mean.
    eps = tf.random.normal((tf.shape(z_mean)[0], tf.shape(z_mean)[1]))
    # 2. Standard deviation. z_log_var is the log-VARIANCE (the KL term
    #    below uses exp(z_log_var) as the variance), so the standard
    #    deviation is exp(0.5 * z_log_var), not exp(z_log_var).
    std = tf.exp(0.5 * z_log_var)
    # 3. Sample by element-wise multiplication: z = mean + std * eps.
    Sample_Z = layers.Add()([z_mean, layers.Multiply()([eps, std])])
    # Alternatives noted by the original author: a Lambda layer wrapping a
    # `sampling` function, or a custom sampling-layer subclass (which
    # needs batch_size set on both Input calls).

    # Instantiate the encoder model.
    encoder = Model(inputs, [z_mean, z_log_var, Sample_Z], name='encoder')

    # Build the decoder model.
    latent_inputs = Input(shape=(2,), name='z_sampling')
    x = layers.Dense(128, activation='relu')(latent_inputs)
    outputs = layers.Dense(28*28, activation='sigmoid')(x)
    decoder = Model(latent_inputs, outputs, name='decoder')

    # Instantiate the VAE model: decode the sampled latent (output 2).
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs=inputs, outputs=outputs, name='vae_mlp')

    # Attach the losses.
    reconstruction_loss = tf.keras.losses.BinaryCrossentropy(
        reduction='sum', name='binary_crossentropy')(inputs, outputs)
    kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
    # The original author notes that using reduce_sum here reproduces the
    # error seen in the first variant, so the mean is kept.
    kl_loss = -0.5 * tf.reduce_mean(kl_loss)
    vae.add_loss(kl_loss)
    vae.add_metric(kl_loss, name='kl_loss', aggregation='mean')
    vae.add_loss(reconstruction_loss)
    # NOTE(review): the metric is named 'mse_loss' although it tracks a
    # binary cross-entropy; the name is kept because callers may read it
    # from the training logs.
    vae.add_metric(reconstruction_loss, name='mse_loss', aggregation='mean')
    return vae, encoder, decoder
def __init__(self, num_students, num_skills, max_sequence_length, embed_dim=200, hidden_units=100, dropout_rate=0.2):
    """Build the DKT graph and initialise the functional model with it.

    Pipeline: frozen random Dense embedding -> Masking -> LSTM ->
    per-step Dropout -> per-step sigmoid over skills -> element-wise
    product with ``q`` -> frozen all-ones Dense(1) -> per-step Masking
    named ``'outputs'``.

    Args:
        num_students: not used anywhere in this constructor.
        num_skills: number of skills; ``x`` has ``num_skills * 2``
            features per step and ``q`` has ``num_skills``.
        max_sequence_length: number of time steps of both inputs.
        embed_dim: width of the frozen embedding projection.
        hidden_units: number of LSTM units.
        dropout_rate: dropout rate applied to the LSTM outputs.
    """
    x = tf.keras.Input(shape=(max_sequence_length, num_skills * 2), name='x')
    q = tf.keras.Input(shape=(max_sequence_length, num_skills), name='q')
    # Non-trainable Dense with a fixed-seed random kernel: acts as a
    # frozen random projection of the input features.
    emb = layers.Dense(
        embed_dim, trainable=False,
        kernel_initializer=tf.keras.initializers.RandomNormal(seed=777),
        input_shape=(None, max_sequence_length, num_skills * 2))
    # Time steps whose embedded features are all 0 are masked.
    mask = layers.Masking(mask_value=0, input_shape=(max_sequence_length, embed_dim))
    lstm = layers.LSTM(hidden_units, return_sequences=True)
    out_dropout = layers.TimeDistributed(layers.Dropout(dropout_rate))
    out_sigmoid = layers.TimeDistributed(
        layers.Dense(num_skills, activation='sigmoid'))
    # Element-wise product; despite the name this is not a dot product by
    # itself -- the summation is done by `reduce_sum` below.
    dot = layers.Multiply()
    # HACK: the shape of q does not fit to Timedistributed operation(may be correct?)
    # dot = layers.TimeDistributed(layers.Multiply())
    # Non-trainable Dense(1) with an all-ones kernel: sums the last axis
    # (the bias keeps its default initialisation and is frozen as well).
    reduce_sum = layers.Dense(
        1, trainable=False,
        kernel_initializer=tf.keras.initializers.constant(value=1),
        input_shape=(None, max_sequence_length, num_skills))
    # reshape layer does not work as graph
    # reshape_l = layers.Reshape((-1,6),dynamic=False)#,
    final_mask = layers.TimeDistributed(layers.Masking(
        mask_value=0, input_shape=(None, max_sequence_length, 1)),
        name='outputs')

    # define graph
    n = emb(x)
    masked_n = mask(n)
    h = lstm(masked_n)
    o = out_dropout(h)
    y_pred = out_sigmoid(o)
    # Weight the per-skill predictions by q, then sum over skills.
    y_pred = dot([y_pred, q])
    # HACK: without using layer(tf.reduce) might be faster
    # y_pred = reduce_sum(y_pred, axis=2)
    y_pred = reduce_sum(y_pred)
    outputs = final_mask(y_pred)
    # KEEP: another approach for final mask
    # patch initial mask by boolean_mask(tensor, mask)
    #tf.boolean_mask(y_pred, masked_n._keras_mask)
    #y_pred._keras_mask=masked_n._keras_mask
    super().__init__(inputs=[x, q], outputs=outputs, name="DKTModel")
def build(self, input_shape):
    """Create the sub-layers once the channel count is known.

    Args:
        input_shape: shape of the incoming tensor; its last entry is
            taken as the number of channels.
    """
    channels = input_shape[-1]
    bottleneck_units = channels // self.ratio

    # keepdims=True keeps the pooled spatial axes in the output.
    self.squeeze = layers.GlobalAveragePooling2D(keepdims=True)
    self.reduction = layers.Dense(
        units=bottleneck_units,
        activation="relu",
        use_bias=False,
    )
    self.excite = layers.Dense(
        units=channels,
        activation="sigmoid",
        use_bias=False,
    )
    self.multiply = layers.Multiply()
def spatial_squeeze_excite_block(input):
    """Spatial squeeze-and-excitation: rescale every position of ``input``.

    A bias-free 1x1 sigmoid convolution collapses the channels to a
    single attention map, which is multiplied with the input.
    """
    gate_conv = layers.Conv2D(
        1,
        (1, 1),
        activation="sigmoid",
        padding="same",
        kernel_initializer="he_normal",
        use_bias=False,
    )
    spatial_gate = gate_conv(input)
    return layers.Multiply()([input, spatial_gate])
def SE_block(input, r, activation='relu'):
    """Squeeze-and-excitation block built from 1x1 convolutions.

    Args:
        input: feature map whose last axis holds the channels.
        r: bottleneck size. A float is treated as a fraction of the
            channel count; any other value is used directly as the
            number of filters.
        activation: activation of the reducing convolution.

    Returns:
        ``input`` multiplied by the sigmoid excitation weights.
    """
    channel = input.shape[-1]
    if isinstance(r, float):
        reduce_filter = int(channel * r)
    else:
        reduce_filter = r

    gate = layers.GlobalAveragePooling2D()(input)
    gate = layers.Reshape((1, 1, channel))(gate)
    gate = layers.Conv2D(filters=reduce_filter,
                         kernel_size=1,
                         activation=activation)(gate)
    gate = layers.Conv2D(filters=channel,
                         kernel_size=1,
                         activation='sigmoid')(gate)

    return layers.Multiply()([input, gate])