def mean_absolute_tp_max_ratio_error_tanhmap_0_7(y_true, y_pred):
    # y in [0, 7] -> map in [0.2, 9.8]
    # y < 0       -> map in [0, 0.2]
    # y > 7       -> map in [9.8, 10]
    t_map = (K.tanh((y_true - 3.5) * 0.57) + 1.00001) * 5.0
    p_map = (K.tanh((y_pred - 3.5) * 0.57) + 1.00001) * 5.0
    return K.mean(math_ops.abs(t_map - p_map) * math_ops.maximum(t_map, p_map))
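# Usage sketch (illustrative assumption, not from the original source): the
# loss above expects `K` and `math_ops` in scope, and then plugs into
# compile() like any custom Keras loss.
from tensorflow.keras import backend as K
from tensorflow.python.ops import math_ops
# model.compile(optimizer='adam',
#               loss=mean_absolute_tp_max_ratio_error_tanhmap_0_7)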
def call(self, inputs):
    h = K.bias_add(K.dot(inputs, self.fc_kernel), self.fc_bias)
    h = K.tanh(h)  # note: tanh, despite the original variable name `relu_h`
    self.mu = K.bias_add(K.dot(h, self.mu_kernel), self.mu_bias)
    self.logvar = K.bias_add(K.dot(h, self.sigma_kernel), self.sigma_bias)
    h_z = self.sample_z(self.mu, self.logvar)
    z = K.bias_add(K.dot(h_z, self.trans_kernel), self.trans_bias)
    z = K.tanh(z)
    return z
def call(self, inputs, states, constants):
    if not isinstance(constants, (list, tuple)):
        keys = values = constants
    elif len(constants) == 1:
        keys = values = constants[0]
    elif len(constants) == 2:
        keys, values = constants
    else:
        raise ValueError(
            'constants can either be a list with keys and values '
            'or just attention vectors')
    if not isinstance(states, (list, tuple)):
        query = states
    else:
        query = states[0]
    query = self._query_transformation(query)
    repeated_query = K.repeat(query, K.shape(keys)[1])
    # additive (Bahdanau-style) attention over the keys
    logits = self._attention_logits_dense(K.tanh(repeated_query + keys))
    attention_weights = keras.activations.softmax(logits, axis=1)
    attention_context = K.sum(attention_weights * values, axis=1, keepdims=False)
    inputs = inputs + attention_context
    return self._cell.call(inputs, states)
def call(self, x):
    features_dim = x.shape[-1].value
    step_dim = x.shape[-2].value
    # kernel reshaped: (n, d); W reshaped: (d, 1); their dot: (n, 1)
    eij = K.reshape(
        K.dot(K.reshape(self.kernel, (-1, features_dim)),
              K.reshape(self.W, (features_dim, 1))),
        (-1, step_dim))  # (batch, step)
    eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    a = tf.transpose(a, (1, 0))
    # broadcasting expands both operands to the same (N, T, K) shape
    weighted_input = self.kernel * a
    temp = K.sum(weighted_input, axis=0)  # (N, K): weighted sum of the rows
    temp = K.tile(K.expand_dims(temp, 0), [step_dim, 1])
    temp = keras.layers.concatenate([self.kernel, temp])
    temp = K.dot(temp, self.W2) + self.b2
    return x + temp
def call(self, x):
    # attention over three 768-dim segments of the last axis
    eij1 = K.reshape(
        K.dot(K.reshape(x[:, :, 0:768], (-1, self.features_dim)),
              K.reshape(self.W, (self.features_dim, 1))),
        (-1, self.step_dim))
    eij1 += self.b
    eij1 = K.expand_dims(eij1)
    eij2 = K.reshape(
        K.dot(K.reshape(x[:, :, 768:768 * 2], (-1, self.features_dim)),
              K.reshape(self.W, (self.features_dim, 1))),
        (-1, self.step_dim))
    eij2 += self.b
    eij2 = K.expand_dims(eij2)
    eij3 = K.reshape(
        K.dot(K.reshape(x[:, :, 768 * 2:768 * 3], (-1, self.features_dim)),
              K.reshape(self.W, (self.features_dim, 1))),
        (-1, self.step_dim))
    eij3 += self.b
    eij3 = K.expand_dims(eij3)
    eij = keras.layers.concatenate([eij1, eij2, eij3], axis=2)
    eij = K.tanh(eij)
    a = K.exp(eij)
    a /= K.cast(K.sum(a, axis=2, keepdims=True) + K.epsilon(), K.floatx())
    # weighted sum of the three segments
    return (a[:, :, 0:1] * x[:, :, 0:768]
            + a[:, :, 1:2] * x[:, :, 768:768 * 2]
            + a[:, :, 2:3] * x[:, :, 768 * 2:768 * 3])
def gelu(x):
    """GELU activation, described in the paper
    "Gaussian Error Linear Units (GELUs)"
    https://arxiv.org/pdf/1606.08415.pdf
    """
    c = math.sqrt(2 / math.pi)
    return 0.5 * x * (1 + K.tanh(c * (x + 0.044715 * K.pow(x, 3))))
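# Usage sketch (assumption: tf.keras, with `math` and `K` imported as the
# snippet above requires): a custom activation callable can be passed to a
# layer directly.
import math
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense
hidden = Dense(256, activation=gelu)  # gelu defined above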
def energy_step(inputs, states):
    """ Step function for computing energy for a single decoder state
    inputs: (batch_size, 1, de_in_dim)
    states: (batch_size, 1, de_latent_dim)
    """
    """ Some parameters required for shaping tensors """
    en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
    de_hidden = inputs.shape[-1]

    """ Computing S.Wa where S=[s0, s1, ..., si] """
    # <= (batch_size, en_seq_len, latent_dim)
    W_a_dot_s = K.dot(encoder_out_seq, self.W_a)

    """ Computing hj.Ua """
    # <= (batch_size, 1, latent_dim)
    U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)

    """ tanh(S.Wa + hj.Ua) """
    # <= (batch_size, en_seq_len, latent_dim)
    Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)

    """ softmax(va.tanh(S.Wa + hj.Ua)) """
    # <= (batch_size, en_seq_len)
    e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
    e_i = K.softmax(e_i)
    return e_i, [e_i]
def energy_step(inputs, states):
    """ Step function for computing energy for a single decoder state """
    # inputs: (batch_size, latent_dim)
    assert_msg = "States must be a list. However states {} is of type {}".format(
        states, type(states))
    assert isinstance(states, (list, tuple)), assert_msg

    """ Computing sj.Ua """
    # (batch_size, 1, d3)
    U_a_dot_s = K.expand_dims(K.dot(inputs, self.U_a), 1)
    if verbose:
        print('Ua.h>', K.int_shape(U_a_dot_s))

    """ tanh(h.Wa + s.Ua) """
    # (batch_size, h1*h2*...*hn, d3) = (batch_size, h1*h2*...*hn, d3) + (batch_size, 1, d3)
    # W_hi (the pre-computed h.Wa term) comes from the enclosing scope
    Wh_plus_Us = K.tanh(W_hi + U_a_dot_s)
    # (batch_size, d3, h1*h2*...*hn)
    Wh_plus_Us = K.permute_dimensions(Wh_plus_Us, (0, 2, 1))
    if verbose:
        print('Wh+Us>', K.int_shape(Wh_plus_Us))

    """ softmax(va.tanh(S.Wa + hj.Ua)) """
    # (1, batch_size, h1*h2*...*hn) = (1, d3) . (batch_size, d3, h1*h2*...*hn)
    Wh_plus_Us_dot_Va = K.dot(self.V_a, Wh_plus_Us)
    # (batch_size, h1*h2*...*hn)
    e_i = K.squeeze(Wh_plus_Us_dot_Va, 0)
    e_i = K.softmax(e_i)
    if verbose:
        print('ei>', K.int_shape(e_i))
    # (batch_size, h1*h2*...*hn)
    return e_i, states
def call(self, x, mask=None):
    eij = dot_product(x, self.W)
    if self.bias:
        eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    # apply mask after the exp; weights will be re-normalized next
    if mask is not None:
        # cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())
    # in some cases, especially in the early stages of training, the sum may be
    # almost zero and this results in NaNs. A workaround is to add a very small
    # positive number epsilon to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    weighted_input = x * K.expand_dims(a)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, a]
    return result
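# Tiny numeric check (illustrative numpy stand-in for the Keras ops above):
# masking the exponentiated scores and renormalizing with a small epsilon
# keeps the attention weights a valid distribution and avoids dividing by a
# near-zero sum.
import numpy as np
a = np.exp(np.tanh([0.5, 0.3, 0.2]))
mask = np.array([1.0, 1.0, 0.0])
a = a * mask
a = a / (a.sum() + 1e-7)
assert abs(a.sum() - 1.0) < 1e-5 and a[2] == 0.0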
def energy_step(decode_outs, states):
    # decode_outs: (batch, dim)
    # decoder_seq: [N, 30, 512], where 30 is the string length
    en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]  # 30, 512
    de_hidden = decode_outs.shape[-1]
    # W * h_j
    reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))  # [b,64,512] => [b*64,512]
    # W_a [512,512], reshaped_enc_outputs [b*64,512] => [b*64,512] => [b,64,512]
    W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                          (-1, en_seq_len, en_hidden))
    # U * S_{t-1}: decode_outs [b,512], U_a [512,512] => [b,512] => [b,1,512]
    U_a_dot_h = K.expand_dims(K.dot(decode_outs, self.U_a), axis=1)  # <= (batch_size, 1, latent_dim)
    # the subtle part: broadcasting replicates the decoder output across the
    # 64 time steps and adds it to the encoder outputs [64, 512]
    # tanh(W * h_j + U * S_{t-1} + b), [b,64,512] => [b*64,512]
    reshaped_Ws_plus_Uh = K.tanh(K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
    # V * tanh(W * h_j + U * S_{t-1} + b), [b*64,512] x [512,1] => [b*64,1] => [b,64]
    e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a), (-1, en_seq_len))
    e_i = K.softmax(e_i)
    return e_i, [e_i]
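# Minimal demo (illustrative numpy stand-in) of the broadcast described in the
# comment above: adding a (batch, 1, hidden) decoder projection to a
# (batch, seq, hidden) encoder projection replicates the decoder state across
# every encoder time step.
import numpy as np
W_a_dot_s = np.zeros((2, 64, 512))   # (batch, en_seq_len, en_hidden)
U_a_dot_h = np.ones((2, 1, 512))     # (batch, 1, en_hidden)
assert (W_a_dot_s + U_a_dot_h).shape == (2, 64, 512)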
def energy_step(inputs, states):
    assert_msg = "States must be a list. However states {} is of type {}".format(
        states, type(states))
    assert isinstance(states, (list, tuple)), assert_msg
    en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
    de_hidden = inputs.shape[-1]
    reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
    W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                          (-1, en_seq_len, en_hidden))
    if verbose:
        print('wa.s > ', W_a_dot_s.shape)
    U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # (batch_size, 1, latent_dim)
    if verbose:
        print('Ua.h > ', U_a_dot_h.shape)
    reshaped_Ws_plus_Uh = K.tanh(K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
    if verbose:
        print('Ws+Uh > ', reshaped_Ws_plus_Uh.shape)
    e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a), (-1, en_seq_len))
    e_i = K.softmax(e_i)
    if verbose:
        print('ei > ', e_i.shape)
    return e_i, [e_i]
def call(self, x, mask=None):
    embedding_dim = self.embedding_dim
    sequence_length = self.sequence_length
    eij = K.reshape(
        K.dot(K.reshape(x, (-1, embedding_dim)),
              K.reshape(self.W, (embedding_dim, 1))),
        (-1, sequence_length))
    if self.bias:
        eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    if mask is not None:
        a *= K.cast(mask, K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    weighted_input = x * K.expand_dims(a)
    output = K.sum(weighted_input, axis=1)
    if self.return_attentions:
        return output, a
    else:
        return output
def call(self, inputs, mask=None):
    # output = softmax(score)
    k, q = inputs
    if len(q.shape) == 2:
        q = K.expand_dims(q, axis=1)
    # k: (?, K_LEN, EMBED_DIM)
    # q: (?, Q_LEN, EMBED_DIM)
    # score: (?, Q_LEN, K_LEN)
    if self.score_function == 'scaled_dot_product':
        kt = K.permute_dimensions(k, (0, 2, 1))
        qkt = K.batch_dot(q, kt)
        # note: the canonical scaled dot-product divides by sqrt(EMBED_DIM);
        # this implementation divides by EMBED_DIM itself
        score = qkt / self.EMBED_DIM
    elif self.score_function == 'mlp':
        kq = K.concatenate([k, q], axis=1)
        kqw2 = K.tanh(K.dot(kq, self.W2))
        score = K.permute_dimensions(K.dot(self.W1, kqw2), (1, 0, 2))
    elif self.score_function == 'bi_linear':
        qw = K.dot(q, self.W)
        kt = K.permute_dimensions(k, (0, 2, 1))
        score = K.batch_dot(qw, kt)
    else:
        raise RuntimeError('invalid score_function')
    score = K.softmax(score)
    # if mask is not None:
    #     score *= K.cast(mask[0], K.floatx())
    # output: (?, Q_LEN, EMBED_DIM)
    output = K.batch_dot(score, k)
    return output
def energy_step(inputs, states):
    """ Step function for computing energy for a single decoder state """
    assert_msg = "States must be a list. However states {} is of type {}".format(
        states, type(states))
    assert isinstance(states, (list, tuple)), assert_msg

    """ Some parameters required for shaping tensors """
    en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
    de_hidden = inputs.shape[-1]

    """ Computing S.Wa where S=[s0, s1, ..., si] """
    # <= (batch_size * en_seq_len, latent_dim)
    reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
    # <= (batch_size, en_seq_len, latent_dim)
    W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                          (-1, en_seq_len, en_hidden))
    if verbose:
        print('wa.s>', W_a_dot_s.shape)

    """ Computing hj.Ua """
    # <= (batch_size, 1, latent_dim)
    U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)
    if verbose:
        print('Ua.h >', U_a_dot_h.shape)
        print('U_a >', self.U_a.shape)
        print('inputs.shape >', inputs.shape)

    """ tanh(S.Wa + hj.Ua) """
    # <= (batch_size * en_seq_len, latent_dim)
    reshaped_Ws_plus_Uh = K.tanh(K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
    if verbose:
        print('Ws+Uh>', reshaped_Ws_plus_Uh.shape)

    """ softmax(va.tanh(S.Wa + hj.Ua)) """
    # <= (batch_size, en_seq_len)
    e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, K.tanh(self.V_a)), (-1, en_seq_len))
    e_i = K.softmax(e_i)
    if verbose:
        print('ei>', e_i.shape)
    K.print_tensor(reshaped_Ws_plus_Uh, message='reshaped_Ws_plus_Uh')
    K.print_tensor(self.V_a, message='V_a')
    K.print_tensor(e_i, message='e_i')
    return e_i, [e_i]
def call(self, inputs, mask=None):
    '''
    :param inputs: a list of tensors of length at most 2, or a memory tensor
        of size BxTxD1. If a list, the first entry is the memory, and the
        second, if present, is a query tensor of size BxD2
    :param mask: masked entries will be directly discarded
    :return: a tensor of size BxD1, a weighted sum along the sequence dimension
    '''
    if isinstance(inputs, list) and len(inputs) == 2:
        memory, query = inputs
        if self.method is None:
            return memory[:, -1, :]
        elif self.method == 'cba':
            hidden = K.dot(memory, self.Wh) + K.expand_dims(K.dot(query, self.Wq), 1)
            hidden = K.tanh(hidden)
            s = K.squeeze(K.dot(hidden, self.v), -1)
        elif self.method == 'ga':
            s = K.sum(K.expand_dims(K.dot(query, self.Wq), 1) * memory, axis=-1)
        else:
            s = K.squeeze(K.dot(memory, self.v), -1)
        if mask is not None:
            mask = mask[0]
    else:
        if isinstance(inputs, list):
            if len(inputs) != 1:
                raise ValueError('inputs length should not be larger than 2')
            memory = inputs[0]
        else:
            memory = inputs
        if self.method is None:
            return memory[:, -1, :]
        elif self.method == 'cba':
            hidden = K.dot(memory, self.Wh)
            hidden = K.tanh(hidden)
            s = K.squeeze(K.dot(hidden, self.v), -1)
        elif self.method == 'ga':
            raise ValueError('general attention needs the second input')
        else:
            s = K.squeeze(K.dot(memory, self.v), -1)
    s = K.softmax(s)
    if mask is not None:
        s *= K.cast(mask, dtype='float32')
    # re-normalize after masking
    sum_by_time = K.sum(s, axis=-1, keepdims=True)
    s = s / (sum_by_time + K.epsilon())
    return K.sum(memory * K.expand_dims(s), axis=1)
def call(self, inputs, mask=None):
    # inputs.shape = (batch_size, time_steps, seq_len)
    x = K.permute_dimensions(inputs, (0, 2, 1))
    # x.shape = (batch_size, seq_len, time_steps)
    # general
    a = K.softmax(K.tanh(K.dot(x, self.W)))
    a = K.permute_dimensions(a, (0, 2, 1))
    outputs = a * inputs
    outputs = K.sum(outputs, axis=1)
    return outputs
def call(self, inputs, **kwargs):
    # NAC weights: tanh(W_hat) * sigmoid(M_hat) biases entries toward {-1, 0, 1}
    W = K.tanh(self.W_hat) * K.sigmoid(self.M_hat)
    a = K.dot(inputs, W)
    if self.nac_only:
        outputs = a
    else:
        # NALU: log-space path for multiplication, gated against the additive path
        m = K.exp(K.dot(K.log(K.abs(inputs) + self.epsilon), W))
        g = K.sigmoid(K.dot(inputs, self.G))
        outputs = g * a + (1. - g) * m
    return outputs
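# Quick numeric check (illustrative numpy stand-in): tanh(W_hat) * sigmoid(M_hat)
# saturates toward {-1, 0, 1}, which is what lets the NAC path above learn
# exact add/subtract weights.
import numpy as np
w_hat, m_hat = 10.0, 10.0
w = np.tanh(w_hat) * (1.0 / (1.0 + np.exp(-m_hat)))
assert abs(w - 1.0) < 1e-3  # effectively an exact +1 weight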
def attention(self, x, dw, pw):
    z = K.separable_conv2d(
        K.tanh(x), dw, pw,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)
    # spatial softmax over height and width, then scale so the max is 1
    att = math_ops.exp(z) / math_ops.reduce_sum(math_ops.exp(z), [1, 2], keep_dims=True)
    att = att / math_ops.reduce_max(att, [1, 2], keep_dims=True)
    return att
def call(self, inputs, **kwargs):
    query, values, keys = inputs
    hidden_with_time_axis = K.expand_dims(query, 1)
    score = self.attention_variable(
        K.tanh(keys + self.query_layer(hidden_with_time_axis)))
    # TODO: mask option for score with infinity
    alignment = K.softmax(score, axis=1)
    attention = alignment * values
    alignment = K.squeeze(alignment, axis=2)
    attention = K.sum(attention, axis=1)
    return attention, alignment
def call(self, h, mask=None):
    h_shape = K.shape(h)
    d_w, T = h_shape[0], h_shape[1]
    logits = K.dot(h, self.w)  # w^T h
    logits = K.reshape(logits, (d_w, T))
    alpha = K.exp(logits - K.max(logits, axis=-1, keepdims=True))  # exp
    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        alpha = alpha * mask
    alpha = alpha / K.sum(alpha, axis=1, keepdims=True)  # softmax
    r = K.sum(h * K.expand_dims(alpha), axis=1)  # r = h * alpha^T
    h_star = K.tanh(r)  # h^* = tanh(r)
    if self.return_attention:
        return [h_star, alpha]
    return h_star
def call(self, x):
    features_dim = x.shape[-1].value
    step_dim = x.shape[-2].value
    eij = K.reshape(
        K.dot(K.reshape(self.kernel, (-1, features_dim)),
              K.reshape(self.W, (features_dim, 1))),
        (-1, step_dim + self.windows))
    eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    a = K.reshape(a, (step_dim + self.windows, 1))
    # normalize and apply the first window
    temp = a[0:self.windows, ]
    temp /= K.cast(K.sum(temp, axis=0, keepdims=True) + K.epsilon(), K.floatx())
    weighted_input = self.kernel[0:self.windows, ] * temp
    alltemp = K.sum(weighted_input, axis=0, keepdims=True)
    # slide the window over the remaining steps, re-normalizing each slice
    for i in range(self.windows // 2 + 1, step_dim + self.windows // 2):
        temp = a[i - self.windows // 2:i + self.windows // 2, ]
        temp /= K.cast(K.sum(temp, axis=0, keepdims=True) + K.epsilon(), K.floatx())
        weighted_input = self.kernel[i - self.windows // 2:i + self.windows // 2, ] * temp
        temp = K.sum(weighted_input, axis=0, keepdims=True)
        alltemp = keras.layers.concatenate([alltemp, temp], 0)
    alltemp = keras.activations.tanh(alltemp)
    return x + alltemp
def call(self, x, mask=None):
    features_dim = self.features_dim
    step_dim = self.step_dim
    eij = K.reshape(
        K.dot(K.reshape(x, (-1, features_dim)),
              K.reshape(self.W, (features_dim, 1))),
        (-1, step_dim))
    if self.bias:
        eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    # apply mask after the exp; weights will be re-normalized next
    if mask is not None:
        # cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())
    # in some cases, especially in the early stages of training,
    # the sum may be almost zero
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
def call(self, inputs, mask=None):
    x, u = inputs
    if u is None:
        u = self.add_weight(name="u_{:s}".format(self.name),
                            shape=(self.ATTENTION_SIZE,),
                            initializer="glorot_normal",
                            trainable=True)
    # u: (?, ATTENTION_SIZE)
    # x: (?, MAX_TIMESTEPS, EMBED_SIZE)
    # ut: (?, MAX_TIMESTEPS, ATTENTION_SIZE)
    ut = K.tanh(K.dot(x, self.W) + self.b)
    # at: (?, MAX_TIMESTEPS)
    at = K.batch_dot(ut, u)
    at = K.softmax(at)
    if mask is not None:
        at *= K.cast(mask, K.floatx())
    # ot: (?, MAX_TIMESTEPS, EMBED_SIZE)
    atx = K.expand_dims(at, axis=-1)
    ot = atx * x
    # output: (?, EMBED_SIZE)
    output = K.sum(ot, axis=1)
    return output
def energy_step(inputs, states):
    """ Step function for computing energy for a single decoder state
    inputs: (batch_size, 1, de_in_dim)
    states: (batch_size, 1, de_latent_dim)
    """
    assert_msg = "States must be an iterable. Got {} of type {}".format(
        states, type(states))
    assert isinstance(states, (list, tuple)), assert_msg

    """ Some parameters required for shaping tensors """
    en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
    de_hidden = inputs.shape[-1]

    """ Computing S.Wa where S=[s0, s1, ..., si] """
    # <= (batch_size, en_seq_len, latent_dim)
    W_a_dot_s = K.dot(encoder_out_seq, self.W_a)

    """ Computing hj.Ua """
    # <= (batch_size, 1, latent_dim)
    U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)
    if verbose:
        print('Ua.h>', U_a_dot_h.shape)

    """ tanh(S.Wa + hj.Ua) """
    # <= (batch_size, en_seq_len, latent_dim)
    Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
    if verbose:
        print('Ws+Uh>', Ws_plus_Uh.shape)

    """ softmax(va.tanh(S.Wa + hj.Ua)) """
    # <= (batch_size, en_seq_len)
    e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
    e_i = K.softmax(e_i)
    if verbose:
        print('ei>', e_i.shape)
    return e_i, [e_i]
def energy_step(inputs, states):
    """ Step function for computing energy for a single decoder state
    inputs: (batch_size, 1, de_in_dim)
    states: (batch_size, 1, de_latent_dim)
    """
    logger.debug("Running energy computation step")
    if not isinstance(states, (list, tuple)):
        raise TypeError(
            f"States must be an iterable. Got {states} of type {type(states)}")
    encoder_full_seq = states[-1]

    """ Computing S.Wa where S=[s0, s1, ..., si] """
    # <= (batch_size, en_seq_len, latent_dim)
    W_a_dot_s = K.dot(encoder_full_seq, self.W_a)

    """ Computing hj.Ua """
    # <= (batch_size, 1, latent_dim)
    U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)
    logger.debug(f"U_a_dot_h.shape = {U_a_dot_h.shape}")

    """ tanh(S.Wa + hj.Ua) """
    # <= (batch_size, en_seq_len, latent_dim)
    Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
    logger.debug(f"Ws_plus_Uh.shape = {Ws_plus_Uh.shape}")

    """ softmax(va.tanh(S.Wa + hj.Ua)) """
    # <= (batch_size, en_seq_len)
    e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
    e_i = K.softmax(e_i)
    logger.debug(f"ei.shape = {e_i.shape}")
    return e_i, [e_i]
def energy_step(S_t_1, states):
    # step function: (inputs, states) -> (output, new_states)
    # S_t_1: (batch, dim), e.g. [1, 20]
    S_t_1 = _p(S_t_1, "energy_step: S_t_1, computing the energy function")
    en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
    de_hidden = S_t_1.shape[-1]
    # W * h_j
    reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
    W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                          (-1, en_seq_len, en_hidden))
    # U * S_{t-1}
    U_a_dot_h = K.expand_dims(K.dot(S_t_1, self.U_a), 1)  # <= (batch_size, 1, latent_dim)
    # tanh(W * h_j + U * S_{t-1} + b)
    reshaped_Ws_plus_Uh = K.tanh(K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
    # V * tanh(W * h_j + U * S_{t-1} + b)
    e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a), (-1, en_seq_len))
    # softmax(e_tj)
    e_i = K.softmax(e_i)
    e_i = _p(e_i, "energy_step: e_i")
    return e_i, [e_i]
def call(self, x):
    # Mish activation: x * tanh(softplus(x))
    return x * K.tanh(K.softplus(x))
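# Editorial note: the expression above is the Mish activation
# (https://arxiv.org/abs/1908.08681). A plain-function sketch (assumption:
# tf.keras backend in scope) usable outside a layer:
from tensorflow.keras import backend as K
def mish(x):
    return x * K.tanh(K.softplus(x))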
def call(self, inputs, states, training=None):
    h_tm1 = states[0]
    c_tm1 = states[1]

    # dropout matrices for input units
    dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
    # dropout matrices for recurrent units
    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(h_tm1, training, count=4)

    if 0 < self.dropout < 1.:
        inputs_i = inputs * dp_mask[0]
        inputs_f = inputs * dp_mask[1]
        inputs_c = inputs * dp_mask[2]
        inputs_o = inputs * dp_mask[3]
    else:
        inputs_i = inputs_f = inputs_c = inputs_o = inputs

    if 0 < self.recurrent_dropout < 1.:
        h_tm1_i = h_tm1 * rec_dp_mask[0]
        h_tm1_f = h_tm1 * rec_dp_mask[1]
        h_tm1_c = h_tm1 * rec_dp_mask[2]
        h_tm1_o = h_tm1 * rec_dp_mask[3]
    else:
        h_tm1_i = h_tm1_f = h_tm1_c = h_tm1_o = h_tm1

    # (3, 3, input_dim, filters)
    (kernel_i, kernel_f, kernel_c, kernel_o) = array_ops.split(self.kernel, 4, axis=3)
    (recurrent_kernel_i, recurrent_kernel_f, recurrent_kernel_c,
     recurrent_kernel_o) = array_ops.split(self.recurrent_kernel, 4, axis=3)
    if self.use_bias:
        bias_i, bias_f, bias_c, bias_o = array_ops.split(self.bias, 4)
    else:
        bias_i = bias_f = bias_c = bias_o = None

    # standard ConvLSTM gate computations
    x_i = self.input_conv(inputs_i, kernel_i, bias_i, padding=self.padding)
    x_f = self.input_conv(inputs_f, kernel_f, bias_f, padding=self.padding)
    x_c = self.input_conv(inputs_c, kernel_c, bias_c, padding=self.padding)
    x_o = self.input_conv(inputs_o, kernel_o, bias_o, padding=self.padding)
    h_i = self.recurrent_conv(h_tm1_i, recurrent_kernel_i)
    h_f = self.recurrent_conv(h_tm1_f, recurrent_kernel_f)
    h_c = self.recurrent_conv(h_tm1_c, recurrent_kernel_c)
    h_o = self.recurrent_conv(h_tm1_o, recurrent_kernel_o)
    i = self.recurrent_activation(x_i + h_i)
    f = self.recurrent_activation(x_f + h_f)
    c = f * c_tm1 + i * self.activation(x_c + h_c)
    o = self.recurrent_activation(x_o + h_o)
    h = o * self.activation(c)

    # self-attention (sa) computation
    m_t_minus_one = states[2]  # (h, w, filters)
    h_t, c_t = h, c
    (kernel_hv, kernel_hk, kernel_hq, kernel_mk,
     kernel_mv) = array_ops.split(self.sa_kernel, 5, axis=3)
    # (kernel_size, filters, 1): project down to one channel
    if self.use_bias:
        bias_i, bias_g, bias_o = array_ops.split(self.sa_bias, 3)
    else:
        bias_i = bias_g = bias_o = None
    v_h = self.sa_conv(h_t, kernel_hv)
    k_h = self.sa_conv(h_t, kernel_hk)
    q_h = self.sa_conv(h_t, kernel_hq)
    k_m = self.sa_conv(m_t_minus_one, kernel_mk)
    v_m = self.sa_conv(m_t_minus_one, kernel_mv)

    # (h, w, 1) -> (h, w): attention of the hidden state over itself and memory
    q_h = K.squeeze(q_h, 3)
    k_m = K.squeeze(k_m, 3)
    k_h = K.squeeze(k_h, 3)
    e_m = tf.matmul(q_h, k_m)
    alpha_m = K.softmax(e_m)
    e_h = tf.matmul(q_h, k_h)
    alpha_h = K.softmax(e_h)
    v_m = K.squeeze(v_m, 3)
    v_h = K.squeeze(v_h, 3)
    z_m = tf.matmul(alpha_m, v_m)
    z_h = tf.matmul(alpha_h, v_h)
    z_m = K.expand_dims(z_m, 3)
    z_h = K.expand_dims(z_h, 3)
    zi = self.sa_conv(K.concatenate((z_h, z_m), 3), self.kernel_z)

    (kernel_m_zi, kernel_m_hi, kernel_m_zg, kernel_m_hg, kernel_m_zo,
     kernel_m_ho) = array_ops.split(self.depth_wise_kernel, 6, axis=3)
    # memory-update gates
    i = K.sigmoid(K.depthwise_conv2d(zi, kernel_m_zi, padding='same')
                  + K.depthwise_conv2d(h_t, kernel_m_hi, padding='same') + bias_i)
    g = K.tanh(K.depthwise_conv2d(zi, kernel_m_zg, padding='same')
               + K.depthwise_conv2d(h_t, kernel_m_hg, padding='same') + bias_g)
    o = K.sigmoid(K.depthwise_conv2d(zi, kernel_m_zo, padding='same')
                  + K.depthwise_conv2d(h_t, kernel_m_ho, padding='same') + bias_o)
    m_t = (1 - i) * m_t_minus_one + i * g
    h_hat_t = m_t * o
    # sa computation end

    # keep the state order consistent with the unpacking above: [h, c, m]
    # (the original returned [c_t, h_hat_t, m_t], which swaps h and c)
    return h_hat_t, [h_hat_t, c_t, m_t]