def vae_loss(self, x, x_decoded_mean, z, z_mean, z_log_var):
    """Assemble the training loss from reconstruction and mixture-prior terms.

    The result is the sum of five pieces: a binary cross-entropy
    reconstruction term scaled by ``self.input_dim``, a responsibility
    weighted term tying the encoder posterior to the per-cluster tensors
    returned by ``compute_u_tensor3`` / ``compute_lambda_tensor3``, the
    posterior log-variance term, the cluster-prior term built from
    ``self.theta_p`` and the entropy-like ``gamma * log(gamma)`` term.

    Args:
        x: Input batch handed to ``losses.binary_crossentropy``.
        x_decoded_mean: Reconstruction compared against ``x``.
        z: Latent codes forwarded to ``compute_p_c_z``.
        z_mean: Encoder means; asserted to have trailing shape
            ``(latent_dim,)``.
        z_log_var: Encoder log-variances; asserted to have trailing shape
            ``(latent_dim,)``.

    Returns:
        The combined loss tensor.

    Raises:
        AssertionError: If any of the checked shapes does not match.
    """
    latent_shape = (self.latent_dim, )
    tiled_shape = (self.latent_dim, self.n_clusters)

    gamma = self.compute_p_c_z(z)
    gamma_t = K.repeat(gamma, self.latent_dim)
    u_tensor3 = self.compute_u_tensor3()
    lambda_tensor3 = self.compute_lambda_tensor3()

    # Tile the posterior statistics so each one lines up with every cluster.
    assert z_mean.shape[1:] == latent_shape, \
        'z_mean.shape[1:] {} != {}'.format(z_mean.shape[1:], latent_shape)
    z_mean_t = K.permute_dimensions(
        K.repeat(z_mean, self.n_clusters), [0, 2, 1])
    assert z_mean_t.shape[1:] == tiled_shape, \
        'z_mean_t.shape[1:] {} != {}'.format(z_mean_t.shape[1:], tiled_shape)

    assert z_log_var.shape[1:] == latent_shape, \
        'z_log_var.shape[1:] {} != {}'.format(
            z_log_var.shape[1:], latent_shape)
    z_log_var_t = K.permute_dimensions(
        K.repeat(z_log_var, self.n_clusters), [0, 2, 1])
    assert z_log_var_t.shape[1:] == tiled_shape, \
        'z_log_var_t.shape[1:] {} != {}'.format(
            z_log_var_t.shape[1:], tiled_shape)

    reconstruction = self.input_dim * losses.binary_crossentropy(
        x, x_decoded_mean)

    per_cluster = (self.latent_dim * K.log(math.pi * 2)
                   + K.log(lambda_tensor3)
                   + K.exp(z_log_var_t) / lambda_tensor3
                   + K.square(z_mean_t - u_tensor3) / lambda_tensor3)
    prior_fit = K.sum(0.5 * gamma_t * per_cluster, axis=(1, 2))

    posterior_term = 0.5 * K.sum(z_log_var + 1, axis=-1)

    # theta_p is copied batch_size times along a new leading axis.
    theta_batch = K.repeat_elements(
        K.expand_dims(self.theta_p, axis=0), self.batch_size, 0)
    cluster_prior = K.sum(K.log(theta_batch) * gamma, axis=-1)

    cluster_entropy = K.sum(K.log(gamma) * gamma, axis=-1)

    return (reconstruction + prior_fit - posterior_term
            - cluster_prior + cluster_entropy)
def step(self, x, states):
    """Compute one recurrent step of the attention decoder.

    Args:
        x: Step input; it is not referenced by this implementation.
        states: Pair ``(ytm, stm)`` holding the previous output and the
            previous hidden state.

    Returns:
        ``(at, [yt, st])`` when ``self.return_probabilities`` is truthy,
        otherwise ``(yt, [yt, st])``, where ``at`` are the attention
        weights, ``yt`` the new output and ``st`` the new hidden state.
    """
    ytm, stm = states

    # Attention energies: tile the previous hidden state over the sequence,
    # project it with W_a and combine it with the precomputed self._uxpb.
    tiled_state = K.repeat(stm, self.timesteps)
    projected_state = K.dot(tiled_state, self.W_a)
    et = K.dot(activations.tanh(projected_state + self._uxpb),
               K.expand_dims(self.V_a))

    # Normalise exp(energy) over the time axis.
    at = K.exp(et)
    normaliser = K.repeat(K.sum(at, axis=1), self.timesteps)
    at = at / normaliser  # (batchsize, timesteps, 1) per the original note

    # Context vector: attention-weighted combination of self.x_seq.
    context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)

    def affine(state_term, w, u, c, b):
        """Sum the previous-output, state and context projections plus bias."""
        return (K.dot(ytm, w) + K.dot(state_term, u)
                + K.dot(context, c) + b)

    # "r" gate and "z" gate.
    rt = activations.sigmoid(
        affine(stm, self.W_r, self.U_r, self.C_r, self.b_r))
    zt = activations.sigmoid(
        affine(stm, self.W_z, self.U_z, self.C_z, self.b_z))

    # Proposal hidden state, computed from the r-gated previous state.
    s_tp = activations.tanh(
        affine(rt * stm, self.W_p, self.U_p, self.C_p, self.b_p))

    # New hidden state: interpolate between the old state and the proposal.
    st = (1 - zt) * stm + zt * s_tp

    yt = activations.softmax(
        affine(stm, self.W_o, self.U_o, self.C_o, self.b_o))

    emitted = at if self.return_probabilities else yt
    return emitted, [yt, st]
def next_state_function(inputs):
    """Advance the per-region SIRH state by one step under mobility ``OD``.

    ``delta``, ``beta_m``, ``beta_s``, ``gamma`` and ``theta`` are free
    variables taken from the enclosing scope.

    Args:
        inputs: Pair ``(SIRH, OD)``. Only element ``[0]`` along the leading
            axis of each tensor is used. Columns 0-2 of ``SIRH`` are the
            mobile compartments (healthy, infected, recovered according to
            how they are used below) and column 3 is the hospitalised one.

    Returns:
        The updated SIRH tensor with a leading axis of size 1 restored.
    """
    state, mobility = inputs
    state = state[0]
    mobility = mobility[0]

    # Hospitalized people would not move, so only the first three columns
    # take part in mobility.
    mobile = state[:, :3]
    populations = K.sum(mobile, axis=1)
    mobile_share = tf.math.divide_no_nan(
        mobile, K.expand_dims(populations, -1) + delta)
    n_regions = int(mobile.shape[0])

    # Earlier mobility restrictions can leave a region with more scheduled
    # move-outs than residents; scale those rows so that the move-out
    # population never exceeds the total population.
    cap = tf.math.divide_no_nan(populations, K.sum(mobility, axis=1) + delta)
    cap = K.repeat(K.expand_dims(cap, -1), n_regions)[:, :, 0]
    mobility = tf.where(cap < 1, mobility * cap, mobility)

    # moved[i, j, k]: flow from region i to region j, split by compartment k.
    moved = K.expand_dims(mobility, axis=-1) * K.repeat(mobile_share, n_regions)
    moved_healthy = moved[:, :, 0]
    moved_infected = moved[:, :, 1]
    moved_total = K.sum(moved, axis=-1)

    inflow_healthy = K.sum(moved_healthy, axis=0)
    inflow_infected = K.sum(moved_infected, axis=0)
    inflow_all = K.sum(moved_total, axis=0)

    stay_healthy = mobile[:, 0] - K.sum(moved_healthy, axis=1)
    stay_infected = mobile[:, 1] - K.sum(moved_infected, axis=1)
    stay_all = populations - K.sum(moved_total, axis=1)

    # State after mobility (the "SIR^" of the accompanying paper).
    mixed = mobile - K.sum(moved, axis=1) + K.sum(moved, axis=0)

    # New infections among movers and among stayers, capped by the number
    # of healthy people available after mobility.
    m_infected = tf.math.divide_no_nan(
        beta_m * inflow_healthy * inflow_infected, inflow_all + delta)
    s_infected = tf.math.divide_no_nan(
        beta_s * stay_healthy * stay_infected, stay_all + delta)
    new_infected = m_infected + s_infected
    new_infected = tf.where(
        new_infected > mixed[:, 0], mixed[:, 0], new_infected)

    new_hospitaled = gamma * mixed[:, 1]
    new_recovered = theta * state[:, 3]

    updated = K.stack([
        mixed[:, 0] - new_infected,
        mixed[:, 1] + new_infected - new_hospitaled,
        mixed[:, 2] + new_recovered,
        state[:, 3] + new_hospitaled - new_recovered,
    ], axis=-1)
    return K.expand_dims(updated, 0)
def _loss_tensor(y_true, y_pred):
    """Squared hinge loss evaluated only at the maxima along axis -2.

    Positions of ``y_pred`` that equal the maximum along axis -2 (the
    temporal axis, per the original comment) keep their predicted value;
    every other position is replaced by ``y_true``, so only the maxima can
    differ from the target when the loss is computed.

    Args:
        y_true: Target tensor, same shape as ``y_pred``.
        y_pred: Prediction tensor; ``K.repeat`` requires its reduction over
            axis -2 to be 2-D.

    Returns:
        The result of ``squared_hinge(y_true, masked_prediction)``.
    """
    # A leftover ``print(K.eval(max_val))`` debug call was removed here: it
    # forced evaluation of a symbolic tensor while the loss graph is built.
    max_val = K.max(y_pred, axis=-2)  # temporal axis
    max_val = K.repeat(max_val, K.shape(y_pred)[-2])
    mask = K.cast(K.equal(max_val, y_pred), K.floatx())
    y_pred = mask * y_pred + (1 - mask) * y_true
    return squared_hinge(y_true, y_pred)
def compute_p_c_z(self, z):
    """Return normalised cluster responsibilities for latent codes ``z``.

    For every cluster the log of ``self.theta_p`` and the log-density of a
    diagonal Gaussian (mean/variance tensors from ``compute_u_tensor3`` and
    ``compute_lambda_tensor3``) are summed over the latent axis,
    exponentiated, offset by ``1e-10`` and normalised over the cluster axis.

    Args:
        z: Latent codes; asserted to have trailing shape ``(latent_dim,)``.

    Returns:
        Tensor with trailing shape ``(n_clusters,)`` whose last axis sums
        to one.

    Raises:
        AssertionError: If any of the checked shapes does not match.
    """
    latent_shape = (self.latent_dim, )
    cluster_shape = (self.n_clusters, )
    tiled_shape = (self.latent_dim, self.n_clusters)
    full_shape = (self.batch_size, self.latent_dim, self.n_clusters)

    assert z.shape[1:] == latent_shape, \
        'z.shape[1:] {} != {}'.format(z.shape[1:], latent_shape)
    Z = K.permute_dimensions(K.repeat(z, self.n_clusters), [0, 2, 1])
    assert Z.shape[1:] == tiled_shape, \
        'Z.shape[1:] {} != {}'.format(Z.shape[1:], tiled_shape)

    u_tensor3 = self.compute_u_tensor3()
    lambda_tensor3 = self.compute_lambda_tensor3()

    assert self.theta_p.shape == cluster_shape, \
        'self.theta_p.shape {} != {}'.format(self.theta_p.shape, cluster_shape)
    # Broadcast the cluster weights to (batch, latent_dim, n_clusters).
    theta_row = K.expand_dims(K.expand_dims(self.theta_p, axis=0), axis=0)
    theta_tensor3 = theta_row * K.ones(full_shape)
    assert theta_tensor3.shape == full_shape, \
        'theta_tensor3.shape {} != {}'.format(theta_tensor3.shape, full_shape)

    log_weight = K.log(theta_tensor3)
    log_normaliser = 0.5 * K.log(2 * math.pi * lambda_tensor3)
    scaled_distance = K.square(Z - u_tensor3) / (2 * lambda_tensor3)
    log_joint = log_weight - log_normaliser - scaled_distance

    # The small offset keeps the normalisation below away from 0 / 0.
    p_c_z = K.exp(K.sum(log_joint, axis=1)) + 1e-10
    assert p_c_z.shape[1:] == cluster_shape, \
        'p_c_z.shape[1:] {} != {}'.format(p_c_z.shape[1:], cluster_shape)

    return p_c_z / K.sum(p_c_z, axis=-1, keepdims=True)
def call(self, inputs, states, constants):
    """Run one LSTM step whose input is augmented with an attention context.

    Args:
        inputs: Step input; the attention context vector is concatenated
            to it before the parent LSTM cell is invoked.
        states: Two-element state list. Only the second entry (called the
            "carry state" by the original author) drives the attention.
        constants: Accepted for interface compatibility; not used here.

    Returns:
        When ``self.attentionMode`` is truthy, a tuple of the attention
        weights reshaped to ``(-1, self.timesteps)`` and the parent cell's
        new states (``res[1]``). Otherwise the parent cell's result
        unchanged.
    """
    _memory_state, carry_state = states

    # Project the carry state and tile it across all encoder timesteps.
    carry_tiled = K.repeat(self.dense_state(carry_state), self.timesteps)

    # Score each timestep from the sum of the tiled carry state and the
    # pre-shaped encoder sequence. The original author notes that relu gave
    # crisper attention than the more common tanh on their dataset.
    scores = self.dense_transform(
        keras.activations.relu(carry_tiled + self.input_seq_shaped))

    # Softmax over axis 1 turns the scores into attention weights
    # (probabilities, not log-probabilities).
    score_vector = keras.activations.softmax(scores, 1)

    # Soft attention: weighted sum of the encoder outputs.
    context_vector = K.sum(score_vector * self.input_seq, 1)

    augmented_inputs = K.concatenate([inputs, context_vector])
    res = super(AttentionLSTMCell, self).call(
        inputs=augmented_inputs, states=states)

    if not self.attentionMode:
        return res
    return (K.reshape(score_vector, (-1, self.timesteps)), res[1])
def call(self, hidden_state, cell_state, X_encoded):
    """Compute attention weights over the encoder hidden states.

    Shapes below are the ones annotated by the original author.

    Args:
        hidden_state: hidden state `d` of shape (batch_size, p)
        cell_state: cell state `s` of shape (batch_size, p)
        X_encoded: the encoder hidden states (batch_size, T, m)

    Returns:
        The attention weights for encoder hidden states (beta_t),
        normalised with a softmax over axis 1.
    """
    # Equation 12
    decoder_state = tf.concat(
        [hidden_state, cell_state], axis=-1)  # (batch_size, p * 2)
    tiled_state = K.repeat(
        decoder_state, X_encoded.shape[1])  # (batch_size, T, p * 2)
    state_features = self.W_d(tiled_state)  # (batch_size, T, m)
    encoder_features = self.U_d(X_encoded)
    joint_features = tf.concat(
        [state_features, encoder_features], axis=-1)  # (batch_size, T, m * 2)
    scores = self.v_d(tf.math.tanh(joint_features))  # (batch_size, T, 1)

    # Equation 13
    return tf.nn.softmax(scores, axis=1)
def call(self, inputs, **kwargs):
    """Generate prior boxes for the input's spatial size, tiled per sample.

    For every group in ``self.prior_values`` and every
    ``(stride, box_width, box_height)`` row in it, a grid of centres is
    laid out every ``stride`` pixels starting at ``stride // 2``. Each grid
    cell yields one ``(x, y, box_width, box_height)`` entry.

    Args:
        inputs: 4-D tensor; its dynamic shape is unpacked as
            ``(batch, height, width, _)``.
        **kwargs: Ignored.

    Returns:
        Tensor of boxes with the batch on the leading axis, i.e.
        ``(batch, num_boxes, 4)``.
    """
    batch_size, height, width, _ = tf.unstack(tf.shape(inputs), axis=0)

    def covered_extent(extent, stride):
        """Round ``extent`` to a multiple of ``stride`` (up for 'same')."""
        rounder = tf.math.ceil if self.padding == 'same' else tf.math.floor
        return tf.cast(rounder(extent / stride) * stride, tf.int32)

    groups = []
    for points in self.prior_values:
        group_boxes = []
        for stride, prior_width, prior_height in points:
            target_height = covered_extent(height, stride)
            target_width = covered_extent(width, stride)

            ys = tf.range(stride // 2, target_height, stride)
            xs = tf.range(stride // 2, target_width, stride)
            xs, ys = tf.meshgrid(xs, ys)

            widths = tf.ones_like(xs) * prior_width
            heights = tf.ones_like(ys) * prior_height
            group_boxes.append(tf.stack((xs, ys, widths, heights), axis=-1))

        # Interleave the priors of one group per grid cell, then flatten.
        stacked = tf.stack(group_boxes, axis=2)
        groups.append(tf.reshape(stacked, (-1, 4)))

    all_boxes = tf.concat(groups, axis=0)
    # K.repeat inserts the copies on axis 1; move them to the front.
    tiled = K.repeat(all_boxes, batch_size)
    return tf.transpose(tiled, (1, 0, 2))
def _time_distributed_dense(x, w, b=None, dropout=None, input_dim=None,
                            output_dim=None, timesteps=None):
    """Apply ``y.w + b`` for every temporal slice ``y`` of ``x``.

    Args:
        x: 3-D input tensor indexed as (batch, time, input_dim).
        w: Weight matrix applied to every timestep.
        b: Optional bias added after the matrix product.
        dropout: Optional dropout level; when truthy the same dropout
            pattern is applied at every timestep.
        input_dim: Optional size of the last axis of ``x``; looked up with
            ``K.shape`` when falsy.
        output_dim: Optional size of the last axis of the output; looked up
            with ``K.shape(w)`` when falsy.
        timesteps: Optional length of the time axis; looked up with
            ``K.shape`` when falsy.

    Returns:
        3-D tensor reshaped to ``(-1, timesteps, output_dim)``.
    """
    # The original author flagged these symbolic-shape fallbacks as not
    # working with TensorFlow; pass the sizes explicitly where possible.
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    if dropout:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x *= expanded_dropout_matrix

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    # Test for presence explicitly: taking the truth value of a tensor or
    # variable (the old ``if b:``) is an error or ambiguous.
    if b is not None:
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(x, (-1, timesteps, output_dim))
    return x
def multA(ip):
    """Test Lambda function that scales a signal by a parameter tensor.

    Args:
        ip: Sequence whose first element is the signal and whose second
            element is the gain parameter. The gain is repeated 64 times
            with ``K.repeat`` before the multiplication.

    Returns:
        The element-wise product of the signal and the repeated gain.
    """
    gain = K.repeat(ip[1], 64)
    return ip[0] * gain
def build_generator(self):
    """Build the conditional image-to-image generator model.

    The condition vector is tiled over the spatial grid, concatenated with
    the input image along the channel axis and pushed through a
    down-sampling stack, six bottleneck convolutions, an up-sampling stack
    and a final ``tanh`` convolution with 3 output channels.

    Returns:
        A ``Model`` with inputs ``[input_img, input_c]`` and the generated
        image as output.
    """

    def conv2d(x, filters, kernel_size, strides, padding):
        """Zero-pad, convolve ('valid'), ReLU, then instance-normalise."""
        x = ZeroPadding2D(padding=padding)(x)
        x = Conv2D(filters, kernel_size, strides, padding='valid',
                   use_bias=False)(x)
        x = ReLU()(x)
        x = InstanceNormalization(axis=-1)(x)
        return x

    def deconv2d(x, filters, kernel_size, strides, padding):
        """Upsample x2, convolve ('same'), ReLU, then instance-normalise.

        ``padding`` is accepted for symmetry with ``conv2d`` but unused.
        """
        x = UpSampling2D(2)(x)
        x = Conv2D(filters, kernel_size, strides, padding='same',
                   use_bias=False)(x)
        x = ReLU()(x)
        x = InstanceNormalization(axis=-1)(x)
        return x

    def down_sampling(x):
        """Three convolutions; the last two halve the resolution."""
        d1 = conv2d(x, 64, 7, 1, 3)
        d2 = conv2d(d1, 128, 4, 2, 1)
        d3 = conv2d(d2, 256, 4, 2, 1)
        return d3

    def bottleneck(x):
        """Six resolution-preserving 3x3 convolutions."""
        for _ in range(6):
            x = conv2d(x, 256, 3, 1, 1)
        return x

    def up_sampling(x):
        """Two up-sampling blocks that restore the input resolution."""
        u1 = deconv2d(x, 128, 4, 1, 1)
        u2 = deconv2d(u1, 64, 4, 1, 1)
        return u2

    def output_conv(x):
        """Final 7x7 convolution producing a 3-channel tanh image."""
        x = ZeroPadding2D(padding=3)(x)
        x = Conv2D(filters=3, kernel_size=7, strides=1, padding='valid',
                   activation='tanh', use_bias=False)(x)
        return x

    input_img = Input(self.input_shape)
    input_c = Input((self.num_c, ))

    # Tile the condition over the spatial grid.
    # NOTE(review): 128**2 and Reshape(self.input_shape) only agree when the
    # image is 128x128 and num_c equals the image channel count -- confirm.
    c = Lambda(lambda x: backend.repeat(x, 128**2))(input_c)
    c = Reshape(self.input_shape)(c)
    x = Concatenate()([input_img, c])

    # Feed the conditioned tensor into the network. Previously the bare
    # image was used here, which silently ignored ``input_c``.
    down_sampled = down_sampling(x)
    bottlenecked = bottleneck(down_sampled)
    up_sampled = up_sampling(bottlenecked)
    out = output_conv(up_sampled)
    return Model(inputs=[input_img, input_c], outputs=out)
def max_error_entropy(y_actual, y_pred):
    """Negative Gaussian-kernel information potential of the batch errors.

    Every pair of errors in the batch is compared with a Gaussian kernel of
    width ``kernel_size`` (a name taken from the enclosing scope); the
    kernel values are summed, normalised by ``sqrt(2*pi) * kernel_size *
    batch**2`` and negated so that the quantity can be minimised.

    Args:
        y_actual: Target tensor; ``K.repeat`` requires the error to be 2-D.
        y_pred: Prediction tensor, same shape as ``y_actual``.

    Returns:
        Scalar loss tensor.
    """
    error = y_actual - y_pred
    error_T = K.repeat(error, K.shape(error)[0])
    mod = tf.cast(tf.shape(y_actual)[0], 'float32')
    pi = tf.cast(np.pi, 'float32')

    # Gaussian exponent is -d^2 / (2 * sigma^2). The previous
    # ``/ 2 * kernel_size**2`` multiplied by sigma^2 because ``/`` and ``*``
    # associate left to right.
    pairwise_sq = K.square(error - error_T)
    kernel_values = K.exp(-pairwise_sq / (2 * kernel_size**2))

    return -K.sum(
        kernel_values / (K.sqrt(2 * pi) * kernel_size * mod**2))
def neg_log_likelihood(y_true, y_pred):
    """Gaussian-style negative log-likelihood against the closest target.

    The first five columns of ``y_pred`` are means, the remaining columns
    are raw scale parameters mapped to positive values with softplus. The
    squared error is the minimum over axis 1 of ``y_true`` after the means
    are repeated ``K.shape(y_true)[1]`` times.

    Args:
        y_true: Targets; assumed to be 3-D with candidates on axis 1 --
            TODO confirm against the caller.
        y_pred: 2-D predictions laid out as ``[means (5) | raw scales]``.

    Returns:
        Unreduced loss tensor (no mean is taken here).
    """
    my_mean = y_pred[:, :5]
    # K.softplus is the overflow-safe form of log(1 + exp(x)); the explicit
    # expression returns inf once exp(x) overflows.
    my_var = K.softplus(y_pred[:, 5:]) + 1e-6
    my_mean_temp = K.repeat(my_mean, K.shape(y_true)[1])

    numerateur = K.min(K.square(my_mean_temp - y_true), axis=1)
    # NOTE(review): the log term below treats my_var as a standard
    # deviation (log(s^2) / 2) while this denominator treats it as a
    # variance (2 * s). Confirm which is intended; left unchanged.
    denominateur = 2 * my_var
    result = numerateur / denominateur
    return K.log(K.square(my_var)) / 2 + result
def rmse_scale_invariance_log(y_true, y_pred):
    """Scale-invariant squared error computed in log space.

    Both tensors are first passed through the project helpers ``reshape``,
    ``clean_y`` and ``clean_x``. The per-sample offset ``a`` is the sum of
    log differences over axis 1 divided by ``N`` (taken from the enclosing
    scope); it is repeated 4070 times and added to the log difference
    before squaring.

    Args:
        y_true: Target values (logged after preprocessing).
        y_pred: Predicted values (logged after preprocessing).

    Returns:
        Mean of the per-sample loss.
    """
    for prepare in (reshape, clean_y, clean_x):
        y_true, y_pred = prepare(y_true, y_pred)

    log_pred = K.log(y_pred)
    log_true = K.log(y_true)

    d = K.cast(log_pred - log_true, dtype='float32')
    a = K.sum(log_true - log_pred, axis=1) / N
    per_sample = K.sum(K.square(d + K.repeat(a, 4070)), axis=1) / N
    return K.mean(per_sample)
def call(self, inputs, mask=None, **kwargs):
    """Sum ``inputs`` over ``self.axis``, zeroing masked positions first.

    Args:
        inputs: Tensor to reduce.
        mask: Optional mask. When its rank differs from that of ``inputs``
            it is repeated ``inputs.shape[-1]`` times and transposed so the
            copies land on the last axis.
        **kwargs: Ignored.

    Returns:
        The reduced tensor (the reduced axis is dropped).
    """
    if mask is None:
        return tf.reduce_sum(inputs, axis=self.axis, keepdims=False)

    if K.ndim(inputs) != K.ndim(mask):
        tiled = K.repeat(mask, inputs.shape[-1])
        mask = tf.transpose(tiled, [0, 2, 1])

    weights = K.cast(mask, K.floatx())
    return tf.reduce_sum(inputs * weights, axis=self.axis, keepdims=False)
def call(self, x, mask=None):
    """Average ``x`` over axis 1, ignoring masked timesteps.

    Args:
        x: Tensor laid out as (batch, time, x_dim) per the original
            comments.
        mask: Optional (batch, time) mask. When omitted, every timestep
            counts and the plain mean over axis 1 is returned.

    Returns:
        Tensor with axis 1 reduced.
    """
    if mask is None:
        # Without a mask all timesteps are valid, so the masked mean
        # reduces to an ordinary mean. The previous code fell through to
        # K.sum(None, ...) and crashed.
        return K.mean(x, axis=1)

    # mask (batch, time)
    mask = K.cast(mask, K.floatx())
    # mask (batch, x_dim, time)
    mask = K.repeat(mask, x.shape[-1])
    # mask (batch, time, x_dim); the backend call replaces the private
    # ``K.tf`` attribute, which is not part of the public backend API.
    mask = K.permute_dimensions(mask, [0, 2, 1])
    x = x * mask
    return K.sum(x, axis=1) / K.sum(mask, axis=1)
def call(self, hidden, timesteps):
    """Return the attention context vector for a hidden state.

    Args:
        hidden: Hidden state passed to ``self.transform_hidden``.
        timesteps: Number of times the transformed state is repeated
            before being scored against ``self._input_seq_shaped``.

    Returns:
        The sum over axis 1 of ``self.input_seq`` weighted by the softmax
        (over axis 1) of the alignment scores.
    """
    tiled_hidden = K.repeat(self.transform_hidden(hidden), timesteps)
    scores = self.calculate_alignment(tiled_hidden, self._input_seq_shaped)
    weights = softmax(scores, 1)
    return K.sum(weights * self.input_seq, 1)
def call(self, x, mask=None):
    """Mean of ``x`` over ``self.axis`` that honours an optional mask.

    Args:
        x: Tensor to average.
        mask: Optional mask. When its rank differs from that of ``x`` it
            is repeated ``x.shape[-1]`` times and transposed so the copies
            land on the last axis.

    Returns:
        ``K.mean(x)`` over ``self.axis`` when no mask is given; otherwise
        the sum of the masked values divided by the sum of the mask.
    """
    if mask is None:
        return K.mean(x, axis=self.axis)

    if K.ndim(x) != K.ndim(mask):
        tiled = K.repeat(mask, x.shape[-1])
        mask = tf.transpose(tiled, [0, 2, 1])

    weights = K.cast(mask, K.floatx())
    masked = x * weights
    return K.sum(masked, axis=self.axis) / K.sum(weights, axis=self.axis)
def denominateur(y_true, y_pred):
    """Return twice the positive scale derived from ``y_pred[:, 5:]``.

    The raw scale columns are mapped to positive values with softplus and
    offset by ``1e-6`` before being doubled.

    Args:
        y_true: Unused; kept so the function has the usual
            ``(y_true, y_pred)`` signature.
        y_pred: 2-D predictions laid out as ``[means (5) | raw scales]``.

    Returns:
        Tensor ``2 * (softplus(raw_scale) + 1e-6)``.
    """
    # Removed: a stray ``print(K.print_tensor(...))`` debug call, the unused
    # mean tensors and commented-out experiments. The result is also no
    # longer stored in a local that shadowed this function's name.
    # K.softplus is the overflow-safe form of log(1 + exp(x)).
    scale = K.softplus(y_pred[:, 5:]) + 1e-6
    return 2 * scale
def euclidean_dist(x, y):
    """Pairwise squared Euclidean distances between rows of ``x`` and ``y``.

    Args:
        x: Tensor of shape (n, d).
        y: Tensor of shape (m, d).

    Returns:
        Tensor of shape (n, m) holding ``sum((x[i] - y[j]) ** 2)``. No
        square root is applied.

    Raises:
        AssertionError: If the feature sizes of ``x`` and ``y`` differ.
    """
    d = x.shape[1]
    assert d == y.shape[1]

    # Broadcasting (n, 1, d) against (1, m, d) yields every pair without
    # tiling x m times, and does not require m to be known statically.
    diff = K.expand_dims(x, axis=1) - K.expand_dims(y, axis=0)  # n * m * d
    return K.sum(K.pow(diff, 2), axis=2)  # n * m
def call(self, x):
    """Normalise ``x`` over its last axis, then scale and shift it.

    Args:
        x: Input tensor of any rank >= 1.

    Returns:
        ``self._g * (x - mean) / (std + self._epsilon) + self._b`` where
        ``mean`` and ``std`` are taken over the last axis.
    """
    # keepdims=True leaves a size-1 last axis so both statistics broadcast
    # against x for any rank. This replaces the manual repeat/permute
    # logic, whose 2-D branch built ``std`` from ``mean`` by mistake.
    mean = K.mean(x, axis=-1, keepdims=True)
    std = K.std(x, axis=-1, keepdims=True)
    return self._g * (x - mean) / (std + self._epsilon) + self._b
def neg_log_likelihood(y_true, y_pred):
    """Mean squared error to the closest target, scaled by predicted spread.

    The first five columns of ``y_pred`` are means, the remaining columns
    are raw scale parameters mapped to positive values with softplus. The
    squared error is the minimum over axis 1 of ``y_true`` after the means
    are repeated ``K.shape(y_true)[1]`` times.

    Args:
        y_true: Targets; assumed to be 3-D with candidates on axis 1 --
            TODO confirm against the caller.
        y_pred: 2-D predictions laid out as ``[means (5) | raw scales]``.

    Returns:
        Scalar tensor: the mean of
        ``min_sq_error / (2 * (scale + 1e-5) ** 2)``.
    """
    # Removed: ``print(K.get_value(...))`` debug calls, which cannot run on
    # symbolic tensors while the loss graph is built, and commented-out
    # experiments.
    my_mean = y_pred[:, :5]
    # K.softplus is the overflow-safe form of log(1 + exp(x)).
    my_var = K.softplus(y_pred[:, 5:])
    my_mean_temp = K.repeat(my_mean, K.shape(y_true)[1])

    numerateur = K.min(K.square(my_mean_temp - y_true), axis=1)
    denominateur = 2 * K.square(my_var + 1e-5)
    return K.mean(numerateur / denominateur)
def compute_lambda_tensor3(self):
    """Tile ``self.lambda_p`` across the batch dimension.

    Returns:
        Tensor of shape ``(batch_size, latent_dim, n_clusters)`` in which
        every batch entry is a copy of ``self.lambda_p``.

    Raises:
        AssertionError: If ``self.lambda_p`` or the tiled result does not
            have the expected shape.
    """
    param_shape = (self.latent_dim, self.n_clusters)
    tiled_shape = (self.batch_size, self.latent_dim, self.n_clusters)

    assert self.lambda_p.shape == param_shape, \
        'self.lambda_p.shape {} != {}'.format(self.lambda_p.shape, param_shape)

    # K.repeat inserts the copies on axis 1; swap them to the front.
    repeated = K.repeat(self.lambda_p, self.batch_size)
    lambda_tensor3 = K.permute_dimensions(repeated, [1, 0, 2])

    assert lambda_tensor3.shape == tiled_shape, \
        'lambda_tensor3.shape {} != {}'.format(
            lambda_tensor3.shape, tiled_shape)
    return lambda_tensor3
def _time_distributed_dense(x, w, b=None, dropout=None, input_dim=None,
                            output_dim=None, timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    # Fall back to shapes read from the tensors for any size not supplied.
    input_dim = input_dim or K.shape(x)[2]
    timesteps = timesteps or K.shape(x)[1]
    output_dim = output_dim or K.int_shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # One dropout pattern is drawn per sample and reused at every
        # timestep; it only takes effect in the training phase.
        template = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        pattern = K.repeat(K.dropout(template, dropout), timesteps)
        x = K.in_train_phase(x * pattern, x, training=training)

    # Fold time into the batch axis so a single matrix product suffices.
    flat = K.dot(K.reshape(x, (-1, input_dim)), w)
    if b is not None:
        flat = K.bias_add(flat, b)

    # Restore the 3D layout.
    if K.backend() != 'tensorflow':
        return K.reshape(flat, (-1, timesteps, output_dim))

    out = K.reshape(flat, K.stack([-1, timesteps, output_dim]))
    out.set_shape([None, None, output_dim])
    return out
def call(self, x, mask=None):
    """Sum ``x`` over ``self.axis`` after zeroing masked positions.

    Args:
        x: Tensor to reduce. A 2-D tensor gets a trailing axis of size 1
            before the reduction.
        mask: Optional mask, (batch, time) per the original comment. When
            its rank differs from that of ``x`` it is repeated
            ``x.shape[-1]`` times and transposed so the copies land on the
            last axis.

    Returns:
        The reduced tensor.
    """
    if mask is not None:
        weights = K.cast(mask, K.floatx())
        if K.ndim(x) != K.ndim(weights):
            weights = tf.transpose(
                K.repeat(weights, x.shape[-1]), [0, 2, 1])
        x = x * weights

    # Both the masked and the unmasked path finish the same way.
    if K.ndim(x) == 2:
        x = K.expand_dims(x)
    return K.sum(x, axis=self.axis)
def call(self, x, mask=None):
    """Weight the timesteps of ``x`` with attention and optionally pool.

    Args:
        x: 3-D input tensor; axis 2 is contracted with ``self.att_W`` and
            ``self.att_v`` to obtain one score per timestep.
        mask: Optional (batch, time) mask. It is only used by the
            ``'attmean'`` pooling, where it provides the number of valid
            timesteps per sample.

    Returns:
        Tensor cast to ``K.floatx()``. With ``self.op == 'attsum'`` the
        weighted timesteps are summed over axis 1; with ``'attmean'`` that
        sum is divided by the number of valid timesteps; for any other
        ``op`` the weighted sequence is returned unpooled.

    Raises:
        ValueError: If ``self.activation`` is neither falsy nor ``'tanh'``.
    """
    y = K.dot(x, self.att_W)
    if not self.activation:
        activated = y
    elif self.activation == 'tanh':
        activated = K.tanh(y)
    else:
        # Previously this case left ``weights`` unbound and failed later
        # with an unrelated-looking error.
        raise ValueError(
            'Unsupported attention activation: {!r}'.format(self.activation))

    weights = tf.tensordot(self.att_v, activated, axes=[[0], [2]])
    weights = K.softmax(weights)
    out = x * K.permute_dimensions(K.repeat(weights, x.shape[2]), [0, 2, 1])

    if self.op == 'attsum':
        out = K.sum(out, axis=1)
    elif self.op == 'attmean':
        # Backend reductions are used because tensors expose no ``.sum``
        # method under the TensorFlow backend this layer already requires.
        if mask is None:
            # No mask: every timestep is valid, so divide by their count.
            out = K.mean(out, axis=1)
        else:
            valid_steps = K.sum(
                K.cast(mask, K.floatx()), axis=1, keepdims=True)
            out = K.sum(out, axis=1) / valid_steps
    return K.cast(out, K.floatx())
def euclidean_dist_mts(x, y):
    """Pairwise squared Euclidean distances between multivariate series.

    Each series is flattened to a vector of length ``L * d`` before the
    distances are computed.

    Args:
        x: Tensor of shape (n, L, d).
        y: Tensor of shape (m, L, d).

    Returns:
        Tensor of shape (n, m) holding the sum of squared differences. No
        square root is applied.

    Raises:
        AssertionError: If the feature sizes of ``x`` and ``y`` differ.
    """
    l = x.shape[1]
    d = x.shape[2]
    assert d == y.shape[2]

    # -1 lets the number of series stay unknown until run time.
    x = K.reshape(x, shape=(-1, l * d))
    y = K.reshape(y, shape=(-1, l * d))

    # Broadcasting (n, 1, d') against (1, m, d') yields every pair without
    # tiling x m times.
    diff = K.expand_dims(x, axis=1) - K.expand_dims(y, axis=0)  # n * m * d'
    return K.sum(K.pow(diff, 2), axis=2)  # n * m
def _transform(self, X, affine_transformation, output_size):
    """Resample ``X`` on a grid warped by one shared affine transform.

    Args:
        X: 4-D input tensor; axis 0 is read as the batch size and axis 3
            as the channel count.
        affine_transformation: Tensor with six elements. It is reshaped to
            ``(1, 6)`` and copied for every sample in the batch.
        output_size: Sequence whose first two entries give the output
            height and width.

    Returns:
        The interpolated tensor reshaped to
        ``(batch, output_size[0], output_size[1], channels)``.
    """
    input_shape = K.shape(X)
    batch_size, num_channels = input_shape[0], input_shape[3]

    # Copy the single parameter row once per sample, then view each copy
    # as a 2x3 matrix.
    flat_params = K.reshape(affine_transformation, (1, 6))
    tiled_params = K.repeat(
        flat_params, n=K.cast(batch_size, dtype='int32'))
    transformations = K.reshape(tiled_params, shape=(batch_size, 2, 3))

    regular_grids = self._make_regular_grids(batch_size, *output_size)
    sampled_grids = K.batch_dot(transformations, regular_grids)

    # NOTE (original author): negative values were observed coming out of
    # the interpolation step.
    interpolated_image = self._interpolate(X, sampled_grids, output_size)

    out_height, out_width = output_size[0], output_size[1]
    return K.reshape(
        interpolated_image,
        (batch_size, out_height, out_width, num_channels))
def loss(y_true, y_pred):
    """Two-term distance loss between a probability map and GT points.

    ``w``, ``h``, ``all_img_locations``, ``max_dist``, ``alpha`` and
    ``cdist`` are taken from the enclosing scope.

    Args:
        y_true: Ground-truth map, reshaped to ``[w, h]``; entries above 0.5
            are treated as ground-truth points.
        y_pred: Predicted map; flattened into one probability per location.

    Returns:
        ``term_1 + term_2``. ``term_1`` is the probability-weighted mean
        distance from each location to its nearest ground-truth point.
        ``term_2`` is the mean over ground-truth points of the smallest
        probability-discounted distance, clipped to ``[0, max_dist]``.
    """
    eps = 1e-6
    y_true = K.reshape(y_true, [w, h])
    gt_points = K.cast(tf.where(y_true > 0.5), dtype=tf.float32)
    num_gt_points = tf.shape(gt_points)[0]

    p = K.flatten(y_pred)
    # Squeeze only the trailing singleton axis. A bare tf.squeeze also
    # removed the ground-truth axis when there was exactly one GT point,
    # so the division below broadcast to the wrong shape.
    p_replicated = tf.squeeze(
        K.repeat(tf.expand_dims(p, axis=-1), num_gt_points), axis=-1)

    d_matrix = cdist(all_img_locations, gt_points)
    num_est_pts = tf.reduce_sum(p)
    term_1 = (1 / (num_est_pts + eps)) * K.sum(p * K.min(d_matrix, 1))

    d_div_p = K.min(
        (d_matrix + eps) / (p_replicated ** alpha + (eps / max_dist)), 0)
    d_div_p = K.clip(d_div_p, 0, max_dist)
    term_2 = K.mean(d_div_p, axis=0)

    return term_1 + term_2
def call(self, x, training=None, mask=None, states=None):
    """Run the decoder over the sequence and return the per-step outputs.

    The original notes give ``x`` the shape (batch_size, time_step, dim),
    e.g. (3, 10, 128), and describe it both as the encoder output and as
    "the output of the decoder" -- TODO confirm which one callers pass.

    :param Tensor x: Input sequence; also appended to the constants handed
        to ``self.step``.
    :param training: Not used by this implementation.
    :param Tensor mask: Not used by this implementation.
    :param Tensor states: Optional initial states; when falsy they are
        obtained from ``self.decoder.get_initial_state``.
    :return: The ``outputs`` element of ``K.rnn`` -- pointer probabilities,
        annotated as (batch, 10, 10) in the original example.
    """
    input_shape = self.input_spec[0].shape
    timesteps = input_shape[1]
    en_seq = x

    # Keep only the last timestep, (batch, dim), and repeat it along a new
    # time axis: K.repeat turns (samples, dim) into (samples, n, dim).
    last_step = x[:, timesteps - 1, :]
    x_input = K.repeat(last_step, timesteps)

    initial_states = (
        states if states else self.decoder.get_initial_state(x_input))

    preprocessed_input, _, constants = self.decoder.process_inputs(
        x_input, initial_states, [])
    constants.append(en_seq)

    # K.rnn calls self.step once per timestep of preprocessed_input and
    # returns (last_output, outputs, new_states):
    #   last_output: latest output, shape (samples, ...)
    #   outputs: shape (samples, time, ...); outputs[s, t] is the step
    #       output for sample s at time t
    #   new_states: latest states returned by the step function
    _last_output, outputs, _new_states = K.rnn(
        self.step,
        preprocessed_input,
        initial_states,
        go_backwards=self.decoder.lstm.go_backwards,
        constants=constants,
        input_length=timesteps)
    return outputs