def correlation_coefficient_loss(y_true, y_pred):
    x = y_true
    y = y_pred
    mx = K.mean(x)
    my = K.mean(y)
    xm, ym = x - mx, y - my
    r_num = K.sum(tf.multiply(xm, ym))
    r_den = K.sqrt(tf.multiply(K.sum(K.square(xm)), K.sum(K.square(ym))))
    r = r_num / r_den
    r = K.maximum(K.minimum(r, 1.0), -1.0)
    return 1 - K.square(r)
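# --- Minimal usage sketch (not part of the original source) ---
# Assumes `tf` is TensorFlow and `K` is the Keras backend, matching the aliases
# used above; the model layout below is purely illustrative.
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Dense(16, activation='relu', input_shape=(8,)),
    keras.layers.Dense(1),
])
# The custom loss plugs in like any built-in Keras loss.
model.compile(optimizer='adam', loss=correlation_coefficient_loss)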
def softmax_with_mask(tensor_and_mask):
    input_tensor, mask_tensor = tensor_and_mask
    min_tensor = K.min(input_tensor, axis=1, keepdims=True)
    positive_tensor = (min_tensor - input_tensor) * mask_tensor
    max_tensor = K.max(positive_tensor, axis=1, keepdims=True)
    exp_tensor = K.exp(positive_tensor - max_tensor)
    masked_tensor = exp_tensor * mask_tensor
    summed_tensor = K.sum(masked_tensor, axis=1, keepdims=True)
    return masked_tensor / (summed_tensor + 1e-10)
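# --- Minimal usage sketch (an assumption, not from the source) ---
# Wrapping softmax_with_mask in a Lambda layer so it can be applied to a
# (scores, mask) pair inside a model; the input names and the length 10 are
# illustrative only.
from tensorflow import keras

scores_in = keras.layers.Input(shape=(10,), name='scores')
mask_in = keras.layers.Input(shape=(10,), name='mask')
masked_probs = keras.layers.Lambda(softmax_with_mask)([scores_in, mask_in])
masker = keras.Model([scores_in, mask_in], masked_probs)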
def train(self, data):
    """Pretrain the latent layers of the model."""
    # network parameters
    original_dim = data.shape[1]
    input_shape = (original_dim, )
    batch_size = train_params.batch_size
    latent_dim = train_params.num_latent
    epochs = train_params.num_epochs

    # build encoder model
    inputs = Input(shape=input_shape, name='encoder_input')
    inputs_noisy = inputs
    z_mean = Dense(latent_dim, activation=None, name='z_mean')
    z_mean = z_mean(inputs_noisy)
    z_log_sigma = Dense(latent_dim, activation=None, name='z_log_sigma')
    z_log_sigma = z_log_sigma(inputs_noisy)
    z = Lambda(self.sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_sigma])
    encoder = Model(inputs, [z_mean, z_log_sigma, z], name='encoder')

    # build decoder model
    latent_inputs = Input(shape=(latent_dim, ), name='z_sampling')
    outputs = Dense(original_dim, activation='sigmoid',
                    name="decoder_l")(latent_inputs)
    decoder = Model(latent_inputs, outputs, name='decoder')

    # build the end-to-end autoencoder (encoder -> decoder)
    outputs = decoder(encoder(inputs)[2])
    latent_model = Model(inputs, outputs, name='vae_mlp')

    # VAE loss = reconstruction loss + KL divergence
    reconstruction_loss = binary_crossentropy(inputs, outputs) * original_dim
    kl_loss = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    latent_model.add_loss(vae_loss)
    latent_model.compile(optimizer='adam')

    saver = ModelCheckpoint(check_path(TEMPORARY_LATENT_PATH),
                            save_weights_only=True,
                            verbose=1)
    tensorboard_config = TensorBoard(
        log_dir=check_path(TEMPORARY_LATENT_PATH))
    logger.info("Model checkpoint and TensorBoard callbacks have been configured.")

    # train the autoencoder
    latent_model.fit(data,
                     epochs=epochs,
                     batch_size=batch_size,
                     callbacks=[saver, tensorboard_config])

    # Collect the weights for z_mean and z_log_sigma, the layers being pretrained.
    self.weights.append(
        latent_model.get_layer("encoder").get_layer("z_mean").get_weights())
    self.weights.append(
        latent_model.get_layer("encoder").get_layer(
            "z_log_sigma").get_weights())
    self.de_weights.append(
        latent_model.get_layer("decoder").get_layer(
            "decoder_l").get_weights())
    logger.info("Weights have been updated successfully.")
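# --- Hedged sketch (an assumption, not from the source) of the `sampling`
# helper referenced above via Lambda(self.sampling): the standard
# reparameterization trick, z = mean + exp(0.5 * log_sigma) * eps, consistent
# with the KL term above treating z_log_sigma as a log-variance. It would be a
# method of the same class.
def sampling(self, args):
    z_mean, z_log_sigma = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_sigma) * epsilon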
def compute_mask_loss(boxes,
                      masks,
                      annotations,
                      masks_target,
                      width,
                      height,
                      iou_threshold=0.5,
                      mask_size=(28, 28)):
    """compute overlap of boxes with annotations"""
    iou = overlap(boxes, annotations)
    argmax_overlaps_inds = K.argmax(iou, axis=1)
    max_iou = K.max(iou, axis=1)

    # keep detections with IoU >= iou_threshold
    indices = tf.where(K.greater_equal(max_iou, iou_threshold))
    boxes = tf.gather_nd(boxes, indices)
    masks = tf.gather_nd(masks, indices)
    argmax_overlaps_inds = K.cast(
        tf.gather_nd(argmax_overlaps_inds, indices), 'int32')
    labels = K.cast(K.gather(annotations[:, 4], argmax_overlaps_inds), 'int32')

    # make normalized boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    boxes = K.stack([
        y1 / (K.cast(height, dtype=K.floatx()) - 1),
        x1 / (K.cast(width, dtype=K.floatx()) - 1),
        (y2 - 1) / (K.cast(height, dtype=K.floatx()) - 1),
        (x2 - 1) / (K.cast(width, dtype=K.floatx()) - 1),
    ], axis=1)

    # crop and resize masks_target
    # append a fake channel dimension
    masks_target = K.expand_dims(masks_target, axis=3)
    masks_target = tf.image.crop_and_resize(masks_target, boxes,
                                            argmax_overlaps_inds, mask_size)
    masks_target = masks_target[:, :, :, 0]  # remove fake channel dimension

    # gather the predicted masks using the annotation label
    masks = tf.transpose(masks, (0, 3, 1, 2))
    label_indices = K.stack([tf.range(K.shape(labels)[0]), labels], axis=1)
    masks = tf.gather_nd(masks, label_indices)

    # compute mask loss
    mask_loss = K.binary_crossentropy(masks_target, masks)
    normalizer = K.shape(masks)[0] * K.shape(masks)[1] * K.shape(masks)[2]
    normalizer = K.maximum(K.cast(normalizer, K.floatx()), 1)
    mask_loss = K.sum(mask_loss) / normalizer
    return mask_loss
def call(self, x, mask=None):
    # print(x[0].shape)
    # print(x[1].shape)
    # x[0] is Nx2, x[1] is Nx8 onehot, self.centers is 8x2
    delta_centers = K.dot(K.transpose(x[1]),
                          (K.dot(x[1], self.centers) - x[0]))  # 8x2
    center_counts = K.sum(K.transpose(x[1]), axis=1, keepdims=True) + 1  # 8x1
    delta_centers /= center_counts
    new_centers = self.centers - self.alpha * delta_centers
    self.add_update((self.centers, new_centers), x)
    # self.add_update((self.counter, self.counter + 1), x)
    self.result = x[0] - K.dot(x[1], self.centers)
    self.result = K.sum(self.result**2, axis=1, keepdims=True)  # / K.dot(x[1], center_counts)
    return self.result  # Nx1
def call(self, inputs, mask=None):
    steps_axis = 1 if self.data_format == 'channels_last' else 2
    if mask is not None:
        mask = math_ops.cast(mask, backend.floatx())
        mask = array_ops.expand_dims(
            mask, 2 if self.data_format == 'channels_last' else 1)
        inputs *= mask
    return backend.sum(inputs, axis=steps_axis)
def call(self, x):
    print(x)
    features_dim = x.shape[-1].value
    step_dim = x.shape[-2].value
    # print(K.reshape(self.kernel, (-1, features_dim)))  # n, d
    # print(K.reshape(self.W, (features_dim, 1)))  # w = d x 1
    # print(K.dot(K.reshape(self.kernel, (-1, features_dim)),
    #             K.reshape(self.W, (features_dim, 1))))  # n x 1
    eij = K.reshape(
        K.dot(K.reshape(self.kernel, (-1, features_dim)),
              K.reshape(self.W, (features_dim, 1))),
        (-1, step_dim + self.windows))
    print(eij)
    eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    a = K.reshape(a, (step_dim + self.windows, 1))
    print(a)
    temp = a[0:self.windows, ]
    print(temp)
    temp /= K.cast(
        K.sum(temp, axis=0, keepdims=True) + K.epsilon(), K.floatx())
    weighted_input = self.kernel[0:self.windows, ] * temp
    alltemp = K.sum(weighted_input, axis=0, keepdims=True)
    for i in range(self.windows // 2 + 1, step_dim + self.windows // 2):
        temp = a[i - self.windows // 2:i + self.windows // 2, ]
        temp /= K.cast(
            K.sum(temp, axis=0, keepdims=True) + K.epsilon(), K.floatx())
        weighted_input = self.kernel[i - self.windows // 2:i + self.windows // 2, ] * temp
        temp = K.sum(weighted_input, axis=0, keepdims=True)
        alltemp = keras.layers.concatenate([alltemp, temp], 0)
    print(alltemp)
    alltemp = keras.activations.tanh(alltemp)
    return x + alltemp
def loss(y_true, y_pred):
    loss_val = -1 * K.sum(
        K.log(K.softmax(y_pred[:, :-1])) * y_true[:, :-1], axis=-1)
    return K.mean(
        K.switch(
            K.equal(task, 1005),
            loss_weights[task] * loss_val,
            K.switch(K.equal(y_true[:, -1], task), loss_val,
                     loss_weights[task] * loss_val)))
def custom_binary_crossentropy(y_true, y_pred):
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    epsilon_ = K._constant_to_tensor(K.epsilon(), y_pred.dtype.base_dtype)
    output = clip_ops.clip_by_value(y_pred, epsilon_, 1.0 - epsilon_)

    # Compute cross entropy from probabilities.
    bce = 4 * y_true * math_ops.log(output + K.epsilon())
    bce += (1 - y_true) * math_ops.log(1 - output + K.epsilon())
    return K.sum(-bce, axis=-1)
def loss_2nd(y_true, y_pred):
    print(y_true)
    # Tensors do not support in-place item assignment, so build the per-element
    # weights with tf.where: weight `beta` where y_true is nonzero, 1 elsewhere.
    b_ = tf.where(tf.not_equal(y_true, 0),
                  beta * K.ones_like(y_true),
                  K.ones_like(y_true))
    x = K.square((y_true - y_pred) * b_)
    t = K.sum(x, axis=-1)
    return K.mean(t)
def _each(b_17):
    pT = tf.constant([0.8], dtype=tf.float32)
    pN = tf.constant([0.2], dtype=tf.float32)
    output = b_17 * init_max_value
    logging.getLogger().info("--_each\n %s" % output)
    output = K.sum(output, axis=None, keepdims=False)
    logging.getLogger().info("--sum\n %s" % output)
    output = tf.cond(tf.greater(output, init_max_hv), lambda: pT, lambda: pN)
    return output
def create_inital_state(inputs, hidden_size):  # hidden_size=64
    # We are not using initial states, but need to pass something to the K.rnn function
    fake_state = K.zeros_like(
        inputs)  # [b,64,512] <= (batch_size, enc_seq_len, latent_dim)
    fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
    fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
    fake_state = tile(
        fake_state, [1, hidden_size])  # <= (batch_size, latent_dim) (b, 64)
    return fake_state
def call(self, inputs):
    mu, log_var = inputs
    kl_batch = -.5 * backend.sum(
        1 + log_var - backend.square(mu) - backend.exp(log_var), axis=-1)
    self.add_loss(backend.mean(kl_batch), inputs=inputs)
    return inputs
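# --- Hedged sketch (an assumption, not from the source) of the layer class a
# `call` like the one above would typically live in: an identity layer that
# registers the KL term via add_loss and passes (mu, log_var) through
# unchanged. The class name and the usage comment are illustrative.
from tensorflow.keras import layers, backend

class KLDivergenceLayer(layers.Layer):
    """Identity layer that adds the KL divergence of N(mu, var) from N(0, I)."""

    def call(self, inputs):
        mu, log_var = inputs
        kl_batch = -.5 * backend.sum(
            1 + log_var - backend.square(mu) - backend.exp(log_var), axis=-1)
        self.add_loss(backend.mean(kl_batch), inputs=inputs)
        return inputs

# Typical wiring inside a VAE encoder (illustrative tensor names):
#   z_mean, z_log_var = KLDivergenceLayer()([z_mean, z_log_var])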
def triplet_loss(inputs, dist='sqeuclidean', margin='maxplus'):
    anchor, positive, negative = inputs
    positive_distance = K.square(anchor - positive)
    negative_distance = K.square(anchor - negative)
    if dist == 'euclidean':
        positive_distance = K.sqrt(
            K.sum(positive_distance, axis=-1, keepdims=True))
        negative_distance = K.sqrt(
            K.sum(negative_distance, axis=-1, keepdims=True))
    elif dist == 'sqeuclidean':
        positive_distance = K.sum(positive_distance, axis=-1, keepdims=True)
        negative_distance = K.sum(negative_distance, axis=-1, keepdims=True)
    loss = positive_distance - negative_distance
    if margin == 'maxplus':
        loss = K.maximum(0.0, 1 + loss)
    elif margin == 'softplus':
        loss = K.log(1 + K.exp(loss))
    return K.mean(loss)
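# --- Minimal usage sketch (an assumption, not from the source) ---
# The triplet loss is computed by a Lambda layer over three embedding tensors;
# the model's "prediction" is the loss itself, so it is compiled with an
# identity loss and trained against dummy targets. Embedding size 128 and all
# layer names are illustrative, and the dummy-target trick may need adjustment
# depending on the Keras version.
from tensorflow import keras

emb_a = keras.layers.Input(shape=(128,), name='anchor_embedding')
emb_p = keras.layers.Input(shape=(128,), name='positive_embedding')
emb_n = keras.layers.Input(shape=(128,), name='negative_embedding')
loss_out = keras.layers.Lambda(triplet_loss)([emb_a, emb_p, emb_n])
triplet_model = keras.Model([emb_a, emb_p, emb_n], loss_out)
triplet_model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)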
def ppo_loss(y_true, y_pred):
    eps = 0.2
    entropy_loss = 0.001 * K.mean(
        K.sum(y_pred * K.log(y_pred + 1e-10), axis=1, keepdims=True)
    )  # Danger: the mask of allowed actions is not taken into account!
    r = y_pred * y_true / (old_pred * y_true + 1e-10)
    policy_loss = -K.mean(
        K.minimum(r * advantages, K.clip(r, 1 - eps, 1 + eps) * advantages))
    return policy_loss + entropy_loss
def softmax(x, axis=1):
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
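# --- Quick illustrative check (not from the source) ---
# Applying the axis-aware softmax to a rank-3 tensor of attention scores;
# shapes are arbitrary. Assumes `K` is the Keras backend, as above.
import numpy as np

scores = K.constant(np.random.rand(2, 5, 7))  # (batch, timesteps, features)
probs = softmax(scores, axis=1)  # each slice along axis=1 now sums to 1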
def call(self, inputs, **kwargs):
    pair1, pair2 = inputs
    pair1_shape, pair2_shape = K.shape(pair1), K.shape(pair2)
    pair1_mask = K.cast(K.squeeze(
        K.any(K.not_equal(pair1, 0.), axis=(-2, -1), keepdims=True), axis=-1),
        dtype=pair1.dtype)
    pair2_mask = K.cast(K.squeeze(
        K.any(K.not_equal(pair2, 0.), axis=(-2, -1), keepdims=True), axis=-1),
        dtype=pair2.dtype)
    pair1_to_lstm = K.reshape(pair1, (-1, pair1.shape[-2], pair1.shape[-1]))
    pair2_to_lstm = K.reshape(pair2, (-1, pair2.shape[-2], pair2.shape[-1]))
    batch = K.concatenate([pair1_to_lstm, pair2_to_lstm], axis=0)
    embedded = super(TestSpeakerEmbedding, self).call(batch)
    pair1_embed = embedded[:K.shape(pair1_to_lstm)[0]]
    pair2_embed = embedded[K.shape(pair1_to_lstm)[0]:]
    pair1_embed = K.reshape(pair1_embed, (pair1_shape[0], pair1_shape[1], -1))
    pair2_embed = K.reshape(pair2_embed, (pair2_shape[0], pair2_shape[1], -1))
    pair1_embed = pair1_embed * pair1_mask
    pair2_embed = pair2_embed * pair2_mask
    pair1_n = K.sum(pair1_mask, axis=1)
    pair2_n = K.sum(pair2_mask, axis=1)
    pair1_embed = K.sum(pair1_embed, axis=1) / pair1_n
    pair2_embed = K.sum(pair2_embed, axis=1) / pair2_n
    return pair1_embed, pair2_embed
def call(self, inputs, mask=None):
    # inputs.shape = (batch_size, time_steps, seq_len)
    x = K.permute_dimensions(inputs, (0, 2, 1))
    # x.shape = (batch_size, seq_len, time_steps)
    # general
    a = K.softmax(K.tanh(K.dot(x, self.W)))
    a = K.permute_dimensions(a, (0, 2, 1))
    outputs = a * inputs
    outputs = K.sum(outputs, axis=1)
    return outputs
def loss_2nd(y_true, y_pred):
    # Requires eager execution: y_true is converted to NumPy to build the
    # per-element weight matrix (weight `beta` where y_true is nonzero).
    y_true_numpy = y_true.numpy()
    b_ = np.ones_like(y_true_numpy)
    b_[y_true_numpy != 0] = beta
    x = K.square((y_true - y_pred) * b_)
    t = K.sum(x, axis=-1)
    return K.mean(t)
def vae_loss(self, x, x_decoded_mean):
    _, encoder_mean, encoder_logvar = self.encoder.layers
    z_mean = encoder_mean(x)
    z_logvar = encoder_logvar(x)
    # first term: KL divergence (latent loss)
    latent_loss = -0.5 * K.sum(
        1 + z_logvar - K.square(z_mean) - K.exp(z_logvar), axis=-1)
    # second term: reconstruction loss
    reconst_loss = K.mean(mean_squared_error(x, x_decoded_mean), axis=-1)
    return latent_loss + reconst_loss
def call(self, x, mask=None):
    """The 'mask' argument is the mask passed down from the previous layer."""
    # x: [batch_size, seq_len, embedding_size]
    if self.supports_masking:
        # mask: [batch_size, seq_len]
        if mask is None:
            # Check which positions are nonzero, then OR over the feature axis
            # to get the effective length of each sequence.
            mask = K.any(K.not_equal(x, 0), -1)  # [batch_size, seq_len]
        mask = K.cast(mask, K.floatx())
        return K.sum(x, axis=1) / K.sum(mask, axis=1, keepdims=True)
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        # [batch_size, embedding_size, seq_len]
        mask = K.repeat(mask, x.shape[-1].value)
        # [batch_size, seq_len, embedding_size]
        mask = tf.transpose(mask, [0, 2, 1])
        x = x * mask
    return K.sum(x, axis=1) / K.sum(mask, axis=1)
def loss_2nd(y_true, y_pred):
    b_ = K.ones_like(y_true)
    betas = K.ones_like(y_true)
    betas = tf.fill(tf.shape(betas), beta)
    b_ = tf.where(tf.not_equal(y_true, 0), betas, b_)
    x = K.square((y_true - y_pred) * b_)
    t = K.sum(x, axis=-1)
    return K.mean(t)
def call(self, x, mask=None):
    # computes a probability distribution over the timesteps
    # uses 'max trick' for numerical stability
    # reshape is done to avoid issue with Tensorflow
    # and 1-dimensional weights
    logits = K.dot(x, self.W)
    x_shape = K.shape(x)
    logits = K.reshape(logits, (x_shape[0], x_shape[1]))
    ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        ai = ai * mask
    att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
    weighted_input = x * K.expand_dims(att_weights)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, att_weights]
    return result
def sparse_accuracy_ignoring_last_label(y_true, y_pred):
    nb_classes = K.int_shape(y_pred)[-1]
    y_pred = K.reshape(y_pred, (-1, nb_classes))
    y_true = K.one_hot(tf.to_int32(K.flatten(y_true)), nb_classes + 1)
    unpacked = tf.unstack(y_true, axis=-1)
    legal_labels = ~tf.cast(unpacked[-1], tf.bool)
    y_true = tf.stack(unpacked[:-1], axis=-1)
    correct = legal_labels & K.equal(K.argmax(y_true, axis=-1),
                                     K.argmax(y_pred, axis=-1))
    return K.sum(tf.to_float(correct)) / K.sum(tf.to_float(legal_labels))
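# --- Minimal usage sketch (an assumption, not from the source) ---
# The function above has the standard Keras metric signature, so it can be
# passed to compile(); it relies on TF 1.x symbols (tf.to_int32, tf.to_float).
# The tiny segmentation head below is purely illustrative.
from tensorflow import keras

num_classes = 21
seg_model = keras.Sequential([
    keras.layers.Conv2D(num_classes, 1, input_shape=(64, 64, 3)),
    keras.layers.Activation('softmax'),
])
seg_model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=[sparse_accuracy_ignoring_last_label])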
def vae_loss(x, x_decoded_mean):
    """
    Two losses:
    1) Reconstruction loss (as specified by binary_crossentropy)
    2) KL divergence of the distributions
    """
    xent_loss = ORIGINAL_DIM * losses.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.sum(
        1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    vae_loss = K.mean(xent_loss + kl_loss)
    return vae_loss
def _lossfunction(self, y_true, y_pred):
    # Expected rating: weighted sum of the class probabilities (classes 1-5).
    ny_true = (y_true[:, 1] + 2 * y_true[:, 2] + 3 * y_true[:, 3]
               + 4 * y_true[:, 4] + 5 * y_true[:, 5])
    ny_pred = (y_pred[:, 1] + 2 * y_pred[:, 2] + 3 * y_pred[:, 3]
               + 4 * y_pred[:, 4] + 5 * y_pred[:, 5])
    my_true = K.mean(ny_true)
    my_pred = K.mean(ny_pred)
    var_true = (ny_true - my_true)**2
    var_pred = (ny_pred - my_pred)**2
    # Negative Pearson correlation between the true and predicted expected ratings.
    return -K.sum(
        (ny_true - my_true) * (ny_pred - my_pred), axis=-1) / (K.sqrt(
            K.sum(var_true, axis=-1) * K.sum(var_pred, axis=-1)))
def context_step(inputs, states):
    """Step function for computing ci using ei."""
    assert_msg = "States must be an iterable. Got {} of type {}".format(
        states, type(states))
    assert isinstance(states, list) or isinstance(states, tuple), assert_msg
    # <= (batch_size, hidden_size)
    c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
    if verbose:
        print('ci>', c_i.shape)
    return c_i, [c_i]
def call(self, inputs, **kwargs):
    input_shape = K.int_shape(inputs)
    sequence_length, d_model = input_shape[-2:]
    # output of the "sigmoid halting unit" (not the probability yet)
    halting = K.sigmoid(
        K.reshape(
            K.bias_add(K.dot(K.reshape(inputs, [-1, d_model]),
                             self.act_weights['halting_kernel']),
                       self.act_weights['halting_biases'],
                       data_format='channels_last'),
            [-1, sequence_length]))
    if self.zeros_like_halting is None:
        self.initialize_control_tensors(halting)
    # useful flags
    step_is_active = K.greater(self.halt_budget, 0)
    no_further_steps = K.less_equal(self.halt_budget - halting, 0)
    # halting probability is equal to
    # a. halting output if this isn't the last step (we have some budget)
    # b. to remainder if it is,
    # c. and zero for the steps that shouldn't be executed at all
    #    (out of budget for them)
    halting_prob = K.switch(
        step_is_active,
        K.switch(no_further_steps, self.remainder, halting),
        self.zeros_like_halting)
    self.active_steps += K.switch(step_is_active, self.ones_like_halting,
                                  self.zeros_like_halting)
    # We don't know which step is the last, so we keep updating
    # expression for the loss with each call of the layer
    self.ponder_cost = (self.act_weights['time_penalty_t'] *
                        K.mean(self.remainder + self.active_steps))
    # Updating "the remaining probability" and the halt budget
    self.remainder = K.switch(no_further_steps, self.remainder,
                              self.remainder - halting)
    self.halt_budget -= halting  # OK to become negative
    # If none of the inputs are active at this step, then instead
    # of zeroing them out by multiplying to all-zeroes halting_prob,
    # we can simply use a constant tensor of zeroes, which means that
    # we won't even calculate the output of those steps, saving
    # some real computational time.
    if self.zeros_like_input is None:
        self.zeros_like_input = K.zeros_like(inputs, name='zeros_like_input')
    # just because K.any(step_is_active) doesn't work in PlaidML
    any_step_is_active = K.greater(K.sum(K.cast(step_is_active, 'int32')), 0)
    step_weighted_output = K.switch(
        any_step_is_active,
        K.expand_dims(halting_prob, -1) * inputs,
        self.zeros_like_input)
    if self.weighted_output is None:
        self.weighted_output = step_weighted_output
    else:
        self.weighted_output += step_weighted_output
    return [inputs, self.weighted_output]
def custom_loss(y_true, y_pred):
    """Args: y_true -- label vector of shape (batch_size, num_classes)"""
    samples_per_cluster = K.transpose(
        K.sum(y_true, axis=0, keepdims=True) + 1)  # Add 1 to avoid division by zero
    centers = K.dot(K.transpose(y_true), features) / samples_per_cluster
    center_loss = 0.5 * K.sum(K.square(features - K.dot(y_true, centers)))
    center_dot_combinations = K.dot(centers, K.transpose(centers))
    center_dot_combinations_normed = K.sqrt(K.square(center_dot_combinations))
    pair_dist = center_dot_combinations / center_dot_combinations_normed
    # subtract diagonal of pair_dist which only contains ones
    pair_dist = pair_dist - K.eye(num_classes)
    pair_dist = pair_dist + 1
    pair_dist = K.sum(pair_dist)
    island_loss = center_loss + pair_dist
    return categorical_crossentropy(y_true, y_pred) + island_loss
def call(inputs, mask=None):
    steps_axis = 1
    if mask is not None:
        mask = math_ops.cast(mask, backend.floatx())
        input_shape = inputs.shape.as_list()
        broadcast_shape = [-1, input_shape[steps_axis], 1]
        mask = array_ops.reshape(mask, broadcast_shape)
        inputs *= mask
        return backend.sum(inputs, axis=steps_axis) / (
            math_ops.reduce_sum(mask, axis=steps_axis) + backend.epsilon())
    else:
        return backend.mean(inputs, axis=steps_axis)
def create_inital_state(inputs, hidden_size):
    if hidden_size.value is None:
        hidden_size = 0
    # We are not using initial states, but need to pass something to the K.rnn function
    fake_state = K.zeros_like(
        inputs)  # <= (batch_size, enc_seq_len, latent_dim)
    fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
    fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
    fake_state = K.tile(fake_state,
                        [1, hidden_size])  # <= (batch_size, latent_dim)
    return fake_state
def call(self, inputs, mask=None):
    steps_axis = 1 if self.data_format == 'channels_last' else 2
    if mask is not None:
        mask = math_ops.cast(mask, backend.floatx())
        input_shape = inputs.shape.as_list()
        broadcast_shape = [-1, input_shape[steps_axis], 1]
        mask = array_ops.reshape(mask, broadcast_shape)
        inputs *= mask
        return backend.sum(inputs, axis=steps_axis) / math_ops.reduce_sum(
            mask, axis=steps_axis)
    else:
        return backend.mean(inputs, axis=steps_axis)
def get_initial_state(self, inputs):
    # (samples, timesteps, rows, cols, filters)
    initial_state = K.zeros_like(inputs)
    # (samples, rows, cols, filters)
    initial_state = K.sum(initial_state, axis=1)
    shape = list(self.cell.kernel_shape)
    shape[-1] = self.cell.filters
    initial_state = self.cell.input_conv(initial_state,
                                         K.zeros(tuple(shape)),
                                         padding=self.cell.padding)
    if hasattr(self.cell.state_size, '__len__'):
        return [initial_state for _ in self.cell.state_size]
    else:
        return [initial_state]