def call(self, x):
    # Attention pooling over the time axis.
    # assumes x is (batch, time, features) — TODO confirm against build()
    assert(K.backend() == 'tensorflow')
    # Work on (batch, features, time) so the dense stack mixes across time.
    temp = K.permute_dimensions(x, (0, 2, 1))
    # Stack of `attention_depth` sigmoid layers (weights/biases built elsewhere).
    for i in range(0, self.attention_depth):
        temp = K.sigmoid(K.dot(temp, self.Ws[i]) + self.bs[i])
    temp = K.permute_dimensions(temp, (0, 2, 1))
    # Project each timestep to a scalar attention score via Wf.
    estimated_weight = K.squeeze(K.dot(temp, K.expand_dims(self.Wf, -1)), -1)
    biased_weight = estimated_weight + self.bias
    non_linear_weight = K.tanh(biased_weight)
    # For each hidded state calculate how much should it contribute
    # to the context vector. This is the main part of attention.
    # In order to convert weights to "probabilities" use a sigmoid
    # based function: exp(x) / sum(exp(xi)).
    prob = K.exp(non_linear_weight)
    # Compute the total sum for each batch.
    total_sum = K.sum(prob, axis=1, keepdims=True)
    prob /= K.cast(total_sum, K.floatx())
    # Enable this if you want access to internal probabilities.
    # Should only be used for testing that Attention works as expected.
    # return prob
    # Multiply each hidden value by the corresponding probability.
    prob = K.expand_dims(prob, -1)
    new_hidden_values = x * prob
    # Weighted sum over time -> (batch, features) context vector.
    return K.sum(new_hidden_values, axis=1)
def binary_crossentropy_with_ranking(y_true, y_pred):
    """ Trying to combine ranking loss with numeric precision"""
    # first get the log loss like normal
    logloss = K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1)

    # next, build a rank loss

    # clip the probabilities to keep stability
    y_pred_clipped = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())

    # translate into the raw scores before the logit
    y_pred_score = K.log(y_pred_clipped / (1 - y_pred_clipped))

    # determine what the maximum score for a zero outcome is
    # BUG FIX: comparison ops return boolean tensors; cast to floatx before
    # using them in arithmetic (boolean * float fails / upcasts on TF).
    negative_mask = K.cast(y_true < 1, K.floatx())
    y_pred_score_zerooutcome_max = K.max(y_pred_score * negative_mask)

    # determine how much each score is above or below it
    rankloss = y_pred_score - y_pred_score_zerooutcome_max

    # only keep losses for positive outcomes
    rankloss = rankloss * y_true

    # only keep losses where the score is below the max
    rankloss = K.square(K.clip(rankloss, -100, 0))

    # average the loss for just the positive outcomes
    # (+1 keeps the denominator non-zero when there are no positives)
    positive_count = K.sum(K.cast(y_true > 0, K.floatx()))
    rankloss = K.sum(rankloss, axis=-1) / (positive_count + 1)

    # return (rankloss + 1) * logloss - an alternative to try
    return rankloss + logloss
def precision(y_true, y_pred):
    """Precision: fraction of predicted positives that are true positives.

    Module-level `smooth` keeps the ratio finite when nothing is predicted.
    """
    true_pos = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    pred_pos = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # How many selected items are relevant?
    return true_pos / (pred_pos + smooth)
def recall(y_true, y_pred):
    """Recall: fraction of actual positives that were selected.

    Module-level `smooth` keeps the ratio finite when there are no positives.
    """
    true_pos = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_pos = K.sum(K.round(K.clip(y_true, 0, 1)))
    # How many relevant items are selected?
    return true_pos / (actual_pos + smooth)
def call(self, input):
    # Deep & Cross Network style stack: each layer computes
    # x0 * (x_l . w) + b + x_l, feeding the previous cross output forward.
    # assumes input is (batch, input_dim, 1)-reshapeable — TODO confirm
    for i in range(self.num_layer):
        if i == 0:
            # First layer crosses the input with itself.
            cross = Lambda(lambda x: Add()([K.sum(self.W[i] * K.batch_dot(K.reshape(x, (-1, self.input_dim, 1)), x), 1, keepdims = True), self.bias[i], x]))(input)
        else:
            # Later layers cross the running result with the original input.
            cross = Lambda(lambda x: Add()([K.sum(self.W[i] * K.batch_dot(K.reshape(x, (-1, self.input_dim, 1)), input), 1, keepdims = True), self.bias[i], input]))(cross)
    return Flatten()(cross)
def call(self, inputs, mask=None):
    # Update the life-long speaker memory with a new speech vector.
    # NOTE(review): Theano-specific (`T.*`, `.dimshuffle`); will not run on
    # the TensorFlow backend — confirm the intended backend.
    if not isinstance(inputs, list) or len(inputs) <= 1:
        raise TypeError('SpkLifeLongMemory must be called on a list of tensors '
                        '(at least 2). Got: ' + str(inputs))
    # (None(batch), 1), index of speaker
    target_spk_l = inputs[0]
    target_spk_l = K.reshape(target_spk_l, (target_spk_l.shape[0], ))
    if K.dtype(target_spk_l) != 'int32':
        target_spk_l = K.cast(target_spk_l, 'int32')
    # (None(batch), embed_dim)
    spk_vector_l = inputs[1]
    # Start to update life-long memory based on the learned speech vector
    # First do normalization (replace exact zeros with machine epsilon so the
    # norm below is never zero)
    spk_vector_eps = K.switch(K.equal(spk_vector_l, 0.), np.spacing(1), spk_vector_l)  # avoid zero
    spk_vector_eps = K.sqrt(K.sum(spk_vector_eps**2, axis=1))
    spk_vector_eps = spk_vector_eps.dimshuffle((0, 'x'))
    spk_vector = T.true_div(spk_vector_l, K.repeat_elements(spk_vector_eps, self.vec_dim, axis=1))
    # Store speech vector into life-long memory according to the speaker identity.
    life_long_mem = T.inc_subtensor(self.life_long_mem[target_spk_l, :], spk_vector)
    # Normalization for memory
    life_long_mem_eps = K.switch(K.equal(life_long_mem, 0.), np.spacing(1), life_long_mem)  # avoid 0
    life_long_mem_eps = K.sqrt(K.sum(life_long_mem_eps**2, axis=1))
    life_long_mem_eps = life_long_mem_eps.dimshuffle((0, 'x'))
    life_long_mem = T.true_div(life_long_mem, K.repeat_elements(life_long_mem_eps, self.vec_dim, axis=1))
    # (None(batch), spk_size, embed_dim)
    return life_long_mem
def sensitivity(y_true, y_pred):
    """Sensitivity (true-positive rate): TP / all positives."""
    pred_binary = K.round(K.clip(y_pred, 0, 1))
    true_binary = K.round(K.clip(y_true, 0, 1))
    true_pos = K.sum(true_binary * pred_binary)
    total_pos = K.sum(true_binary)
    return true_pos / (total_pos + K.epsilon())
def specificity(y_true, y_pred):
    """Specificity (true-negative rate): TN / all negatives."""
    pred_neg_binary = 1 - K.round(K.clip(y_pred, 0, 1))
    true_neg_binary = 1 - K.round(K.clip(y_true, 0, 1))
    true_neg = K.sum(true_neg_binary * pred_neg_binary)
    total_neg = K.sum(true_neg_binary)
    return true_neg / (total_neg + K.epsilon())
def get_model(inputdim, outputdim, regularization_strength=0.01, lr=0.000, cosine=False, **kwargs):
    # Builds a siamese projection model on the legacy Keras 0.x Graph API
    # (Graph/add_node/add_shared_node were removed in Keras 1.x).
    # NOTE(review): regularization_strength and lr are accepted but unused here.
    # A shared orthogonal-constrained linear map applied to both inputs.
    transformation = Dense(inputdim, init='identity', W_constraint=Orthogonal())
    model = Graph()
    model.add_input(name='embeddings1', input_shape=(inputdim,))
    model.add_input(name='embeddings2', input_shape=(inputdim,))
    model.add_shared_node(transformation, name='transformation',
                          inputs=['embeddings1', 'embeddings2'],
                          outputs=['transformed1', 'transformed2'])
    # Keep only the first `outputdim` dims; the second branch is negated so the
    # later merge computes a difference (sum) or negative dot product (mul).
    model.add_node(Lambda(lambda x: x[:, :outputdim]), input='transformed1', name='projected1')
    model.add_node(Lambda(lambda x: -x[:, :outputdim]), input='transformed2', name='negprojected2')
    if cosine:
        # L2-normalize both projections, then elementwise-multiply and sum:
        # the 'distances' output is the negative cosine similarity.
        model.add_node(Lambda(lambda x: x / K.reshape(K.sqrt(K.sum(x * x, axis=1)), (x.shape[0], 1))), name='normalized1', input='projected1')
        model.add_node(Lambda(lambda x: x / K.reshape(K.sqrt(K.sum(x * x, axis=1)), (x.shape[0], 1))), name='negnormalized2', input='negprojected2')
        model.add_node(Lambda(lambda x: K.reshape(K.sum(x, axis=1), (x.shape[0], 1))), name='distances', inputs=['normalized1', 'negnormalized2'], merge_mode='mul')
    else:
        # Euclidean distance between projections (sum of x and -y, then norm).
        model.add_node(Lambda(lambda x: K.reshape(K.sqrt(K.sum(x * x, axis=1)), (x.shape[0], 1))), name='distances', inputs=['projected1', 'negprojected2'], merge_mode='sum')
    model.add_output(name='y', input='distances')
    # Loss is label-weighted distance: y=+1 pulls pairs together, y=-1 pushes apart.
    model.compile(loss={'y': lambda y, d: K.mean(y * d)}, optimizer=SimpleSGD())
    return model
def mutual_info_loss(self, c, c_given_x):
    """The mutual information metric we aim to minimize.

    Returns H(c|x) + H(c), each computed as a mean negative log-likelihood
    with a small epsilon for numerical stability.
    """
    eps = 1e-8
    conditional_entropy = -K.mean(K.sum(c * K.log(c_given_x + eps), axis=1))
    entropy = -K.mean(K.sum(c * K.log(c + eps), axis=1))
    return conditional_entropy + entropy
def __call__(self, x):
    """Penalize deviation of the per-axis sums of `x` from 1.

    Applies L1 and/or L2 penalties (weighted by self.l1 / self.l2) to
    `sum(x, axis) - 1`, e.g. to push rows toward a probability simplex.
    """
    deviation = K.sum(x, axis=self.axis) - 1.
    penalty = 0
    if self.l1:
        penalty += self.l1 * K.sum(K.abs(deviation))
    if self.l2:
        penalty += self.l2 * K.sum(K.square(deviation))
    return penalty
def loss(self, y_true, y_pred):
    """Categorical cross-entropy loss.

    Optionally crops both tensors to self.crop_indices, casts to float16,
    and applies class weights (self.weights) and per-voxel weights
    (self.vox_weights) before averaging.
    """
    if self.crop_indices is not None:
        y_true = utils.batch_gather(y_true, self.crop_indices)
        y_pred = utils.batch_gather(y_pred, self.crop_indices)
    if self.use_float16:
        y_true = K.cast(y_true, 'float16')
        y_pred = K.cast(y_pred, 'float16')
    # scale and clip probabilities
    # this should not be necessary for softmax output.
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
    y_pred = K.clip(y_pred, K.epsilon(), 1)
    # compute log probability
    log_post = K.log(y_pred)  # likelihood
    # loss
    loss = - y_true * log_post
    # weighted loss
    if self.weights is not None:
        loss *= self.weights
    if self.vox_weights is not None:
        loss *= self.vox_weights
    # take the total loss
    # loss = K.batch_flatten(loss)
    # Cast back to float32 before reducing so the mean is done in full precision.
    mloss = K.mean(K.sum(K.cast(loss, 'float32'), -1))
    # NOTE(review): tf.verify_tensor_all_finite's return value is discarded;
    # in graph mode the check may not execute — confirm intent.
    tf.verify_tensor_all_finite(mloss, 'Loss not finite')
    return mloss
def get_loss(self):
    """Weighted L1/L2 regularization penalty on the parameter self.p."""
    penalty = 0.0
    if self.l1:
        penalty += self.l1 * K.sum(K.abs(self.p))
    if self.l2:
        penalty += self.l2 * K.sum(K.square(self.p))
    return penalty
def build_model(self, p):
    # Build the DQN training graph: online network Q, frozen target network
    # Q_old, TD target y = r + gamma * (1 - terminal) * max_a Q_old(s', a),
    # and a squared-error loss on the taken action's Q value.
    # NOTE(review): `Tht` and `.reshape(...)` on Keras tensors look
    # Theano-backend specific — confirm the intended backend.
    S = Input(p['input_shape'], name='input_state')
    A = Input((1,), name='input_action', dtype='int32')
    R = Input((1,), name='input_reward')
    T = Input((1,), name='input_terminate', dtype='int32')
    NS = Input(p['input_shape'], name='input_next_sate')
    self.Q_model = self.build_cnn_model(p)
    self.Q_old_model = self.build_cnn_model(p, False)  # Q hat in paper
    self.Q_old_model.set_weights(self.Q_model.get_weights())  # Q' = Q
    Q_S = self.Q_model(S)  # batch * actions
    Q_NS = disconnected_grad(self.Q_old_model(NS))  # disconnected gradient is not necessary
    y = R + p['discount'] * (1-T) * K.max(Q_NS, axis=1, keepdims=True)  # batch * 1
    # One-hot mask selecting the Q value of the action actually taken.
    action_mask = K.equal(Tht.arange(p['num_actions']).reshape((1, -1)), A.reshape((-1, 1)))
    output = K.sum(Q_S * action_mask, axis=1).reshape((-1, 1))
    loss = K.sum((output - y) ** 2)  # sum could also be mean()
    optimizer = adam(p['learning_rate'])
    params = self.Q_model.trainable_weights
    update = optimizer.get_updates(params, [], loss)
    # Compiled backend functions: one training step, and a Q-value lookup.
    self.training_func = K.function([S, A, R, T, NS], loss, updates=update)
    self.Q_func = K.function([S], Q_S)
def dice_coef(y_true, y_pred):
    """Soft Dice coefficient, smoothed by the module-level `smooth`."""
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    return (2. * overlap + smooth) / (K.sum(truth) + K.sum(pred) + smooth)
def custom_loss(y_true, y_pred):
    """Masked mean squared error: only positions where y_true != 0 contribute.

    Returns sum of squared errors over unmasked entries divided by their count.
    """
    mask = K.cast(K.not_equal(y_true, 0), dtype='float32')
    diff = y_pred - y_true
    sqdiff = diff * diff * mask
    sse = K.sum(K.sum(sqdiff))
    n = K.sum(K.sum(mask))
    # ROBUSTNESS FIX: guard against division by zero (NaN) when y_true is
    # entirely zero and the mask is empty.
    return sse / (n + K.epsilon())
def multiplicative_self_attention(units, n_hidden=None, n_output_features=None, activation=None):
    """ Compute multiplicative self attention for time series of vectors
     (with batch dimension) the formula: score(h_i, h_j) = <W_1 h_i, W_2 h_j>,
     W_1 and W_2 are learnable matrices with dimensionality [n_hidden, n_input_features]

    Args:
        units: tf tensor with dimensionality [batch_size, time_steps, n_input_features]
        n_hidden: number of units in hidden representation of similarity measure
        n_output_features: number of features in output dense layer
        activation: activation at the output

    Returns:
        output: self attended tensor with dimensionality [batch_size, time_steps, n_output_features]
    """
    n_input_features = K.int_shape(units)[2]
    # Hidden and output sizes default to the input feature size.
    if n_hidden is None:
        n_hidden = n_input_features
    if n_output_features is None:
        n_output_features = n_input_features
    # Tile along two different axes to form all (i, j) timestep pairs.
    exp1 = Lambda(lambda x: expand_tile(x, axis=1))(units)
    exp2 = Lambda(lambda x: expand_tile(x, axis=2))(units)
    # W_1 h_i and W_2 h_j projections.
    queries = Dense(n_hidden)(exp1)
    keys = Dense(n_hidden)(exp2)
    # Dot product over the hidden axis gives pairwise scores; softmax over j.
    scores = Lambda(lambda x: K.sum(queries * x, axis=3, keepdims=True))(keys)
    attention = Lambda(lambda x: softmax(x, axis=2))(scores)
    # Weighted sum of values, then the output projection.
    mult = Multiply()([attention, exp1])
    attended_units = Lambda(lambda x: K.sum(x, axis=2))(mult)
    output = Dense(n_output_features, activation=activation)(attended_units)
    return output
def build_untilfinaldense(self, WE=None): opts = self.opts # get options max_features = int(opts["max_features"]) embedding_dims = int(opts["embedding_dims"]) maxlen = int(opts["maxlen"]) act = opts["activation"] hidden_dims = int(opts["hidden_dims"]) dropout = float(opts["dropout"]) # start model = self.model marg1 = Sequential() marg2 = Sequential() if(not WE is None): marg1.add(Embedding(max_features, embedding_dims, input_length=maxlen, weights=[WE])) marg2.add(Embedding(max_features, embedding_dims, input_length=maxlen, weights=[WE])) else: marg1.add(Embedding(max_features, embedding_dims, input_length=maxlen)) marg2.add(Embedding(max_features, embedding_dims, input_length=maxlen)) marg1.add(Dropout(dropout)) marg2.add(Dropout(dropout)) marg1.add(Lambda(lambda x: K.sum(x, axis=1, keepdims=True), output_shape=(1, embedding_dims))) marg2.add(Lambda(lambda x: K.sum(x, axis=1, keepdims=True), output_shape=(1, embedding_dims))) marg1.add(Flatten()) marg2.add(Flatten()) merged = Merge([marg1, marg2], mode='concat') model.add(merged) model.add(Dropout(dropout)) if(hidden_dims > 0): # whether add another dense model.add(Dense(hidden_dims)) model.add(Dropout(dropout)) model.add(Activation(act)) return None
def triplet_loss(y_true, y_pred, alpha=0.2):
    """
    Implementation of the triplet loss as defined by formula (3)

    Arguments:
    y_true -- true labels, required when you define a loss in Keras, you don't need it in this function.
    y_pred -- python list containing three objects:
            anchor -- the encodings for the anchor images, of shape (None, 128)
            positive -- the encodings for the positive images, of shape (None, 128)
            negative -- the encodings for the negative images, of shape (None, 128)

    Returns:
    loss -- real number, value of the loss
    """
    anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]
    # Squared distances anchor->positive and anchor->negative, summed per example.
    pos_dist = K.sum(K.square(anchor - positive), axis=-1)
    neg_dist = K.sum(K.square(anchor - negative), axis=-1)
    # Margin term: positive pairs must beat negative pairs by at least alpha.
    margin_term = pos_dist - neg_dist + alpha
    # Hinge at zero, then sum over the batch.
    return K.sum(K.maximum(margin_term, 0))
def dice_coef(y_true, y_pred, smooth, thresh):
    """Soft Dice coefficient with squared-term denominator.

    `thresh` is accepted for interface compatibility but currently unused.
    """
    #y_pred = (y_pred > thresh).astype(float)
    truth = KB.flatten(y_true)
    pred = KB.flatten(y_pred)
    overlap = KB.sum(truth * pred, axis=-1)
    denominator = (KB.sum(KB.square(truth), axis=-1)
                   + KB.sum(KB.square(pred), axis=-1))
    return (2. * overlap + smooth) / (denominator + smooth)
def get_split_averages(input_tensor, input_mask, indices):
    # Splits input tensor into three parts based on the indices and
    # returns average of values prior to index, values at the index and
    # average of values after the index.
    # input_tensor: (batch_size, input_length, input_dim)
    # input_mask: (batch_size, input_length)
    # indices: (batch_size, 1)
    # NOTE(review): uses legacy Keras API (`dim=` kwarg on expand_dims,
    # K.lesser) — only valid on old Keras versions.
    # (1, input_length)
    length_range = K.expand_dims(K.arange(K.shape(input_tensor)[1]), dim=0)
    # (batch_size, input_length)
    batched_range = K.repeat_elements(length_range, K.shape(input_tensor)[0], 0)
    tiled_indices = K.repeat_elements(indices, K.shape(input_tensor)[1], 1)  # (batch_size, input_length)
    # Boolean masks for positions after / before / at the split index.
    greater_mask = K.greater(batched_range, tiled_indices)  # (batch_size, input_length)
    lesser_mask = K.lesser(batched_range, tiled_indices)  # (batch_size, input_length)
    equal_mask = K.equal(batched_range, tiled_indices)  # (batch_size, input_length)
    # We also need to mask these masks using the input mask.
    # (batch_size, input_length)
    if input_mask is not None:
        greater_mask = switch(input_mask, greater_mask, K.zeros_like(greater_mask))
        lesser_mask = switch(input_mask, lesser_mask, K.zeros_like(lesser_mask))
    # Masked sums over the time axis for each of the three regions.
    post_sum = K.sum(switch(K.expand_dims(greater_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)
    pre_sum = K.sum(switch(K.expand_dims(lesser_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)
    values_at_indices = K.sum(switch(K.expand_dims(equal_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)
    # Counts per region (epsilon avoids division by zero for empty regions).
    post_normalizer = K.expand_dims(K.sum(greater_mask, axis=1) + K.epsilon(), dim=1)  # (batch_size, 1)
    pre_normalizer = K.expand_dims(K.sum(lesser_mask, axis=1) + K.epsilon(), dim=1)  # (batch_size, 1)
    return K.cast(pre_sum / pre_normalizer, 'float32'), values_at_indices, K.cast(post_sum / post_normalizer, 'float32')
def jaccard_coef(y_true, y_pred):
    """Soft Jaccard index averaged over channels; `smooth` is module-level."""
    reduce_axes = [0, -1, -2]
    overlap = K.sum(y_true * y_pred, axis=reduce_axes)
    total = K.sum(y_true + y_pred, axis=reduce_axes)
    jaccard = (overlap + smooth) / (total - overlap + smooth)
    return K.mean(jaccard)
def call(self, x, mask=None):
    # Additive attention pooling: score each timestep, softmax over time,
    # return the attention-weighted sum (and optionally the weights).
    eij = dot_product(x, self.W)
    if self.bias:
        eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())
    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    # Weight each timestep's features by its attention probability.
    weighted_input = x * K.expand_dims(a)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, a]
    return result
def call(self, x, mask=None):
    # Attention pooling over timesteps via an explicit reshape-matmul,
    # because the TF backend's K.dot does not broadcast a 3D x against W.
    # eij = K.dot(x, self.W) TF backend doesn't support it
    # features_dim = self.W.shape[0]
    # step_dim = x._keras_shape[1]
    features_dim = self.features_dim
    step_dim = self.step_dim
    # (batch*steps, features) @ (features, 1) -> (batch, steps) scores.
    eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))), (-1, step_dim))
    if self.bias:
        eij += self.b
    eij = K.tanh(eij)
    a = K.exp(eij)
    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())
    # in some cases especially in the early stages of training the sum may be almost zero
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    a = K.expand_dims(a)
    weighted_input = x * a
    # print weigthted_input.shape
    # Attention-weighted sum over the time axis -> (batch, features).
    return K.sum(weighted_input, axis=1)
def weighted_dice_loss(y_true, y_pred, weight):
    """1 - weighted soft Dice score, with per-element weights squared."""
    smooth = 1.
    w = weight * weight
    overlap = y_true * y_pred
    numerator = 2. * K.sum(w * overlap) + smooth
    denominator = K.sum(w * y_true) + K.sum(w * y_pred) + smooth
    score = numerator / denominator
    return 1. - K.sum(score)
def dice_coeff(y_true, y_pred):
    """Soft Dice coefficient (no smoothing: smooth is fixed at 0)."""
    smooth = 0.
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    return (2. * overlap + smooth) / (K.sum(truth) + K.sum(pred) + smooth)
def sigmoid_cross_entropy(y_true, y_pred):
    """Weighted sigmoid cross-entropy on logits, with positive class
    up-weighted by q=10, using the numerically stable log(1+exp(-|x|)) form.
    """
    z = K.flatten(y_true)
    x = K.flatten(y_pred)
    q = 10
    # Per-element weight: 1 for negatives, q for positives.
    l = (1 + (q - 1) * z)
    # BUG FIX: K.max(-x, 0) is a *reduction* over axis 0; the stable
    # cross-entropy formula needs the elementwise max, K.maximum(-x, 0).
    loss = (K.sum((1 - z) * x)
            + K.sum(l * (K.log(1 + K.exp(- K.abs(x))) + K.maximum(-x, 0.)))) / 500
    return loss
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    # args packs the three detection-scale outputs followed by their targets.
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(3)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size (tensor)
    for l in range(3):
        # Channel 4 is objectness, channels 5+ are class probabilities.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2]-pred_xy)*grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        # Smaller boxes get larger loss scale (2 - w*h).
        box_delta_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        # A cell is "ignored" (no confidence penalty) when its best IoU with
        # any ground-truth box exceeds ignore_thresh.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        # NOTE(review): K.control_flow_ops is a TF internal exposed through old
        # Keras backends — confirm it exists in the pinned Keras version.
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # Squared-error losses for boxes, confidence, and class probabilities.
        box_loss = object_mask * K.square(box_delta*box_delta_scale)
        confidence_loss = object_mask * K.square(1-pred_confidence) + \
            (1-object_mask) * K.square(0-pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs-pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    # Average over the batch.
    return loss / K.cast(m, K.dtype(loss))
def cat_acc(y, z):
    """Compute categorical accuracy given one-hot matrices.

    Sample weights from _cat_sample_weights(y) exclude padded/ignored rows.
    """
    weights = _cat_sample_weights(y)
    hits = K.cast(K.equal(K.argmax(y, axis=-1), K.argmax(z, axis=-1)),
                  K.floatx())
    return K.sum(hits * weights) / K.sum(weights)
def __call__(self, x):
    """Decorrelation penalty between the two halves of the feature axis.

    Column-normalizes x, computes the dot product between the first and
    second half of the features, and returns gamma * ||cross-correlation||.
    """
    xshape = K.int_shape(x)
    if self.division_idx is None:
        # BUG FIX: use integer division — plain `/` yields a float in
        # Python 3, which is invalid as a slice index below.
        self.division_idx = xshape[-1] // 2
    x = K.reshape(x, (-1, xshape[-1]))
    # Normalize each column to unit norm.
    x /= K.sqrt(K.sum(K.square(x), axis=0, keepdims=True))
    # Correlation between corresponding columns of the two halves.
    xx = K.sum(x[:, :self.division_idx] * x[:, self.division_idx:], axis=0)
    return self.gamma * K.sqrt(K.sum(K.square(xx)) + K.epsilon())
def mean_tcn_squared_error(y_true, y_pred):
    # Squared error restricted to the final timestep of a TCN output, then
    # averaged.
    # NOTE(review): K.slice takes (start, size) tuples whose length must match
    # the tensor rank; (0, shape[1]-1)/(shape[0], 1) implies a 2-D
    # (batch, time) tensor — confirm y_pred's rank at the call site.
    shape = K.shape(y_pred)
    return K.mean(K.sum(K.slice(K.square(y_pred - y_true), (0, shape[1] - 1), (shape[0], 1)), axis=-1), axis=-1)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors)//3  # default setting
    # args packs the per-scale network outputs followed by their targets.
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Channel 4 is objectness, channels 5+ are class probabilities.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # Smaller boxes get a larger loss scale (2 - w*h).
        box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        # Cells whose best IoU with any true box exceeds ignore_thresh are
        # exempted from the negative-confidence penalty.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        # NOTE(review): K.control_flow_ops is a TF internal exposed through old
        # Keras backends — confirm availability in the pinned version.
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)

        # Normalize each component by the batch size before accumulating.
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')
    return loss
def recall(y_true, y_pred):
    """Recall metric: TP / (TP + FN).

    Computed from rounded, clipped predictions so it works on probabilities.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # BUG FIX: recall's denominator is the count of actual positives
    # (TP + FN = possible_positives). The original added true_positives to it,
    # systematically underestimating recall. Use epsilon to avoid div-by-zero.
    recall = true_positives / (possible_positives + K.epsilon())
    return recall
def precision(y_true, y_pred):
    """Precision metric: TP / (TP + FP).

    Computed from rounded, clipped predictions so it works on probabilities.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # BUG FIX: precision's denominator is the count of predicted positives
    # (TP + FP = predicted_positives). The original added true_positives to it,
    # systematically underestimating precision. Epsilon avoids div-by-zero.
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
def custom_r2(y_true, y_pred):
    """Coefficient of determination R^2 = 1 - SS_res / SS_tot."""
    ss_tot = K.sum((y_true - K.mean(y_true)) ** 2)
    ss_res = K.sum((y_true - y_pred) ** 2)
    return 1. - ss_res / ss_tot
conv = Conv2D(32, (3, 3), padding='same', activation='relu')(pooled) # 32 -> 256 conv = BatchNormalization()(conv) #Attention 3 y = Conv2D(1, (1, 1))(conv) # 32x32x1 y = Permute((3, 2, 1))(y) y = Dense(8, activation='softmax')(y) y = Permute((1, 3, 2))(y) y = Dense(8, activation='softmax')(y) y = Permute((1, 3, 2))(y) #now permute back y = Permute((3, 2, 1))(y) #end attention mult = Multiply()([conv, y]) summed = Lambda(lambda x: K.sum(x, axis=(1, 2)), output_shape=lambda s: (s[0], s[3]))(mult) #Dense network with input of 64 neurons -> hidden -> 10 neurons w/ softmax dense = Dense(256, activation='relu')(summed) dense = Dense(64, activation='relu')(dense) dense = Dense(64, activation='relu')(dense) dense = Dense(64, activation='relu')(dense) final = Dense(10, activation='softmax')(dense) #Finalization model = Model(inputs=img_inputs, outputs=final) #print shapes x = np.array([x_train[0]]) layer_outputs = [layer.output for layer in model.layers] viz_model = Model(input=model.input, output=layer_outputs)
def w_coef_dice(y_true, y_pred, axis=(-3, -2, -1), smooth=0.00001):
    """Mean soft Dice coefficient over the given axes, with smoothing."""
    overlap = K.sum(y_true * y_pred, axis=axis)
    totals = K.sum(y_true, axis=axis) + K.sum(y_pred, axis=axis)
    dice_per_item = 2. * (overlap + smooth / 2) / (totals + smooth)
    return K.mean(dice_per_item)
def dice_coef(y_true, y_pred):
    """Soft Dice coefficient with +1 additive smoothing."""
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    return (2. * overlap + 1) / (K.sum(truth) + K.sum(pred) + 1)
def dice_coef_rounded(y_true, y_pred):
    """Dice coefficient on binarized (rounded) inputs, +1 smoothing."""
    truth = K.flatten(K.round(y_true))
    pred = K.flatten(K.round(y_pred))
    overlap = K.sum(truth * pred)
    return (2. * overlap + 1) / (K.sum(truth) + K.sum(pred) + 1)
def precision(y_true, y_pred):
    """PPV - Positive Predictive Value: soft TP over predicted positives."""
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    soft_tp = K.sum(truth * pred)
    return (soft_tp + K.epsilon()) / (K.sum(pred) + K.epsilon())
def _content_loss(self, content, combination):
    """The content loss is the (scaled, squared) Euclidean distance between
    feature representations of the content and combination images."""
    diff = combination - content
    return backend.sum(backend.square(diff))
# Average the two subject-boundary vectors and add them into the text encoding.
kv = Average()([k1v, k2v])
t = Add()([t, kv])
# Per-position sigmoid scores for object start (po1) and object end (po2).
po1 = Dense(num_classes, activation='sigmoid')(t)
po2 = Dense(num_classes, activation='sigmoid')(t)

object_model = Model([t1_in, t2_in, k1_in, k2_in], [po1, po2])  # input: text and subject; predict object and its relation


train_model = Model([t1_in, t2_in, s1_in, s2_in, k1_in, k2_in, o1_in, o2_in],
                    [ps1, ps2, po1, po2])

s1 = K.expand_dims(s1, 2)
s2 = K.expand_dims(s2, 2)

# Masked binary cross-entropy for subject start/end; `mask` zeros out padding.
s1_loss = K.binary_crossentropy(s1, ps1)
s1_loss = K.sum(s1_loss * mask) / K.sum(mask)
s2_loss = K.binary_crossentropy(s2, ps2)
s2_loss = K.sum(s2_loss * mask) / K.sum(mask)
# Object losses are first summed over the class axis, then masked.
o1_loss = K.sum(K.binary_crossentropy(o1, po1), 2, keepdims=True)
o1_loss = K.sum(o1_loss * mask) / K.sum(mask)
o2_loss = K.sum(K.binary_crossentropy(o2, po2), 2, keepdims=True)
o2_loss = K.sum(o2_loss * mask) / K.sum(mask)

loss = (s1_loss + s2_loss) + (o1_loss + o2_loss)

train_model.add_loss(loss)
train_model.compile(optimizer=Adam(learning_rate))
train_model.summary()
def mean_total_squared_error(y_true, y_pred):
    """Sum squared error over the last axis, then average over the next one."""
    squared_err = K.square(y_pred - y_true)
    per_item_total = K.sum(squared_err, axis=-1)
    return K.mean(per_item_total, axis=-1)
def vae_kl_loss(self, y_true, y_pred):
    """KL divergence of N(mu, exp(log_var)) from the standard normal.

    Reads self.mu / self.log_var set by the encoder; the y_true / y_pred
    arguments are required by the Keras loss signature but unused.
    """
    kl_terms = 1 + self.log_var - K.square(self.mu) - K.exp(self.log_var)
    return -0.5 * K.sum(kl_terms, axis=1)
def coef_dice(y_true, y_pred, smooth=1.):
    """Soft Dice coefficient with configurable additive smoothing."""
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    return (2. * overlap + smooth) / (K.sum(truth) + K.sum(pred) + smooth)
"--mse", help=help_, action='store_true') args = parser.parse_args() models = (encoder, decoder) data = (x_test, y_test) # VAE loss = mse_loss or xent_loss + kl_loss if args.mse: reconstruction_loss = mse(inputs, outputs) else: reconstruction_loss = binary_crossentropy(inputs, outputs) reconstruction_loss *= original_dim kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var) kl_loss = K.sum(kl_loss, axis=-1) kl_loss *= -0.5 vae_loss = K.mean(reconstruction_loss + kl_loss) vae.add_loss(vae_loss) vae.compile(optimizer='adam') vae.summary() plot_model(vae, to_file='vae_mlp.png', show_shapes=True) if args.weights: vae = vae.load_weights(args.weights) else: # train the autoencoder vae.fit(x_train, epochs=epochs,
def identity_loss(y_true, y_pred):
    """Pass-through loss: |sum(y_pred)|.

    y_true is multiplied by zero only so Keras sees it used.
    """
    total = K.sum(y_pred - 0 * y_true)
    return K.abs(total)
def edge_wise_loss(true_y, embedding_diff):
    """1st order proximity: mean squared norm of embedding differences.

    `true_y` is ignored (present only for the Keras loss signature).
    """
    squared_norms = K.sum(K.square(embedding_diff), axis=1)
    return K.mean(squared_norms)  # mean square error
def euclidean_distanceX(a, b):
    """Row-wise Euclidean distance between tensors a and b."""
    squared_diff = K.square(a - b)
    return K.sqrt(K.sum(squared_diff, axis=1))
def dice_coef(y_true, y_pred):
    """Soft Dice with squared-term denominator, epsilon-smoothed."""
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    denominator = K.sum(K.square(truth)) + K.sum(K.square(pred))
    return (2. * overlap + K.epsilon()) / (denominator + K.epsilon())
def euclidean_distance(vects):
    """Row-wise Euclidean distance between a pair of tensors.

    vects: (x, y) tensors of equal shape; returns shape (batch, 1).
    """
    x, y = vects
    # FIX: the original added np.random.rand() * 1e-4 — a random constant
    # frozen into the graph at build time — to keep sqrt differentiable at 0.
    # Clamp with K.epsilon() instead (matching the other euclidean_distance
    # helpers in this file) so the graph is deterministic.
    return K.sqrt(
        K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
def loss(y_true, y_pred):
    # Weighted mix of the summed residual and the summed squared residual,
    # scaled by 1/32 (presumably the batch size — TODO confirm).
    # NOTE(review): K.sum(y_true - y_pred) sums *signed* residuals, so
    # positive and negative errors cancel — confirm this is intended
    # (vs. K.sum(K.abs(...))).
    cost = (1/32)*(0.95*(K.sum(y_true - y_pred))+0.05*K.sum((y_true-y_pred)**2))
    return cost
def euclidean_distance(cls, two_vects):
    """Euclidean distance between two tensors, clamped at K.epsilon()
    for a numerically stable sqrt gradient near zero."""
    left, right = two_vects
    squared_dist = K.sum(K.square(left - right), axis=1, keepdims=True)
    return K.sqrt(K.maximum(squared_dist, K.epsilon()))
def norm(tensor):
    """Squared Frobenius norm over axes (1, 2) of each batch element."""
    return K.sum(K.square(tensor), axis=(1, 2))
b = K.square(x[:, :, :img_width - 1, :img_height - 1] - x[:, :, :img_width - 1, 1:]) return K.sum(K.pow(a + b, 1.25)) # define the loss loss = K.variable(0.) for layer_name in settings['features']: # add the L2 norm of the features of a layer to the loss assert layer_name in layer_dict.keys( ), 'Layer ' + layer_name + ' not found in model.' coeff = settings['features'][layer_name] x = layer_dict[layer_name].output shape = layer_dict[layer_name].output_shape # we avoid border artifacts by only involving non-border pixels in the loss loss -= coeff * K.sum(K.square(x[:, :, 2:shape[2] - 2, 2:shape[3] - 2])) / np.prod(shape[1:]) # add continuity loss (gives image local coherence, can result in an artful blur) loss += settings['continuity'] * continuity_loss(dream) / (3 * img_width * img_height) # add image L2 norm to loss (prevents pixels from taking very high values, makes image darker) loss += settings['dream_l2'] * K.sum( K.square(dream)) / (3 * img_width * img_height) # feel free to further modify the loss as you see fit, to achieve new effects... # compute the gradients of the dream wrt the loss grads = K.gradients(loss, dream) outputs = [loss] if type(grads) in {list, tuple}:
def outfunc(vects):
    """Attention-weighted average over the time axis.

    cla, att: (N, n_time, n_out); returns (N, n_out).
    """
    cla, att = vects
    # Clip weights away from zero so the normalizer below cannot vanish.
    att = K.clip(att, 1e-7, 1.)
    weighted = K.sum(cla * att, axis=1)
    normalizer = K.sum(att, axis=1)
    return weighted / normalizer  # (N, n_out)
def euclidean_distance(vects):
    """Row-wise Euclidean distance between a pair of tensors, clamped at
    K.epsilon() so the sqrt gradient is finite at zero distance."""
    left, right = vects
    squared_dist = K.sum(K.square(left - right), axis=1, keepdims=True)
    return K.sqrt(K.maximum(squared_dist, K.epsilon()))
def squash(x, axis=-1):
    """Normalize x to unit norm along `axis` (epsilon-stabilized sqrt)."""
    squared_norm = K.sum(K.square(x), axis, keepdims=True)
    norm = K.sqrt(squared_norm + K.epsilon())
    return x / norm
nb_filter = 64 pool_length = 4 model = Sequential() print('Build model...') model1 = Sequential() model1.add( Embedding(len(word_index) + 1, 300, weights=[embedding_matrix], input_length=40, trainable=False)) model1.add(TimeDistributed(Dense(300, activation='relu'))) model1.add(Lambda(lambda x: K.sum(x, axis=1), output_shape=(300, ))) model2 = Sequential() model2.add( Embedding(len(word_index) + 1, 300, weights=[embedding_matrix], input_length=40, trainable=False)) model2.add(TimeDistributed(Dense(300, activation='relu'))) model2.add(Lambda(lambda x: K.sum(x, axis=1), output_shape=(300, ))) model3 = Sequential() model3.add( Embedding(len(word_index) + 1,
def __loss_total(self, x):
    """Total-variation loss: penalizes differences between vertically and
    horizontally adjacent pixels, raised to the 1.25 power."""
    shape = self.__input_img.shape
    crop = x[:, :shape[1] - 1, :shape[2] - 1, :]
    vertical_diff = backend.square(crop - x[:, 1:, :shape[2] - 1, :])
    horizontal_diff = backend.square(crop - x[:, :shape[1] - 1, 1:, :])
    return backend.sum(backend.pow(vertical_diff + horizontal_diff, 1.25))