def _distillation_loss_calculator(teacher_logits: Tensor, y_student: Tensor, temperature: float,
                                  y_true: Tensor, y_pred: Tensor, lambda_const: float) -> Tensor:
    """
    Calculates the distillation loss between two networks.

    :param teacher_logits: the teacher network's logits.
    :param y_student: the student network's output.
    :param temperature: the temperature for the softmax.
    :param y_true: the true labels, if performing supervised distillation.
    :param y_pred: the predicted labels, if performing supervised distillation.
    :param lambda_const: the importance weight of the supervised loss. Set it to 0
        if you do not want to apply supervised loss.
    :return: the distillation loss.
    """
    # Apply softmax with temperature to the teacher's logits.
    y_teacher = softmax_with_temperature(temperature)(teacher_logits)

    # Calculate log-loss.
    loss = categorical_crossentropy(y_teacher, y_student)

    # If supervised distillation is being performed, add the supervised loss,
    # multiplied by its importance weight.
    if bool(lambda_const):
        loss = add(loss, multiply(lambda_const, categorical_crossentropy(y_true, y_pred)))

    return loss
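# A minimal, self-contained numeric sketch of the distillation term above, assuming
# TensorFlow 2.x; softmax_with_temperature is replaced here by an explicit softmax
# over temperature-scaled logits, and all values are illustrative.
import tensorflow as tf

teacher_logits = tf.constant([[2.0, 1.0, 0.1]])
y_student = tf.constant([[0.6, 0.3, 0.1]])
temperature = 4.0

y_teacher = tf.nn.softmax(teacher_logits / temperature)
distill_loss = tf.keras.losses.categorical_crossentropy(y_teacher, y_student)
print(distill_loss.numpy())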
def get_gradients(self, model):
    """
    Builds a function that calculates the gradients of the loss with respect to
    the current weights/biases.

    Input:
        - model: the model that is training.

    Returns:
        - func: a function that takes input features and true labels and returns
          the gradients of the loss with respect to the selected weights.
    """
    model_weights = model.trainable_weights
    if self.only_weights:
        # Keep only kernel (weight) tensors, dropping biases.
        weights = [weight for weight in model_weights if 'kernel' in weight.name]
    else:
        weights = [weight for weight in model_weights]

    if self.num_classes > 1:
        loss = K.mean(categorical_crossentropy(self.y_true, model.output))
    else:
        loss = K.mean(binary_crossentropy(self.y_true, model.output))

    func = K.function([model.input, self.y_true], K.gradients(loss, weights))
    return func
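# Hypothetical usage of the returned backend function, assuming graph-mode
# tf.keras (TF 1.x style), where K.gradients is available; `tracker` is an
# illustrative instance of the class that owns get_gradients and self.y_true.
#
#   grad_fn = tracker.get_gradients(model)
#   grads = grad_fn([x_batch, y_batch])  # one gradient array per selected weight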
def predict(gids, model_path, mode="train"):
    model = load_model(model_path, custom_objects=TF_CUSTOM_METRICS)

    images = []
    labels = []
    for gid in gids:
        image = cv2.imread(os.path.join("E:", "data", "densenet", mode, "images", f"{gid}.png"),
                           cv2.IMREAD_COLOR)
        images.append(image / 255)
        label = cv2.imread(os.path.join("E:", "data", "densenet", mode, "labels", f"{gid}.png"),
                           cv2.IMREAD_GRAYSCALE)
        labels.append(one_hot_encoding(label))

    pred = model.predict(np.array(images))
    losses = categorical_crossentropy(labels, pred)
    losses = np.mean(losses, axis=(1, 2))

    argmax_mean_iou = ArgmaxMeanIoU(num_classes=6)
    for idx, p in enumerate(pred):
        argmax_mean_iou.update_state(labels[idx], p)
        iou = argmax_mean_iou.result().numpy()
        print(f"{gids[idx]}: loss={losses[idx]:.2f} iou={iou:.2f}")

        cv2.imwrite(f"images/{mode}/{gids[idx]}-prediction.png", one_hot_to_rgb(p))
        cv2.imwrite(f"images/{mode}/{gids[idx]}-label.png", one_hot_to_rgb(labels[idx]))
        cv2.imwrite(f"images/{mode}/{gids[idx]}-image.png", images[idx] * 255)
def masked_loss_function(y_true, y_pred):
    # Alternative masking approach (commented out): zero out masked entries instead
    # of dropping them.
    # mask = K.cast(K.not_equal(y_true, [0, 0]), K.floatx())
    # loss = K.categorical_crossentropy(y_true * mask, y_pred * mask)
    # return K.switch(K.flatten(K.equal(y_true, [0, 0])), K.zeros_like(loss), loss)

    # Current approach: drop entries whose label is the sentinel [-1, -1] entirely.
    idx = tf.not_equal(y_true, [-1, -1])
    y_true = tf.boolean_mask(y_true, idx)
    y_pred = tf.boolean_mask(y_pred, idx)
    return losses.categorical_crossentropy(y_true, y_pred)
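# Quick numeric check of the sentinel masking above, assuming TensorFlow 2.x;
# the second sample carries the [-1, -1] "ignore" label and is dropped from the loss.
import tensorflow as tf
from tensorflow.keras import losses

y_true = tf.constant([[1.0, 0.0], [-1.0, -1.0]])
y_pred = tf.constant([[0.8, 0.2], [0.5, 0.5]])
idx = tf.not_equal(y_true, [-1, -1])
print(losses.categorical_crossentropy(tf.boolean_mask(y_true, idx),
                                      tf.boolean_mask(y_pred, idx)).numpy())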
def cce_jaccard_loss(gt, pr, cce_weight=1., class_weights=1., smooth=SMOOTH, per_image=True):
    cce = categorical_crossentropy(gt, pr) * class_weights
    cce = K.mean(cce)
    return cce_weight * cce + jaccard_loss(gt, pr, smooth=smooth,
                                           class_weights=class_weights,
                                           per_image=per_image)
def my_categorical_crossentropy_label_smoothing(target, output):
    """
    Applies label smoothing to the one-hot target labels.
    Example: [0, 0, 1, 0, 0] ==> [0.02, 0.02, 0.92, 0.02, 0.02]
    Reference: https://www.lizenghai.com/archives/31315.html
    This follows the TensorFlow label smoothing approach; see tf.losses.softmax_cross_entropy().
    """
    label_smoothing = 0.1
    num_classes = math_ops.cast(array_ops.shape(target)[1], output.dtype)
    smooth_positives = 1.0 - label_smoothing
    smooth_negatives = label_smoothing / num_classes
    onehot_labels = target * smooth_positives + smooth_negatives
    onehot_labels = array_ops.stop_gradient(onehot_labels, name="labels_stop_gradient")
    return categorical_crossentropy(onehot_labels, output)
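# Worked numeric check of the smoothing arithmetic above (assuming 5 classes and
# label_smoothing = 0.1), framework-independent.
import numpy as np

target = np.array([0., 0., 1., 0., 0.])
label_smoothing = 0.1
num_classes = target.shape[0]
smoothed = target * (1.0 - label_smoothing) + label_smoothing / num_classes
print(smoothed)  # [0.02 0.02 0.92 0.02 0.02]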
def __init__(self, input_shape=(784, ), nb_classes=10, optimizer=tf.train.AdamOptimizer(1e-3)):
    # Create the graph.
    self.input_ = tf.placeholder(tf.float32, [None] + list(input_shape))
    self.feature_map, self.logit, self.output = self.build(self.input_, nb_classes)
    self.t = tf.placeholder(tf.float32, self.output.get_shape())

    self.loss = tf.reduce_mean(categorical_crossentropy(self.t, self.output))
    self.acc = tf.reduce_mean(categorical_accuracy(self.t, self.output))
    self.optimizer = optimizer.minimize(self.loss)

    self.saver = tf.train.Saver()
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def min_func(perturbation, c, image):
    image = image.reshape(classifier.get_input_shape())
    image = tf.convert_to_tensor(image, dtype=tf.float32)
    perturbation = perturbation.reshape(classifier.get_input_shape())
    perturbation = tf.convert_to_tensor(perturbation, dtype=tf.float32)

    with tf.GradientTape() as tape:
        tape.watch(perturbation)
        input_tensor = tf.add(image, perturbation)
        input_tensor = tf.expand_dims(input_tensor, 0)
        # Objective: c * ||perturbation|| + cross-entropy towards the target class.
        value = tf.add(tf.multiply(c, tf.linalg.norm(perturbation)),
                       categorical_crossentropy(target_class, classifier(input_tensor)))

    gradient = tape.gradient(value, perturbation)
    return value.numpy(), gradient.numpy().flatten().astype(np.float64)
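# The (scalar value, flattened float64 gradient) return pair suggests use with
# scipy.optimize.minimize(..., jac=True); a hypothetical call, assuming
# `classifier`, `target_class`, and a flat `image` array are already defined:
#
#   from scipy.optimize import minimize
#   x0 = np.zeros(image.size)
#   result = minimize(min_func, x0, args=(0.1, image), jac=True, method="L-BFGS-B")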
def Fashion_CNN(input_shape, num_classes, learning_rate, graph):
    with graph.as_default():
        # is_train = tf.placeholder(tf.bool)
        img = tf.placeholder(tf.float32, input_shape)
        labels = tf.placeholder(tf.float32, shape=(None, num_classes))
        lr = tf.placeholder(tf.float32)

        # The first three convolutions approximate Conv(7, 7).
        layer = conv_layer(img, 64)
        layer = conv_layer(layer, 64)
        layer = conv_layer(layer, 64)
        layer = MaxPooling2D()(layer)
        layer = dropout(layer, keep_prob=0.7)

        layer = conv_layer(layer, 128, shape=(-1, 14, 14, -1))
        layer = conv_layer(layer, 128, shape=(-1, 14, 14, -1))
        layer = conv_layer(layer, 64, (1, 1), shape=(-1, 14, 14, -1))
        layer = MaxPooling2D()(layer)

        layer = Flatten()(layer)
        layer = dropout(layer, keep_prob=0.7)
        layer = fc_layer(layer, 2048)
        layer = dropout(layer)
        layer = fc_layer(layer, 512)
        layer = dropout(layer)
        layer = fc_layer(layer, 256)
        layer = dropout(layer)
        layer = Dense(10, kernel_initializer='glorot_normal')(layer)
        layer = batch_norm(layer, updates_collections=None, center=True, scale=True)
        preds = activations.softmax(layer)

        # Note: lossL2 and beta are computed here but not added to the loss below.
        lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                           if 'kernel' in v.name])
        beta = 1e-7
        loss = tf.reduce_mean(losses.categorical_crossentropy(labels, preds))
        train_step = NadamOptimizer(learning_rate=lr).minimize(loss)
        acc_value = tf.reduce_mean(metrics.categorical_accuracy(labels, preds))

        return img, labels, lr, train_step, loss, acc_value
def _logits_loss(self, acts_and_advs, logits):
    # A trick to input actions and advantages through the same API.
    actions, advantages = tf.split(acts_and_advs, 2, axis=-1)

    # Sparse categorical CE loss object that supports the sample_weight arg on call().
    # The from_logits argument ensures transformation into normalized probabilities.
    weighted_sparse_ce = losses.SparseCategoricalCrossentropy(from_logits=True)

    # The policy loss is defined by policy gradients, weighted by advantages.
    # Note: we only calculate the loss on the actions we've actually taken.
    actions = tf.cast(actions, tf.int32)
    policy_loss = weighted_sparse_ce(actions, logits, sample_weight=advantages)

    # The entropy loss can be calculated as the CE of the logits over themselves.
    entropy_loss = losses.categorical_crossentropy(logits, logits, from_logits=True)

    # The signs are flipped here because the optimizer minimizes.
    return policy_loss - self.params['entropy'] * entropy_loss
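# Sketch of the "pack actions and advantages into one tensor" trick this loss
# expects, assuming a Keras model compiled with this loss; the train_on_batch
# call is illustrative only.
import numpy as np

actions = np.array([[2], [0]], dtype=np.float32)           # actions actually taken
advantages = np.array([[1.5], [-0.3]], dtype=np.float32)   # their advantages
acts_and_advs = np.concatenate([actions, advantages], axis=-1)
# model.train_on_batch(observations, [acts_and_advs, returns])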
def custom_loss(y_true, y_pred):
    """Args: y_true -- label vector of shape (batch_size, num_classes)"""
    # Add 1 to avoid division by zero.
    samples_per_cluster = K.transpose(K.sum(y_true, axis=0, keepdims=True) + 1)
    centers = K.dot(K.transpose(y_true), features) / samples_per_cluster
    center_loss = 0.5 * K.sum(K.square(features - K.dot(y_true, centers)))

    center_dot_combinations = K.dot(centers, K.transpose(centers))
    center_dot_combinations_normed = K.sqrt(K.square(center_dot_combinations))
    pair_dist = center_dot_combinations / center_dot_combinations_normed

    # Subtract the diagonal of pair_dist, which only contains ones.
    pair_dist = pair_dist - K.eye(num_classes)
    pair_dist = pair_dist + 1
    pair_dist = K.sum(pair_dist)

    island_loss = center_loss + pair_dist
    return categorical_crossentropy(y_true, y_pred) + island_loss
def masked_loss_function(y_true, y_pred):
    idx = tf.not_equal(y_true, [-1, -1])
    y_true = tf.boolean_mask(y_true, idx)
    y_pred = tf.boolean_mask(y_pred, idx)
    return losses.categorical_crossentropy(y_true, y_pred)
def loss(y_true, y_pred):
    return losses.categorical_crossentropy(y_true, y_pred)
def ce_dice_loss(y_true, y_pred):
    return categorical_crossentropy(y_true, y_pred) + (1 - dice_loss(y_true, y_pred))
def bce_dice_loss(y_true, y_pred):
    # Despite the "bce" name, this combines categorical cross-entropy with the Dice loss.
    loss = losses.categorical_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
    return loss
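# dice_loss is not defined in the snippets above; as an assumption, a common
# soft-Dice formulation that such combined losses rely on looks like this sketch:
import tensorflow.keras.backend as K

def soft_dice_loss(y_true, y_pred, smooth=1.0):
    # Dice coefficient over the whole batch, inverted so that lower is better.
    intersection = K.sum(y_true * y_pred)
    denom = K.sum(y_true) + K.sum(y_pred)
    return 1.0 - (2.0 * intersection + smooth) / (denom + smooth)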
layer = dropout(layer, is_training=is_train)
layer = fc_layer(layer, 256)
layer = dropout(layer, is_training=is_train)
layer = Dense(10, kernel_initializer='glorot_normal')(layer)
layer = batch_norm(layer, updates_collections=None, center=True, scale=True,
                   is_training=is_train)
preds = activations.softmax(layer)

lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                   if 'kernel' in v.name])
beta = 1e-7
loss = tf.reduce_mean(losses.categorical_crossentropy(labels, preds))
train_step = NadamOptimizer(learning_rate=lr).minimize(loss)

# Initialize all variables
init_op = tf.global_variables_initializer()
sess.run(init_op)

acc_value = tf.reduce_mean(metrics.categorical_accuracy(labels, preds))

def accuracy(data, n):
    l = []
    for i in range(n):
        batch = data.next_batch(100)
        acc = acc_value.eval(feed_dict={
            img: batch[0],