def custom_loss(model, x, y_perf, y_rank, sample_weight):
    """Combine a squared regression error with a squared-hinge ranking term.

    ``lambda_value`` and ``epsilon_value`` are not parameters; they are read
    from the surrounding scope.

    Arguments:
        model -- callable applied to ``x``; its output is indexed as
            ``output[row, column]``
        x -- input batch handed to ``model``
        y_perf -- performance targets; columns 0 and 1 are used
        y_rank -- label ids; ``y_rank - 1`` is used as the column index
            into the model output, columns 0 and 1 are used
        sample_weight -- per-sample weights

    Returns:
        tuple -- ``(combined_loss, reg_loss, rank_loss)`` where
            ``combined_loss`` is
            ``(1 - lambda_value) * reg_loss + lambda_value * rank_loss``
    """
    scores = model(x)
    rows = tf.range(tf.shape(y_rank)[0])
    columns = y_rank - 1

    # Pick, per row, the model output that belongs to each of the two labels.
    first_pos = tf.stack([rows, columns[:, 0]], axis=1)
    second_pos = tf.stack([rows, columns[:, 1]], axis=1)
    y_hat_0 = tf.gather_nd(scores, first_pos)
    y_hat_1 = tf.gather_nd(scores, second_pos)

    first_sq_err = tf.square(tf.subtract(y_hat_0, y_perf[:, 0]))
    reg_loss = tf.reduce_mean(tf.multiply(sample_weight, first_sq_err))
    # NOTE(review): unlike the first term, this one is not multiplied by
    # sample_weight -- confirm whether that asymmetry is intended.
    second_sq_err = tf.square(tf.subtract(y_hat_1, y_perf[:, 1]))
    reg_loss += tf.reduce_mean(second_sq_err)

    # Zero once y_hat_0 exceeds y_hat_1 by at least epsilon_value.
    margin_gap = tf.maximum(0, epsilon_value - (y_hat_0 - y_hat_1))
    rank_loss = tf.reduce_mean(
        tf.multiply(sample_weight, tf.square(margin_gap)))

    combined = (1 - lambda_value) * reg_loss + lambda_value * rank_loss
    return combined, reg_loss, rank_loss
def discriminative_loss(prediction, correct_label, feature_dim, delta_v,
                        delta_d, param_var, param_dist, param_reg):
    '''
    Run ``discriminative_loss_single`` on every element along the first axis
    of ``prediction`` / ``correct_label`` and average the results.

    :return: tuple ``(disc_loss, l_var, l_dist, l_reg)``; each entry is the
             mean of the corresponding per-element value
    '''

    def _more_samples(label, batch, ta_loss, ta_var, ta_dist, ta_reg, idx):
        # Continue while idx is smaller than the size of the first axis.
        return tf.less(idx, tf.shape(batch)[0])

    def _process_sample(label, batch, ta_loss, ta_var, ta_dist, ta_reg, idx):
        sample_loss, sample_var, sample_dist, sample_reg = \
            discriminative_loss_single(
                prediction[idx], correct_label[idx], feature_dim, delta_v,
                delta_d, param_var, param_dist, param_reg)

        # Store each component at position idx of its TensorArray.
        return (label, batch,
                ta_loss.write(idx, sample_loss),
                ta_var.write(idx, sample_var),
                ta_dist.write(idx, sample_dist),
                ta_reg.write(idx, sample_reg),
                idx + 1)

    # Dynamically sized arrays: one per loss component.
    accumulators = [
        tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
        for _ in range(4)
    ]

    final_state = tf.while_loop(
        _more_samples, _process_sample,
        [correct_label, prediction, *accumulators, 0])

    # Turn every TensorArray into a dense tensor and average it.
    stacked = [ta.stack() for ta in final_state[2:6]]
    disc_loss, l_var, l_dist, l_reg = [tf.reduce_mean(t) for t in stacked]

    return disc_loss, l_var, l_dist, l_reg
def custom_loss(model, x, y_perf, y_rank, i, sample_weights):
    """Compute the combined ranking/regression loss for the i-th label.

    ``lambda_value`` is not a parameter; it is read from the surrounding
    scope.

    Arguments:
        model -- callable applied to ``x``; its output is indexed as
            ``output[row, column]``
        x -- input batch handed to ``model``
        y_perf -- performance targets; column ``i`` is used
        y_rank -- label ids; ``y_rank - 1`` is used as the column index
            into the model output
        i -- position (0 or 1) of the label the loss is computed for
        sample_weights -- per-sample weights applied to both loss terms

    Returns:
        tuple -- ``(combined_loss, reg_loss, rank_loss)`` where
            ``combined_loss`` is
            ``lambda_value * rank_loss + (1 - lambda_value) * reg_loss``
    """
    output = model(x)
    row_indices = tf.range(tf.shape(y_rank)[0])
    y_ind = y_rank - 1

    # Model outputs that belong to the labels at positions 0, 1 and i.
    y_hat_0 = tf.gather_nd(
        output, tf.stack([row_indices, y_ind[:, 0]], axis=1))
    y_hat_1 = tf.gather_nd(
        output, tf.stack([row_indices, y_ind[:, 1]], axis=1))
    y_hat = tf.gather_nd(
        output, tf.stack([row_indices, y_ind[:, i]], axis=1))

    reg_loss = tf.reduce_mean(
        tf.multiply(sample_weights,
                    tf.square(tf.subtract(y_hat, y_perf[:, i]))))

    # Column 0 holds exp(y_hat_1), column 1 holds exp(y_hat_0).
    exp_utils_ordered = tf.exp(tf.stack([y_hat_1, y_hat_0], axis=1))

    # NOTE(review): no per-row mask is applied here, so both k terms are
    # added for every row regardless of where label i sits in the ranking.
    # Confirm this matches the intended derivative.
    rank_loss = 0.0
    for k in range(0, 2):
        denominator = tf.reduce_sum(exp_utils_ordered[:, k:], axis=1)
        rank_loss = tf.add(
            rank_loss, tf.divide(exp_utils_ordered[:, i], denominator))
    if i < 2:
        rank_loss = tf.subtract(rank_loss, 1)
    rank_loss = tf.reduce_mean(tf.multiply(sample_weights, rank_loss))

    return (lambda_value * rank_loss + (1 - lambda_value) * reg_loss,
            reg_loss, rank_loss)
def fit(self,
        num_labels: int,
        rankings: np.ndarray,
        features: np.ndarray,
        performances: np.ndarray,
        sample_weights=None,
        lambda_value=0.5,
        num_epochs=1000,
        learning_rate=0.001,
        batch_size=32,
        seed=1,
        patience=16,
        es_val_ratio=0.3,
        regression_loss="Squared",
        reshuffle_buffer_size=1000,
        early_stop_interval=5,
        log_losses=True,
        hidden_layer_sizes=None,
        activation_function="relu"):
    """Fit the network to the given data.

    The data is shuffled, the first ``es_val_ratio`` share is held out for
    early stopping, and the rest is used for training with Adam. For every
    training batch the gradients of the per-label losses (labels 0 and 1)
    are summed and applied in a single optimizer step. After training the
    weights with the best validation objective are restored.

    Arguments:
        num_labels {int} -- Number of labels in the ranking
        rankings {np.ndarray} -- Ranking of performances
        features {np.ndarray} -- Features
        performances {np.ndarray} -- Performances
        sample_weights {np.ndarray} -- Per-sample weights; all ones when
            None
        lambda_value {float} -- Weight of the ranking term; the regression
            term is weighted with ``1 - lambda_value``
        num_epochs {int} -- Maximum number of epochs
        learning_rate {float} -- Learning rate handed to Adam
        batch_size {int} -- Training batch size
        seed {int} -- Seed for TensorFlow, NumPy and the shuffle
        patience {int} -- Number of non-improving validation checks after
            which training stops
        es_val_ratio {float} -- Share of the data used for validation
        regression_loss {String} -- Accepted but not used by this
            implementation
        reshuffle_buffer_size {int} -- Accepted but not used by this
            implementation
        early_stop_interval {int} -- Validation is run every this many
            epochs
        log_losses {bool} -- Whether per-epoch mean losses are appended to
            ``self.loss_history``
        hidden_layer_sizes -- Forwarded to ``self.build_network``
        activation_function -- Forwarded to ``self.build_network``
    """
    tf.random.set_seed(seed)
    if sample_weights is None:
        sample_weights = np.ones(features.shape[0])
    np.random.seed(seed)

    # One extra input column for the constant bias feature.
    num_features = features.shape[1] + 1
    self.network = self.build_network(
        num_labels,
        num_features,
        hidden_layer_sizes=hidden_layer_sizes,
        activation_function=activation_function)
    self.network._make_predict_function()
    self.network.summary()
    self.loss_history = []
    self.es_val_history = []

    # Append the constant 1 used as bias input.
    feature_values = np.hstack((features, np.ones((features.shape[0], 1))))

    # Shuffle all arrays consistently, then split off the validation part.
    feature_values, performances, rankings, sample_weights = shuffle(
        feature_values,
        performances,
        rankings,
        sample_weights,
        random_state=seed)
    num_val = int(es_val_ratio * feature_values.shape[0])
    val_data = Dataset.from_tensor_slices(
        (feature_values[:num_val], performances[:num_val],
         rankings[:num_val], sample_weights[:num_val]))
    train_data = Dataset.from_tensor_slices(
        (feature_values[num_val:], performances[num_val:],
         rankings[num_val:], sample_weights[num_val:]))
    train_data = train_data.batch(batch_size)
    val_data = val_data.batch(1)

    def custom_loss(model, x, y_perf, y_rank, i, batch_weights):
        """Compute the combined ranking/regression loss for the i-th label.

        Arguments:
            model -- Neural network
            x -- Feature batch
            y_perf -- Performances; column ``i`` is used
            y_rank -- Rankings; ``y_rank - 1`` indexes the network output
            i -- Position (0 or 1) of the label
            batch_weights -- Per-sample weights of the batch

        Returns:
            tuple -- (combined loss, regression loss, ranking loss)
        """
        output = model(x)
        row_indices = tf.range(tf.shape(y_rank)[0])
        y_ind = y_rank - 1
        y_hat_0 = tf.gather_nd(
            output, tf.stack([row_indices, y_ind[:, 0]], axis=1))
        y_hat_1 = tf.gather_nd(
            output, tf.stack([row_indices, y_ind[:, 1]], axis=1))
        y_hat = tf.gather_nd(
            output, tf.stack([row_indices, y_ind[:, i]], axis=1))

        reg_loss = tf.reduce_mean(
            tf.multiply(batch_weights,
                        tf.square(tf.subtract(y_hat, y_perf[:, i]))))

        # Column 0 holds exp(y_hat_1), column 1 holds exp(y_hat_0).
        exp_utils_ordered = tf.exp(tf.stack([y_hat_1, y_hat_0], axis=1))

        # NOTE(review): no per-row mask is applied here, so both k terms
        # are added for every row regardless of where label i sits in the
        # ranking. Confirm this matches the intended derivative.
        rank_loss = 0.0
        for k in range(0, 2):
            denominator = tf.reduce_sum(exp_utils_ordered[:, k:], axis=1)
            rank_loss = tf.add(
                rank_loss, tf.divide(exp_utils_ordered[:, i], denominator))
        if i < 2:
            rank_loss = tf.subtract(rank_loss, 1)
        rank_loss = tf.reduce_mean(tf.multiply(batch_weights, rank_loss))

        return (lambda_value * rank_loss + (1 - lambda_value) * reg_loss,
                reg_loss, rank_loss)

    def grad(model, x, y_perf, y_rank, i, batch_weights):
        """Return the loss for label i and its gradient w.r.t. the weights."""
        with tf.GradientTape() as tape:
            loss_value, reg_loss, rank_loss = custom_loss(
                model, x, y_perf, y_rank, i, batch_weights)
        return loss_value, tape.gradient(
            loss_value, model.trainable_weights), reg_loss, rank_loss

    def custom_objective(model, x, y_perf, y_rank, batch_weights):
        """Sum the combined losses of labels 0 and 1 (validation metric)."""
        obj_val = 0
        for i in range(2):
            obj_val = obj_val + custom_loss(model, x, y_perf, y_rank, i,
                                            batch_weights)[0]
        return obj_val

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    best_val_loss = float("inf")
    current_best_weights = self.network.get_weights()
    patience_cnt = 0

    for epoch in range(num_epochs):
        epoch_reg_loss_avg = tf.keras.metrics.Mean()
        epoch_rank_loss_avg = tf.keras.metrics.Mean()

        for x, y_perf, y_rank, sample_weight in train_data:
            tvs = self.network.trainable_weights
            # Sum the gradients of both per-label losses before stepping.
            accum_grads = [tf.zeros_like(tv) for tv in tvs]
            reg_loss_sum = 0
            rank_loss_sum = 0
            for i in range(2):
                _, grads, reg_loss, rank_loss = grad(
                    self.network, x, y_perf, y_rank, i, sample_weight)
                reg_loss_sum += reg_loss
                rank_loss_sum += rank_loss
                accum_grads = [
                    acc + g for acc, g in zip(accum_grads, grads)
                ]
            optimizer.apply_gradients(zip(accum_grads, tvs))
            epoch_reg_loss_avg(reg_loss_sum)
            epoch_rank_loss_avg(rank_loss_sum)

        if log_losses:
            self.loss_history.append([
                float(epoch_reg_loss_avg.result()),
                float(epoch_rank_loss_avg.result())
            ])

        if epoch % early_stop_interval == 0:
            print("early stopping check")
            losses = []
            for x, y_perf, y_rank, sample_weight in val_data:
                losses.append(
                    custom_objective(self.network, x, y_perf, y_rank,
                                     sample_weight))
            loss_tensor = np.average(losses)
            current_val_loss = tf.reduce_mean(loss_tensor)
            print("cur val loss", current_val_loss)
            self.es_val_history.append(current_val_loss)
            if current_val_loss < best_val_loss:
                best_val_loss = current_val_loss
                current_best_weights = self.network.get_weights()
                print("new best validation loss", best_val_loss)
                patience_cnt = 0
            else:
                patience_cnt += 1
                print("patience counter", patience_cnt)
            if patience_cnt >= patience:
                print("early stopping")
                break

    # Restore the weights that achieved the best validation objective.
    self.network.set_weights(current_best_weights)
    print("best weights", current_best_weights)
def discriminative_loss_single(prediction, correct_label, feature_dim,
                               delta_v, delta_d, param_var, param_dist,
                               param_reg):
    '''
    Discriminative loss for a single prediction/label pair.

    All distances below are L1 norms (``ord=1``).

    :param prediction: inference of network; reshaped to rows of
                       ``feature_dim`` values
    :param correct_label: instance label per row, passed to
                          ``tf.unique_with_counts`` as is
                          (presumably already flattened -- TODO confirm)
    :param feature_dim: feature dimension of prediction
    :param delta_v: cutoff variance distance
    :param delta_d: cutoff cluster distance
    :param param_var: weight for intra cluster variance
    :param param_dist: weight for inter cluster distances
    :param param_reg: weight regularization
    :return: tuple ``(loss, l_var, l_dist, l_reg)``; the three components
             are already multiplied by their weights
    '''
    # One embedding vector per row.
    embeddings = tf.reshape(prediction, [-1, feature_dim])

    # Instance ids, the id of every row, and the row count per instance.
    instance_ids, row_to_instance, rows_per_instance = \
        tf.unique_with_counts(correct_label)
    rows_per_instance = tf.cast(rows_per_instance, tf.float32)
    num_instances = tf.size(instance_ids)

    # Mean embedding per instance, then broadcast back to every row.
    embedding_sums = tf.unsorted_segment_sum(embeddings, row_to_instance,
                                             num_instances)
    mu = tf.div(embedding_sums, tf.reshape(rows_per_instance, (-1, 1)))
    mu_per_row = tf.gather(mu, row_to_instance)

    # --- l_var: rows further than delta_v from their mean are penalised ---
    pull = tf.norm(embeddings - mu_per_row, ord=1, axis=1)
    pull = tf.subtract(pull, delta_v)
    pull = tf.clip_by_value(pull, 0., pull)
    pull = tf.square(pull)

    l_var = tf.unsorted_segment_sum(pull, row_to_instance, num_instances)
    l_var = tf.div(l_var, rows_per_instance)
    l_var = tf.reduce_sum(l_var)
    l_var = tf.divide(l_var, tf.cast(num_instances, tf.float32))

    # --- l_dist: means closer than 2 * delta_d are penalised ---
    # Build every ordered pair (mu_a, mu_b) by tiling mu in two ways and
    # subtracting, which also yields the mu_a - mu_a pairs.
    mu_tiled_rows = tf.tile(mu, [num_instances, 1])
    mu_tiled_cols = tf.tile(mu, [1, num_instances])
    mu_tiled_cols = tf.reshape(
        mu_tiled_cols, (num_instances * num_instances, feature_dim))
    pair_diff = tf.subtract(mu_tiled_cols, mu_tiled_rows)

    # Drop the pairs of a mean with itself (the identity-matrix diagonal).
    identity = tf.eye(num_instances)
    zero = tf.zeros(1, dtype=tf.float32)
    off_diagonal = tf.equal(identity, zero)
    off_diagonal = tf.reshape(off_diagonal, [-1])
    pair_diff = tf.boolean_mask(pair_diff, off_diagonal)

    push = tf.norm(pair_diff, ord=1, axis=1)
    push = tf.subtract(2. * delta_d, push)
    push = tf.clip_by_value(push, 0., push)
    push = tf.square(push)
    l_dist = tf.reduce_mean(push)

    # With a single instance there is no pair, so the term is set to 0.
    l_dist = tf.cond(tf.equal(1, num_instances),
                     lambda: 0.,
                     lambda: l_dist)

    # --- l_reg: mean L1 norm of the instance means ---
    l_reg = tf.reduce_mean(tf.norm(mu, ord=1, axis=1))

    param_scale = 1.
    l_var = param_var * l_var
    l_dist = param_dist * l_dist
    l_reg = param_reg * l_reg

    loss = param_scale * (l_var + l_dist + l_reg)

    return loss, l_var, l_dist, l_reg
# Softmax regression on MNIST: builds the graph, runs 1000 gradient-descent
# steps on mini-batches of 50, then prints the accuracy on the test set.
import tensorflow_core as tf
# NOTE(review): `input_data` is not imported in the visible code --
# presumably it is brought into scope elsewhere; confirm.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# Start an interactive session for the computation graph
sess = tf.InteractiveSession()
# Placeholders for the inputs (784 values per row) and labels (10 per row)
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])
# Weights
W = tf.Variable(tf.zeros([784, 10]))
# Bias
b = tf.Variable(tf.zeros([10]))
# Initialize the variables
sess.run(tf.initialize_all_variables())
# Prediction
y = tf.nn.softmax(tf.matmul(x, W) + b)
# Cross-entropy as the loss function
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
# Training: minimize the loss function
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# Number of training iterations
for i in range(1000):
    batch = mnist.train.next_batch(50)
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
# Check whether each prediction is correct
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Compute the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Print the accuracy
print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
def compute_loss(self, true, prediction):
    """Return the mean sparse categorical cross-entropy.

    ``true`` and ``prediction`` are forwarded unchanged to
    ``tf.keras.losses.sparse_categorical_crossentropy``; the result is
    averaged over all elements.
    """
    per_example = tf.keras.losses.sparse_categorical_crossentropy(
        true, prediction)
    return tf.reduce_mean(per_example)
def fit(self,
        num_labels: int,
        rankings: np.ndarray,
        features: np.ndarray,
        performances: np.ndarray,
        sample_weights=None,
        lambda_value=0.5,
        epsilon_value=1,
        num_epochs=1000,
        learning_rate=0.001,
        batch_size=32,
        seed=1,
        patience=16,
        es_val_ratio=0.3,
        regression_loss="Squared",
        reshuffle_buffer_size=1000,
        early_stop_interval=5,
        log_losses=True,
        hidden_layer_sizes=None,
        activation_function="relu"):
    """Fit the network to the given data.

    The data is shuffled, the first ``es_val_ratio`` share is held out for
    early stopping, and the rest is used for training with Adam on a
    combination of a squared regression error and a squared-hinge ranking
    term. After training the weights with the best validation loss are
    restored.

    Arguments:
        num_labels {int} -- Number of labels in the ranking
        rankings {np.ndarray} -- Ranking of performances
        features {np.ndarray} -- Features
        performances {np.ndarray} -- Performances
        sample_weights {np.ndarray} -- Per-sample weights; all ones when
            None
        lambda_value {float} -- Weight of the ranking term; the regression
            term is weighted with ``1 - lambda_value``
        epsilon_value {float} -- Margin of the ranking hinge
        num_epochs {int} -- Maximum number of epochs
        learning_rate {float} -- Learning rate handed to Adam
        batch_size {int} -- Training batch size
        seed {int} -- Seed for TensorFlow, NumPy and the shuffle
        patience {int} -- Number of non-improving validation checks after
            which training stops
        es_val_ratio {float} -- Share of the data used for validation
        regression_loss {String} -- Accepted but not used by this
            implementation
        reshuffle_buffer_size {int} -- Accepted but not used by this
            implementation
        early_stop_interval {int} -- Validation is run every this many
            epochs
        log_losses {bool} -- Whether per-epoch mean losses are appended to
            ``self.loss_history``
        hidden_layer_sizes -- Forwarded to ``self.build_network``
        activation_function -- Forwarded to ``self.build_network``
    """
    tf.random.set_seed(seed)
    if sample_weights is None:
        sample_weights = np.ones(features.shape[0])
    np.random.seed(seed)

    # One extra input column for the constant bias feature.
    num_features = features.shape[1] + 1
    self.network = self.build_network(
        num_labels,
        num_features,
        hidden_layer_sizes=hidden_layer_sizes,
        activation_function=activation_function)
    self.network._make_predict_function()
    self.network.summary()
    self.loss_history = []
    self.es_val_history = []

    # Append the constant 1 used as bias input.
    feature_values = np.hstack((features, np.ones((features.shape[0], 1))))

    # Shuffle all arrays consistently, then split off the validation part.
    feature_values, performances, rankings, sample_weights = shuffle(
        feature_values,
        performances,
        rankings,
        sample_weights,
        random_state=seed,
    )
    num_val = int(es_val_ratio * feature_values.shape[0])
    val_data = Dataset.from_tensor_slices(
        (feature_values[:num_val], performances[:num_val],
         rankings[:num_val], sample_weights[:num_val]))
    train_data = Dataset.from_tensor_slices(
        (feature_values[num_val:], performances[num_val:],
         rankings[num_val:], sample_weights[num_val:]))
    train_data = train_data.batch(batch_size)
    val_data = val_data.batch(1)

    def custom_loss(model, x, y_perf, y_rank, sample_weight):
        """Combine the regression error with the hinge ranking term.

        Arguments:
            model -- Neural network
            x -- Feature batch
            y_perf -- Performances; columns 0 and 1 are used
            y_rank -- Rankings; ``y_rank - 1`` indexes the network output
            sample_weight -- Per-sample weights of the batch

        Returns:
            tuple -- (combined loss, regression loss, ranking loss)
        """
        output = model(x)
        row_indices = tf.range(tf.shape(y_rank)[0])
        y_ind = y_rank - 1
        added_indices_0 = tf.stack([row_indices, y_ind[:, 0]], axis=1)
        added_indices_1 = tf.stack([row_indices, y_ind[:, 1]], axis=1)
        y_hat_0 = tf.gather_nd(output, added_indices_0)
        y_hat_1 = tf.gather_nd(output, added_indices_1)

        reg_loss = tf.reduce_mean(
            tf.multiply(sample_weight,
                        (tf.square(tf.subtract(y_hat_0, y_perf[:, 0])))))
        # NOTE(review): unlike the first term, this one is not multiplied
        # by sample_weight -- confirm whether that asymmetry is intended.
        reg_loss += tf.reduce_mean(
            (tf.square(tf.subtract(y_hat_1, y_perf[:, 1]))))

        # Zero once y_hat_0 exceeds y_hat_1 by at least epsilon_value.
        rank_loss = tf.reduce_mean(
            tf.multiply(
                sample_weight,
                tf.square(
                    tf.maximum(0, epsilon_value - (y_hat_0 - y_hat_1)))))

        return ((1 - lambda_value) * reg_loss + lambda_value * rank_loss,
                reg_loss, rank_loss)

    def grad(model, x, y_perf, y_rank, sample_weight):
        """Return the loss and its gradient w.r.t. the trainable weights."""
        with tf.GradientTape() as tape:
            loss_value, reg_loss, rank_loss = custom_loss(
                model, x, y_perf, y_rank, sample_weight)
        return loss_value, tape.gradient(
            loss_value, model.trainable_weights), reg_loss, rank_loss

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    best_val_loss = float("inf")
    current_best_weights = self.network.get_weights()
    patience_cnt = 0

    for epoch in range(num_epochs):
        epoch_reg_loss_avg = tf.keras.metrics.Mean()
        epoch_rank_loss_avg = tf.keras.metrics.Mean()

        for x, y_perf, y_rank, sample_weight in train_data:
            _, grads, reg_loss, rank_loss = grad(
                self.network, x, y_perf, y_rank, sample_weight)
            optimizer.apply_gradients(
                zip(grads, self.network.trainable_weights))
            epoch_reg_loss_avg(reg_loss)
            epoch_rank_loss_avg(rank_loss)

        if log_losses:
            self.loss_history.append([
                float(epoch_reg_loss_avg.result()),
                float(epoch_rank_loss_avg.result())
            ])

        if epoch % early_stop_interval == 0:
            losses = []
            for x, y_perf, y_rank, sample_weight in val_data:
                # Only the combined loss (first tuple entry) drives early
                # stopping; the components must not be averaged into it.
                losses.append(
                    custom_loss(self.network, x, y_perf, y_rank,
                                sample_weight)[0])
            loss_tensor = np.average(losses)
            current_val_loss = tf.reduce_mean(loss_tensor)
            print("cur val loss", current_val_loss)
            self.es_val_history.append(current_val_loss)
            if current_val_loss < best_val_loss:
                best_val_loss = current_val_loss
                current_best_weights = self.network.get_weights()
                patience_cnt = 0
            else:
                patience_cnt += 1
                print("patience counter", patience_cnt)
            if patience_cnt >= patience:
                print("early stopping")
                break

    # Restore the weights that achieved the best validation loss.
    self.network.set_weights(current_best_weights)