def _forward(self, x, y, model_params, init_states, is_training=False):
    """Builds the recurrent forward pass and returns its losses.

    Args:
      x: [batch_size, num_steps], input batch.
      y: [batch_size, num_steps], output batch.
      model_params: a `dict` of weights; the keys 'w_emb', 'w_prev', 'w_skip'
        and 'w_soft' are read.
      init_states: a `dict` holding the carried recurrent state under 's'.
      is_training: if `True`, applies dropout and adds regularization terms.

    Returns:
      reg_loss: scalar, cross-entropy loss plus (when training) the weight
        and activation L2 penalties.
      loss: scalar, mean cross-entropy loss.
    """
    embedding_w = model_params['w_emb']
    recurrent_w = model_params['w_prev']
    skip_w = model_params['w_skip']
    softmax_w = model_params['w_soft']
    state_var = init_states['s']

    embedded = tf.nn.embedding_lookup(embedding_w, x)

    hparams = self.params
    batch_size = hparams.batch_size
    hidden_size = hparams.hidden_size

    # Dropout masks are only generated for training graphs.
    input_mask = None
    layer_mask = None
    if is_training:
        embedded = tf.layers.dropout(
            embedded,
            rate=hparams.drop_i,
            noise_shape=[batch_size, 1, hidden_size],
            training=True)
        input_mask = _gen_mask([batch_size, hidden_size], hparams.drop_x)
        layer_mask = _gen_mask([batch_size, hidden_size], hparams.drop_l)

    final_state, all_states = _rnn_fn(embedded, state_var, recurrent_w,
                                      skip_w, input_mask, layer_mask, hparams)

    outputs = all_states
    if is_training:
        outputs = tf.layers.dropout(
            outputs,
            rate=hparams.drop_o,
            noise_shape=[batch_size, 1, hidden_size],
            training=True)

    # Writing the last state back lets the next batch continue from it.
    state_update = tf.assign(state_var, final_state)

    logits = tf.einsum('bnh,vh->bnv', outputs, softmax_w)
    token_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    loss = tf.reduce_mean(token_losses)

    # loss + regularization_terms, for training only
    reg_loss = loss
    if is_training:
        # L2 weight reg
        weight_norms = []
        for variable in tf.trainable_variables():
            weight_norms.append(tf.reduce_sum(variable**2))
        reg_loss += hparams.weight_decay * tf.add_n(weight_norms)

        # activation L2 reg
        reg_loss += hparams.alpha * tf.reduce_mean(all_states**2)

    # Tie the state update to the returned losses so that evaluating either
    # loss also carries the recurrent state forward.
    with tf.control_dependencies([state_update]):
        loss = tf.identity(loss)
        if is_training:
            reg_loss = tf.identity(reg_loss)

    return reg_loss, loss
def detection_loss(cls_outputs, box_outputs, labels, params):
    """Computes total detection loss.

    Computes total detection loss including box and class loss from all
    levels. The `labels` dictionary is only read; it is never modified, so
    the function can safely be called more than once with the same labels.

    Args:
      cls_outputs: an ordered dict with keys representing levels and values
        representing logits in [batch_size, height, width, num_anchors].
      box_outputs: an ordered dict with keys representing levels and values
        representing box regression targets in
        [batch_size, height, width, num_anchors * 4].
      labels: the dictionary returned from the dataloader that includes
        groundtruth targets. The keys 'mean_num_positives',
        'cls_targets_<level>' and 'box_targets_<level>' are read.
      params: the dictionary of training parameters. The keys 'data_format',
        'num_classes', 'alpha', 'gamma', 'delta' and 'box_loss_weight' are
        read.

    Returns:
      total_loss: a scalar tensor, `cls_loss + box_loss_weight * box_loss`.
      cls_loss: a scalar float tensor, class loss summed over all levels.
      box_loss: a scalar tensor, box regression loss summed over all levels.
    """
    # Sum all positives in a batch for normalization and avoid zero
    # num_positives_sum, which would lead to inf loss during training
    num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
    channels_first = params['data_format'] == 'channels_first'
    num_classes = params['num_classes']

    cls_losses = []
    box_losses = []
    for level in cls_outputs.keys():
        # Work on local tensors: writing the transposed targets back into
        # `labels` would corrupt the caller's dict and double-transpose on
        # any later call.
        cls_targets = labels['cls_targets_%d' % level]
        box_targets = labels['box_targets_%d' % level]
        if channels_first:
            cls_targets = tf.transpose(cls_targets, [0, 3, 1, 2])
            box_targets = tf.transpose(box_targets, [0, 3, 1, 2])

        # Onehot encoding for classification labels.
        cls_targets_one_hot = tf.one_hot(cls_targets, num_classes)

        # Fold the anchor and class axes together for the loss, then unfold
        # them again so ignored anchors can be masked per anchor.
        static_shape = cls_targets_one_hot.get_shape().as_list()
        if channels_first:
            bs, _, dim_a, dim_b, _ = static_shape
            folded_shape = [bs, -1, dim_a, dim_b]
            unfolded_shape = [bs, -1, dim_a, dim_b, num_classes]
        else:
            bs, dim_a, dim_b, _, _ = static_shape
            folded_shape = [bs, dim_a, dim_b, -1]
            unfolded_shape = [bs, dim_a, dim_b, -1, num_classes]
        cls_targets_one_hot = tf.reshape(cls_targets_one_hot, folded_shape)

        cls_loss = _classification_loss(cls_outputs[level],
                                        cls_targets_one_hot,
                                        num_positives_sum,
                                        alpha=params['alpha'],
                                        gamma=params['gamma'])
        cls_loss = tf.reshape(cls_loss, unfolded_shape)

        # Targets equal to -2 contribute nothing to the class loss.
        keep_mask = tf.cast(
            tf.expand_dims(tf.not_equal(cls_targets, -2), -1), tf.float32)
        cls_loss *= keep_mask
        cls_losses.append(tf.reduce_sum(cls_loss))

        box_losses.append(
            _box_loss(box_outputs[level],
                      box_targets,
                      num_positives_sum,
                      delta=params['delta']))

    # Sum per level losses to total loss.
    cls_loss = tf.add_n(cls_losses)
    box_loss = tf.add_n(box_losses)
    total_loss = cls_loss + params['box_loss_weight'] * box_loss
    return total_loss, cls_loss, box_loss
def initialise_model(self, numpy_embedding):
    """Initialises the TensorFlow Attract-Repel model.

    Creates the input placeholders, the fixed (`W_init`) and trainable
    (`W_dynamic`) embedding matrices, the attract and repel cost tensors
    (including their regularisation terms) and one Adagrad training step per
    cost. Everything is stored on `self`.

    Args:
      numpy_embedding: array used both as the constant reference embedding
        and as the initial value of the trainable embedding.

    Returns:
      A 4-tuple of L2-normalised embedding lookups:
      (attract left, attract right, repel left, repel right).
    """
    # Each row of these placeholders holds the embedding-row indices of a
    # word pair.
    self.attract_examples = tf.placeholder(tf.int32, [None, 2])
    self.repel_examples = tf.placeholder(tf.int32, [None, 2])

    self.negative_examples_attract = tf.placeholder(tf.int32, [None, 2])
    self.negative_examples_repel = tf.placeholder(tf.int32, [None, 2])

    self.attract_margin = tf.placeholder("float")
    self.repel_margin = tf.placeholder("float")
    self.regularisation_constant = tf.placeholder("float")

    # Initial (distributional) vectors. Needed for L2 regularisation.
    self.W_init = tf.constant(numpy_embedding, name="W_init")

    # Variable storing the updated word vectors.
    self.W_dynamic = tf.Variable(numpy_embedding, name="W_dynamic")

    def unit_vectors(pairs, column):
        # L2-normalised current vectors for one column of a pair batch.
        return tf.nn.l2_normalize(
            tf.nn.embedding_lookup(self.W_dynamic, pairs[:, column]), 1)

    def original_vectors(pairs, column):
        # Un-normalised initial vectors for one column of a pair batch.
        return tf.nn.embedding_lookup(self.W_init, pairs[:, column])

    def row_dot(left, right):
        # Row-wise dot product; yields one similarity per example.
        return tf.reduce_sum(tf.multiply(left, right), 1)

    # Attract Cost Function:

    # example pairs...
    attract_examples_left = unit_vectors(self.attract_examples, 0)
    attract_examples_right = unit_vectors(self.attract_examples, 1)

    # and their respective negative examples:
    negative_examples_attract_left = unit_vectors(
        self.negative_examples_attract, 0)
    negative_examples_attract_right = unit_vectors(
        self.negative_examples_attract, 1)

    # dot product between the example pairs.
    attract_similarity_between_examples = row_dot(attract_examples_left,
                                                  attract_examples_right)

    # dot product of each word in the example with its negative example.
    attract_similarity_to_negatives_left = row_dot(
        attract_examples_left, negative_examples_attract_left)
    attract_similarity_to_negatives_right = row_dot(
        attract_examples_right, negative_examples_attract_right)

    # and the final Attract Cost Function (sans regularisation):
    self.attract_cost = (
        tf.nn.relu(self.attract_margin +
                   attract_similarity_to_negatives_left -
                   attract_similarity_between_examples) +
        tf.nn.relu(self.attract_margin +
                   attract_similarity_to_negatives_right -
                   attract_similarity_between_examples))

    # Repel Cost Function:

    # example pairs...
    repel_examples_left = unit_vectors(self.repel_examples, 0)
    repel_examples_right = unit_vectors(self.repel_examples, 1)

    # and their respective negative examples:
    negative_examples_repel_left = unit_vectors(
        self.negative_examples_repel, 0)
    negative_examples_repel_right = unit_vectors(
        self.negative_examples_repel, 1)

    # dot product between the example pairs.
    repel_similarity_between_examples = row_dot(repel_examples_left,
                                                repel_examples_right)

    # dot product of each word in the example with its negative example.
    repel_similarity_to_negatives_left = row_dot(
        repel_examples_left, negative_examples_repel_left)
    repel_similarity_to_negatives_right = row_dot(
        repel_examples_right, negative_examples_repel_right)

    # and the final Repel Cost Function (sans regularisation):
    self.repel_cost = (
        tf.nn.relu(self.repel_margin -
                   repel_similarity_to_negatives_left +
                   repel_similarity_between_examples) +
        tf.nn.relu(self.repel_margin -
                   repel_similarity_to_negatives_right +
                   repel_similarity_between_examples))

    # The Regularisation Cost (separate for the two terms, depending on
    # which one is called): distance between the original distributional
    # vectors and the current (normalised) vectors of the example pairs.
    regularisation_cost_attract = self.regularisation_constant * (
        tf.nn.l2_loss(original_vectors(self.attract_examples, 0) -
                      attract_examples_left) +
        tf.nn.l2_loss(original_vectors(self.attract_examples, 1) -
                      attract_examples_right))
    self.attract_cost += regularisation_cost_attract

    regularisation_cost_repel = self.regularisation_constant * (
        tf.nn.l2_loss(original_vectors(self.repel_examples, 0) -
                      repel_examples_left) +
        tf.nn.l2_loss(original_vectors(self.repel_examples, 1) -
                      repel_examples_right))
    self.repel_cost += regularisation_cost_repel

    # Finally, we define the training step functions for both steps.
    tvars = tf.trainable_variables()

    def clipped_grads_and_vars(cost):
        # tf.gradients yields None for variables the cost does not depend
        # on; clipping None would raise, so those pairs are skipped.
        return [(tf.clip_by_value(grad, -2., 2.), var)
                for grad, var in zip(tf.gradients(cost, tvars), tvars)
                if grad is not None]

    attract_optimiser = tf.train.AdagradOptimizer(0.05)
    repel_optimiser = tf.train.AdagradOptimizer(0.05)

    self.attract_cost_step = attract_optimiser.apply_gradients(
        clipped_grads_and_vars(self.attract_cost))
    self.repel_cost_step = repel_optimiser.apply_gradients(
        clipped_grads_and_vars(self.repel_cost))

    # return the handles for loading vectors from the TensorFlow embeddings:
    return (attract_examples_left, attract_examples_right,
            repel_examples_left, repel_examples_right)
def avg_norm(t):
    """Returns the mean Euclidean norm of `t` taken along its last axis."""
    squared_lengths = tf.reduce_sum(tf.square(t), axis=-1)
    lengths = tf.sqrt(squared_lengths)
    return tf.reduce_mean(lengths)
mnist = input_data.read_data_sets("MINIST_data/", one_hot=True)
import pylab
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
tf.reset_default_graph()

# Inputs: flattened 784-value images and one-hot labels over 10 classes.
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# Single-layer softmax classifier.
W = tf.Variable(tf.random_normal([784, 10]))
b = tf.Variable(tf.zeros([10]))
logits = tf.matmul(x, W) + b
pred = tf.nn.softmax(logits)

# Cross-entropy is computed from the logits rather than from log(softmax):
# the explicit form yields 0 * log(0) = NaN as soon as one predicted
# probability underflows to zero.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))

learning_rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

training_epochs = 25
batch_size = 100
display_step = 1

# Keep only the most recent checkpoint.
saver = tf.train.Saver(max_to_keep=1)
savedir = "model/"
fileprefix = "handwriting.ckpt"

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
W8, output_shape=tf.stack([ input_data_num, img_size_1, img_size_1, channel_num_1 ]), strides=[1, 2, 2, 1], padding='SAME') decode_layer4 = tf.add(decode_layer4, b8) decode_layer4 = tf.nn.sigmoid(decode_layer4) tf.add_to_collection('reg_losses', tf.nn.l2_loss(W8)) tf.add_to_collection('reg_losses', tf.nn.l2_loss(b8)) # Loss layer with tf.name_scope('Loss_Layer') as scope: error = decode_layer4 - input_label error_square = tf.reduce_sum(tf.square(error)) reg_loss = tf.add_n(tf.get_collection('reg_losses')) total_loss = error_square + reg_loss * regulation # Summarize Scalar value tf.summary.scalar('Total_loss', total_loss) tf.summary.scalar('Reg_losses', reg_loss) # Set operation train_op = tf.train.AdamOptimizer(learning_rate).minimize(total_loss) summary_op = tf.summary.merge_all() '''세션 구성''' with tf.Session() as sess: sess.run(tf.global_variables_initializer()) batch_count = int(x_train.shape[0] / batch_size) writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
def cat_entropy(logits):
    """Returns the entropy of softmax(logits), reduced over axis 1.

    The row maximum is subtracted before exponentiating so that `tf.exp`
    does not overflow.
    """
    shifted = logits - tf.reduce_max(logits, 1, keepdims=True)
    exp_shifted = tf.exp(shifted)
    normaliser = tf.reduce_sum(exp_shifted, 1, keepdims=True)
    probs = exp_shifted / normaliser
    # -log(p) expressed with the shifted logits: log(Z) - shifted.
    neg_log_probs = tf.log(normaliser) - shifted
    return tf.reduce_sum(probs * neg_log_probs, 1)
def multiply_fisher_factor_transpose(self, vector):
    """Returns `sqrt_p * vector - sqrt_p * sum(p * vector)`.

    `p` is `self._probs`, `sqrt_p` is `self._sqrt_probs`, and the sum runs
    over the last axis (kept for broadcasting).
    NOTE(review): presumably the transposed Fisher factor of a categorical
    distribution -- confirm against the enclosing class.
    """
    probs = self._probs
    sqrt_probs = self._sqrt_probs
    weighted_sum = tf.reduce_sum(probs * vector, axis=-1, keepdims=True)
    return sqrt_probs * vector - sqrt_probs * weighted_sum
def apply_line_prediction(inputs, features, blur_steps, learn_alpha=True, name=None):
    """Applies "Line Prediction" layer to input images.

    For each of the two frames packed in `inputs`, a per-pixel 2-channel
    offset is predicted from `features`, the frame is resampled at
    `blur_steps - 1` positions along that offset, and the samples (plus the
    unwarped frame) are combined with either learned or uniform weights.
    The two resulting images are averaged.

    Args:
      inputs: tensor whose static shape must be compatible with
        [None, None, None, 6]; channels 0-2 and 3-5 are treated as two
        frames.
      features: tensor fed to the 1x1 convolutions that predict offsets and
        (optionally) blending weights.
        NOTE(review): presumably has the same spatial size as `inputs` --
        confirm.
      blur_steps: Python integer, number of samples blended per pixel
        (the original frame counts as one of them).
      learn_alpha: if True, blending weights come from a softmax-activated
        convolution; otherwise each sample gets weight `1 / blur_steps`.
      name: optional name scope; defaults to 'blur_prediction'.

    Returns:
      The average of the two blended frames.
    """
    inputs.shape.assert_is_compatible_with([None, None, None, 6])
    with tf.name_scope(name, 'blur_prediction', values=[inputs, features]):
        with tf.name_scope(None, 'input_frames', values=[inputs]):
            # Split the 6 channels into two 3-channel frames.
            frames = [inputs[:, :, :, :3], inputs[:, :, :, 3:]]

        with tf.name_scope(None, 'frame_size', values=[inputs, features]):
            shape = tf.shape(inputs)
            height = shape[1]
            width = shape[2]

        with tf.name_scope(None, 'identity_warp', values=[]):
            # Pixel coordinates (x, y) of every location, with singleton
            # batch and step axes added for broadcasting.
            x_idx, y_idx = tf.meshgrid(tf.range(width), tf.range(height))
            identity_warp = tf.to_float(tf.stack([x_idx, y_idx], axis=-1))
            identity_warp = identity_warp[tf.newaxis, :, :, tf.newaxis, :]

            # Fractions k / (blur_steps - 1) for k = 1 .. blur_steps - 1:
            # how far along the predicted offset each sample is taken.
            warp_steps = tf.to_float(tf.range(blur_steps - 1) + 1) / (blur_steps - 1)
            warp_steps = warp_steps[tf.newaxis, tf.newaxis, tf.newaxis, :, tf.newaxis]

            # Largest valid (x, y) coordinate, used to clip sample positions.
            max_warps = tf.to_float(tf.stack([width - 1, height - 1]))
            max_warps = max_warps[tf.newaxis, tf.newaxis, tf.newaxis, tf.newaxis, :]

        output_frames = []
        for frame in frames:
            with tf.name_scope(None, 'predict_blurs', values=[features]):
                # A fresh pair of conv layers is created for every frame.
                flow = tf.layers.conv2d(features, 2, 1, padding='same')
                if learn_alpha:
                    alpha = tf.layers.conv2d(features, blur_steps, 1, padding='same', activation=tf.nn.softmax)

            with tf.name_scope(None, 'apply_blurs', values=[]):
                with tf.name_scope(None, 'warp', values=[frame, flow]):
                    warps = identity_warp + flow[:, :, :, tf.newaxis, :] * warp_steps
                    warps = tf.clip_by_value(warps, 0.0, max_warps)
                    warped = contrib_resampler.resampler(frame, warps)
                    # Prepend the unwarped frame so that axis 3 holds
                    # blur_steps samples per pixel.
                    warped = tf.concat([frame[:, :, :, tf.newaxis, :], warped], axis=3)

                with tf.name_scope(None, 'apply_alpha', values=[frame, flow]):
                    if learn_alpha:
                        mask = alpha[:, :, :, :, tf.newaxis]
                    else:
                        mask = 1.0 / blur_steps
                    # Weighted sum over the sample axis.
                    output_frames.append(tf.reduce_sum(warped * mask, axis=3))

        with tf.name_scope(None, 'outputs', values=[output_frames]):
            output = tf.add_n(output_frames) / len(frames)

        return output
def _evaluate(self, targets):
    """Returns minus the sum of `self.dist.log_prob(targets)`."""
    log_probs = self.dist.log_prob(targets)
    return -tf.reduce_sum(log_probs)
def multiply_fisher(self, vector):
    """Returns `vector * p - p * sum(vector * p)` with `p = self._probs`.

    The sum runs over the last axis (kept for broadcasting), i.e. the result
    is `(diag(p) - p p^T) vector` along that axis.
    NOTE(review): presumably the Fisher matrix of a categorical
    distribution -- confirm against the enclosing class.
    """
    probs = self._probs
    weighted = vector * probs
    total = tf.reduce_sum(weighted, axis=-1, keepdims=True)
    return weighted - probs * total
def order_loss(labels, logits, margin=0.2):
    """Penalises non-label logits that come within `margin` of the label logit.

    Args:
      labels: tensor multiplied element-wise with `logits` to pick the label
        activation (presumably one-hot along the last axis -- confirm).
      logits: tensor of the same shape as `labels`.
      margin: required gap between the label logit and every other logit.

    Returns:
      An element-wise cost with the shape of `logits`: zero at label
      positions and wherever `logits <= label_logit - margin`, otherwise
      `(logits + margin - label_logit) ** 2`. No reduction is applied.
    """
    # `keepdims` is the supported spelling; the old `keep_dims` keyword is
    # deprecated and rejected by the TF2 reduce_sum signature.
    label_act = tf.reduce_sum(labels * logits, axis=-1, keepdims=True)
    violates_margin = tf.cast(
        tf.greater(logits, label_act - margin), tf.float32)
    negative_cost = (1 - labels) * violates_margin * tf.pow(
        logits + margin - label_act, 2)
    return negative_cost
def loss_layer(self, predict, labels):
    """
    Define loss layer

    The loss is the sum of four terms: box-centre (xy) squared error,
    box-size (wh) Huber loss, objectness sigmoid cross-entropy and class
    softmax cross-entropy. Weights for the terms are read from
    ``self.config``.

    Parameters
    ----------
    predict: TensorFlow Tensor
        The predicted values for the batch of data. It is reshaped to
        ``[-1] + grid_shape + [num_anchors, 5 + num_classes]``.
    labels: TensorFlow Tensor
        Ground truth labels for the batch of data. The last axis is read as
        xy (0:2), wh (2:4), confidence (4) and class targets (5:).

    Returns
    -------
    loss: TensorFlow Tensor
        Loss (combination of regression and classification losses)
    """
    # Anchor/ground-truth IoU thresholds for positive and negative cells.
    POS_IOU = 0.7
    NEG_IOU = 0.3

    # Loss configuration, converted from the shared-array representation.
    rescore = int(
        _utils.convert_shared_float_array_to_numpy(
            self.config.get("od_rescore")))
    lmb_coord_xy = _utils.convert_shared_float_array_to_numpy(
        self.config.get("lmb_coord_xy"))
    lmb_coord_wh = _utils.convert_shared_float_array_to_numpy(
        self.config.get("lmb_coord_wh"))
    lmb_obj = _utils.convert_shared_float_array_to_numpy(
        self.config.get("lmb_obj"))
    lmb_noobj = _utils.convert_shared_float_array_to_numpy(
        self.config.get("lmb_noobj"))
    lmb_class = _utils.convert_shared_float_array_to_numpy(
        self.config.get("lmb_class"))

    # Prediction values from model on the images
    ypred = _tf.reshape(
        predict,
        [-1] + list(self.grid_shape) +
        [self.num_anchors, 5 + self.num_classes],
    )
    raw_xy = ypred[..., 0:2]
    raw_wh = ypred[..., 2:4]
    raw_conf = ypred[..., 4]
    class_scores = ypred[..., 5:]

    tf_anchors = _tf.constant(self.anchors)

    # Ground Truth info derived from ymap/labels
    gt_xy = labels[..., 0:2]
    gt_wh = labels[..., 2:4]
    # Sizes in the same log-space as raw_wh; 1e-5 keeps log() finite when
    # gt_wh is zero.
    gt_raw_wh = _tf.math.log(gt_wh / tf_anchors + 1e-5)
    gt_conf = labels[..., 4]
    gt_class = labels[..., 5:]

    # Decode the predicted boxes: centre via sigmoid, size via exp * anchor.
    xy = _tf.sigmoid(raw_xy)
    wh = _tf.exp(raw_wh) * tf_anchors
    # exp(0) == 1, so this is the anchor sizes broadcast to the shape of the
    # predictions.
    wh_anchors = _tf.exp(raw_wh * 0.0) * tf_anchors
    lo = xy - wh / 2
    hi = xy + wh / 2

    gt_area = gt_wh[..., 0] * gt_wh[..., 1]
    gt_lo = gt_xy - gt_wh / 2
    gt_hi = gt_xy + gt_wh / 2

    # IoU between each anchor and the ground-truth box using sizes only
    # (the intersection is min(width) * min(height), as if both boxes
    # shared a centre).
    c_inter = _tf.maximum(2 * _tf.minimum(wh_anchors / 2, gt_wh / 2), 0)
    c_area = wh_anchors[..., 0] * wh_anchors[..., 1]
    c_inter_area = c_inter[..., 0] * c_inter[..., 1]
    c_iou = c_inter_area / (c_area + gt_area - c_inter_area)

    # IoU between the decoded prediction and the ground-truth box.
    inter = _tf.maximum(_tf.minimum(hi, gt_hi) - _tf.maximum(lo, gt_lo), 0)
    area = wh[..., 0] * wh[..., 1]
    inter_area = inter[..., 0] * inter[..., 1]
    iou = inter_area / (area + gt_area - inter_area)
    active_iou = c_iou

    # Object mask: ground-truth confidence is 1 AND the anchor either has
    # the highest anchor IoU along axis 3 or exceeds POS_IOU.
    cond_gt = _tf.cast(_tf.equal(gt_conf, _tf.constant(1.0)), dtype=_tf.float32)
    max_iou = _tf.reduce_max(active_iou, 3, keepdims=True)
    cond_max = _tf.cast(_tf.equal(active_iou, max_iou), dtype=_tf.float32)

    cond_above = c_iou > POS_IOU

    cond_logical_or = _tf.cast(
        _tf.math.logical_or(_tf.cast(cond_max, dtype=_tf.bool),
                            _tf.cast(cond_above, dtype=_tf.bool)),
        dtype=_tf.float32,
    )
    cond_obj = _tf.cast(
        _tf.math.logical_and(
            _tf.cast(cond_gt, dtype=_tf.bool),
            _tf.cast(cond_logical_or, dtype=_tf.bool),
        ),
        dtype=_tf.float32,
    )

    # Masks are constants for the optimiser; no gradient flows through them.
    kr_obj_ij = _tf.stop_gradient(cond_obj)

    # No-object mask: anchor IoU below NEG_IOU AND not marked as object.
    cond_below = c_iou < NEG_IOU
    cond_logical_not = _tf.cast(_tf.math.logical_not(
        _tf.cast(cond_obj, dtype=_tf.bool)), dtype=_tf.float32)
    cond_noobj = _tf.cast(
        _tf.math.logical_and(
            _tf.cast(cond_below, dtype=_tf.bool),
            _tf.cast(cond_logical_not, dtype=_tf.bool),
        ),
        dtype=_tf.float32,
    )

    kr_noobj_ij = _tf.stop_gradient(cond_noobj)

    # Number of object cells; the epsilon avoids division by zero when the
    # batch contains no objects.
    count = _tf.reduce_sum(kr_obj_ij)
    eps_count = _tf.math.add(count, _tf.constant(1e-4))

    scale_conf = 1 / (self.batch_size * self.grid_shape[0] * self.grid_shape[1])

    # Trailing axis added so the mask broadcasts against 2-component xy/wh.
    kr_obj_ij_plus1 = _tf.expand_dims(kr_obj_ij, -1)
    if rescore:
        # Confidence target is the (non-differentiated) predicted IoU.
        obj_gt_conf = kr_obj_ij * _tf.stop_gradient(iou)
    else:
        obj_gt_conf = kr_obj_ij
    # Per-cell weight of the confidence loss.
    obj_w_obj = kr_obj_ij * lmb_obj
    obj_w_noobj = kr_noobj_ij * lmb_noobj
    obj_w = _tf.math.add(obj_w_obj, obj_w_noobj)

    loss_xy = (lmb_coord_xy *
               _tf.reduce_sum(kr_obj_ij_plus1 * _tf.square(gt_xy - xy)) /
               eps_count)

    loss_wh = _tf.losses.huber_loss(
        labels=gt_raw_wh,
        predictions=raw_wh,
        weights=lmb_coord_wh * kr_obj_ij_plus1,
        delta=1.0,
    )

    loss_conf = scale_conf * _tf.reduce_sum(
        obj_w * _tf.nn.sigmoid_cross_entropy_with_logits(
            labels=obj_gt_conf, logits=raw_conf))

    loss_cls = (lmb_class * _tf.reduce_sum(
        kr_obj_ij * _tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=gt_class, logits=class_scores)) / eps_count)

    losses = [loss_xy, loss_wh, loss_conf, loss_cls]
    loss = _tf.add_n(losses)
    return loss
def large_margin(  # pylint: disable=invalid-name
        _sentinel=None,
        logits=None,
        one_hot_labels=None,
        layers_list=None,
        gamma=10000,
        alpha_factor=2,
        top_k=1,
        dist_norm=2,
        epsilon=1e-8,
        use_approximation=True,
        worst_case_loss=True,
        layers_weights=None,
        loss_collection=tf.compat.v1.GraphKeys.LOSSES):
    """Creates a large margin loss.

    Args:
      _sentinel: Used to prevent positional parameters. Internal, do not use.
      logits: Float `[batch_size, num_classes]` logits outputs of the network.
      one_hot_labels: `[batch_size, num_classes]` Target integer labels in
        `{0, 1}`.
      layers_list: List of network Tensors at different layers. The large
        margin is enforced at the layers specified.
      gamma: Desired margin; distances to the boundary above the margin are
        clipped.
      alpha_factor: Factor to determine the lower bound of margin. Both gamma
        and alpha_factor determine the points to include in training the
        margin: those whose distance to the boundary lies in
        `[gamma * (1 - alpha_factor), gamma]`.
      top_k: Number of top classes to include in the margin loss.
      dist_norm: Norm in which distance to boundary is measured (options: 1,
        2, np.inf).
      epsilon: Small number to avoid division by 0.
      use_approximation: If true, use an approximation of the margin gradient
        (the gradient norm is treated as a constant) for less computationally
        expensive training.
      worst_case_loss: (Boolean) If true, use the minimum distance to boundary
        over the top_k classes; otherwise use the sum of the losses of the
        top_k classes. When top_k = 1, both choices are equivalent.
      layers_weights: (List of float) Weight for loss from each layer.
        Defaults to 1.0 for every layer.
      loss_collection: Collection to which the loss will be added.

    Returns:
      loss: Scalar `Tensor` of the same type as `logits`.

    Raises:
      ValueError: If the shape of `logits` doesn't match that of
        `one_hot_labels`. Also if `one_hot_labels` or `logits` is None.
    """
    _ensure_large_margin_args("large_margin", _sentinel, one_hot_labels,
                              logits, layers_list, dist_norm, layers_weights)
    logits = tf.convert_to_tensor(logits)
    one_hot_labels = tf.cast(one_hot_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(one_hot_labels.get_shape())

    if layers_weights is None:
        layers_weights = [1.] * len(layers_list)

    assert top_k > 0
    assert top_k <= logits.get_shape()[1]

    # The gradient norm is measured in the dual of the distance norm.
    dual_of = {1: np.inf, 2: 2, np.inf: 1}
    norm_fn = get_norm_fn(dual_of[dist_norm])

    # Distances to consider lie between these two bounds.
    upper_bound = gamma
    lower_bound = gamma * (1 - alpha_factor)

    with tf.name_scope("large_margin_loss"):
        probabilities = tf.nn.softmax(logits)

        # Probability assigned to the correct class.
        true_prob = tf.reduce_sum(probabilities * one_hot_labels,
                                  axis=1,
                                  keepdims=True)

        # Probabilities of every class except the correct one.
        wrong_probs = probabilities * (1. - one_hot_labels)
        if top_k > 1:
            # The top_k largest probabilities among the wrong classes.
            rival_probs, _ = tf.nn.top_k(wrong_probs, k=top_k)
        else:
            rival_probs = tf.reduce_max(wrong_probs, axis=1, keepdims=True)

        # Gap between the correct-class probability and each rival.
        prob_gap = true_prob - rival_probs

        per_layer_losses = []
        for layer_weight, layer in zip(layers_weights, layers_list):
            # One column per rival: norm of d(gap)/d(layer) for each example.
            gradnorm_columns = []
            for rival in range(top_k):
                flat_grad = tf.layers.flatten(
                    tf.gradients(prob_gap[:, rival], layer)[0])
                column = tf.map_fn(norm_fn, flat_grad)[:, tf.newaxis]
                gradnorm_columns.append(column / layer_weight)
            gap_gradnorm = tf.concat(gradnorm_columns, axis=1)

            if use_approximation:
                gap_gradnorm = tf.stop_gradient(gap_gradnorm)

            distance = prob_gap / (gap_gradnorm + epsilon)

            if worst_case_loss:
                # Only consider worst distance to boundary.
                distance = tf.reduce_min(distance, axis=1, keepdims=True)

            # Enforce lower bound.
            layer_loss = maximum_with_relu(distance, lower_bound)

            # Enforce upper bound.
            layer_loss = maximum_with_relu(
                0, upper_bound - layer_loss) - upper_bound

            layer_loss = tf.reduce_sum(layer_loss, axis=1)
            per_layer_losses.append(tf.reduce_mean(layer_loss))

        loss = tf.reduce_mean(per_layer_losses)
        # Add loss to loss_collection.
        tf.losses.add_loss(loss, loss_collection)
    return loss
def estimator_fn(features, labels, mode):
    """Estimator model function for one-step simulator training/evaluation.

    Args:
      features: dict of input tensors. The keys 'position', 'particle_type'
        and 'n_particles_per_example' are read, plus the optional
        'step_context'.
      labels: target next positions.
      mode: the `tf.estimator` mode, forwarded to the returned spec.

    Returns:
      A `tf.estimator.EstimatorSpec` carrying the masked acceleration loss,
      the Adam training op, the predicted next positions and two MSE
      evaluation metrics.
    """
    target_next_position = labels
    simulator = _get_simulator(model_kwargs,
                               metadata,
                               vel_noise_std=noise_std,
                               acc_noise_std=noise_std)

    # Sample the noise to add to the inputs to the model during training.
    sampled_noise = noise_utils.get_random_walk_noise_for_position_sequence(
        features['position'], noise_std_last_step=noise_std)
    non_kinematic_mask = tf.logical_not(
        get_kinematic_mask(features['particle_type']))
    # Cast the boolean mask to the noise dtype and add two trailing axes so
    # it broadcasts over the noise tensor.
    noise_mask = tf.cast(non_kinematic_mask,
                         sampled_noise.dtype)[:, tf.newaxis, tf.newaxis]
    # Kinematic particles receive no noise.
    sampled_noise *= noise_mask

    # Get the predictions and target accelerations.
    pred_target = simulator.get_predicted_and_target_normalized_accelerations(
        next_position=target_next_position,
        position_sequence=features['position'],
        position_sequence_noise=sampled_noise,
        n_particles_per_example=features['n_particles_per_example'],
        particle_types=features['particle_type'],
        global_context=features.get('step_context'))
    pred_acceleration, target_acceleration = pred_target

    # Calculate the loss and mask out loss on kinematic particles.
    loss = (pred_acceleration - target_acceleration)**2

    # Scalar count of non-kinematic particles.
    num_non_kinematic = tf.reduce_sum(
        tf.cast(non_kinematic_mask, tf.float32))

    # Keep the loss where the mask is True and zero it elsewhere.
    loss = tf.where(non_kinematic_mask, loss, tf.zeros_like(loss))
    loss = tf.reduce_sum(loss) / num_non_kinematic
    global_step = tf.train.get_global_step()

    # Set learning rate to decay from 1e-4 to 1e-6 exponentially: the
    # decaying part shrinks by a factor of 0.1 every 5e6 steps and min_lr is
    # added back as a floor.
    min_lr = 1e-6
    lr = tf.train.exponential_decay(learning_rate=1e-4 - min_lr,
                                    global_step=global_step,
                                    decay_steps=int(5e6),
                                    decay_rate=0.1) + min_lr
    opt = tf.train.AdamOptimizer(learning_rate=lr)
    train_op = opt.minimize(loss, global_step)

    # Calculate next position and add some additional eval metrics (only
    # eval).
    predicted_next_position = simulator(
        position_sequence=features['position'],
        n_particles_per_example=features['n_particles_per_example'],
        particle_types=features['particle_type'],
        global_context=features.get('step_context'))

    predictions = {'predicted_next_position': predicted_next_position}

    eval_metrics_ops = {
        'loss_mse':
            tf.metrics.mean_squared_error(pred_acceleration,
                                          target_acceleration),
        'one_step_position_mse':
            tf.metrics.mean_squared_error(predicted_next_position,
                                          target_next_position)
    }

    return tf.estimator.EstimatorSpec(mode=mode,
                                      train_op=train_op,
                                      loss=loss,
                                      predictions=predictions,
                                      eval_metric_ops=eval_metrics_ops)
# Build the neural network.
# Training data: noisy samples of y = x^2 - 0.5 on [-1, 1].
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise

with tf.name_scope('inputs'):
    xs = tf.placeholder(tf.float32, [None, 1], name='x_input')
    ys = tf.placeholder(tf.float32, [None, 1], name='y_input')

l1 = add_layer(xs, 1, 10, n_layer=1, activation_function=tf.nn.relu)
prediction = add_layer(l1, 10, 1, n_layer=2, activation_function=None)

# Error between the prediction and the true value: per-example sum of
# squared differences, averaged over the batch. `axis` replaces the
# deprecated `reduction_indices` keyword.
with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction),
                                        axis=[1]),
                          name='loss')
    tf.summary.scalar('loss', loss)

with tf.name_scope('layer'):
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

init = tf.global_variables_initializer()
sess = tf.Session()
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter("logs/", sess.graph)
sess.run(init)

for i in range(1000):
    sess.run(train_step, feed_dict={xs: x_data, ys: y_data})
    # Evaluate the merged summaries every 50 steps.
    if i % 50 == 0:
        result = sess.run(merged, feed_dict={xs: x_data, ys: y_data})
def get_prediction_module(self, bert_model, features, is_training, percent_done):
    """Builds the question-answering prediction head and its losses.

    Produces start/end logits over the sequence (optionally with beam-based
    joint prediction), an auxiliary "plausible answer" span loss and,
    optionally, an answerability classifier.

    Args:
      bert_model: encoder exposing `get_sequence_output()`.
      features: dict of input tensors. Reads "input_mask", "segment_ids" and
        the task-prefixed keys "_start_positions", "_end_positions",
        "_plau_answer_start", "_plau_answer_end", "_is_impossible" and
        "_eid".
      is_training: Python bool selecting the training or inference graph.
      percent_done: unused in this method.

    Returns:
      A tuple `(losses, outputs)` where `losses` is the combined loss tensor
      and `outputs` is a dict with the loss, logits, gold positions, beam
      results and example ids.
    """
    final_hidden = bert_model.get_sequence_output()

    # NOTE: an experimental "sgnet" block was previously here and is
    # disabled. It built a dependency mask from features and ran one extra
    # `modeling.transformer_model` layer over `final_hidden`, mixing the
    # result back in with a learned sigmoid gate.

    final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
    batch_size = final_hidden_shape[0]
    seq_length = final_hidden_shape[1]

    # Positions allowed as answers: real tokens with segment id 1, plus
    # position 0.
    answer_mask = tf.cast(features["input_mask"], tf.float32)
    answer_mask *= tf.cast(features["segment_ids"], tf.float32)
    answer_mask += tf.one_hot(0, seq_length)

    start_logits = tf.squeeze(tf.layers.dense(final_hidden, 1), -1)

    # Beam outputs default to zeros; they are only filled in when joint
    # prediction is enabled.
    start_top_log_probs = tf.zeros([batch_size, self.config.beam_size])
    start_top_index = tf.zeros([batch_size, self.config.beam_size], tf.int32)
    end_top_log_probs = tf.zeros(
        [batch_size, self.config.beam_size, self.config.beam_size])
    end_top_index = tf.zeros(
        [batch_size, self.config.beam_size, self.config.beam_size], tf.int32)
    if self.config.joint_prediction:
        # Subtract 1000 from the logits of disallowed positions.
        start_logits += 1000.0 * (answer_mask - 1)
        start_log_probs = tf.nn.log_softmax(start_logits)
        start_top_log_probs, start_top_index = tf.nn.top_k(
            start_log_probs, k=self.config.beam_size)

        if not is_training:
            # batch, beam, length, hidden
            end_features = tf.tile(tf.expand_dims(final_hidden, 1),
                                   [1, self.config.beam_size, 1, 1])
            # batch, beam, length
            start_index = tf.one_hot(start_top_index, depth=seq_length, axis=-1, dtype=tf.float32)
            # batch, beam, hidden
            start_features = tf.reduce_sum(
                tf.expand_dims(final_hidden, 1) * tf.expand_dims(start_index, -1),
                axis=-2)
            # batch, beam, length, hidden
            start_features = tf.tile(tf.expand_dims(start_features, 2),
                                     [1, 1, seq_length, 1])
        else:
            # Training conditions the end prediction on the gold start.
            start_index = tf.one_hot(features[self.name + "_start_positions"], depth=seq_length, axis=-1, dtype=tf.float32)
            start_features = tf.reduce_sum(
                tf.expand_dims(start_index, -1) * final_hidden, axis=1)
            start_features = tf.tile(tf.expand_dims(start_features, 1),
                                     [1, seq_length, 1])
            end_features = final_hidden

        final_repr = tf.concat([start_features, end_features], -1)
        final_repr = tf.layers.dense(final_repr, 512, activation=modeling.gelu, name="qa_hidden")
        # batch, beam, length (batch, length when training)
        end_logits = tf.squeeze(tf.layers.dense(final_repr, 1), -1, name="qa_logits")
        if is_training:
            end_logits += 1000.0 * (answer_mask - 1)
        else:
            end_logits += tf.expand_dims(1000.0 * (answer_mask - 1), 1)

        if not is_training:
            end_log_probs = tf.nn.log_softmax(end_logits)
            end_top_log_probs, end_top_index = tf.nn.top_k(
                end_log_probs, k=self.config.beam_size)
            # At inference the beam outputs carry the result; end_logits is
            # replaced by a zero placeholder of shape [batch, length].
            end_logits = tf.zeros([batch_size, seq_length])
    else:
        end_logits = tf.squeeze(tf.layers.dense(final_hidden, 1), -1)
        start_logits += 1000.0 * (answer_mask - 1)
        end_logits += 1000.0 * (answer_mask - 1)

    def compute_loss(logits, positions):
        # Per-example cross-entropy between softmax(logits) and the one-hot
        # encoding of `positions`.
        one_hot_positions = tf.one_hot(positions, depth=seq_length, dtype=tf.float32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        loss = -tf.reduce_sum(one_hot_positions * log_probs, axis=-1)
        return loss

    start_positions = features[self.name + "_start_positions"]
    end_positions = features[self.name + "_end_positions"]

    start_loss = compute_loss(start_logits, start_positions)
    end_loss = compute_loss(end_logits, end_positions)

    losses = (start_loss + end_loss) / 2.0

    # plausible answer loss
    # A separate dense layer yields two logits per position, which are split
    # into start and end logits for the plausible-answer span.
    plau_logits = tf.layers.dense(final_hidden, 2)
    plau_logits = tf.reshape(plau_logits, [batch_size, seq_length, 2])
    plau_logits = tf.transpose(plau_logits, [2, 0, 1])
    unstacked_logits = tf.unstack(plau_logits, axis=0)
    (plau_start_logits, plau_end_logits) = (unstacked_logits[0], unstacked_logits[1])
    plau_start_logits += 1000.0 * (answer_mask - 1)
    plau_end_logits += 1000.0 * (answer_mask - 1)
    plau_start_positions = features[self.name + "_plau_answer_start"]
    plau_end_positions = features[self.name + "_plau_answer_end"]
    plau_start_loss = compute_loss(plau_start_logits, plau_start_positions)
    plau_end_loss = compute_loss(plau_end_logits, plau_end_positions)
    losses += (plau_start_loss + plau_end_loss) / 2.0

    # NOTE: an alternative plausible-answer loss (`compute_loss_for_plau`)
    # was previously here and is disabled. It penalised start/end probability
    # mass on positions inside the plausible span, excluding the true start
    # position.

    answerable_logit = tf.zeros([batch_size])
    if self.config.answerable_classifier:
        # Classify answerability from the hidden state at position 0.
        final_repr = final_hidden[:, 0]
        if self.config.answerable_uses_start_logits:
            # Also feed the start-probability-weighted sum of hidden states.
            start_p = tf.nn.softmax(start_logits)
            start_feature = tf.reduce_sum(tf.expand_dims(start_p, -1) * final_hidden, axis=1)
            final_repr = tf.concat([final_repr, start_feature], -1)
            final_repr = tf.layers.dense(final_repr, 512, activation=modeling.gelu)
        answerable_logit = tf.squeeze(tf.layers.dense(final_repr, 1), -1)
        answerable_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.cast(features[self.name + "_is_impossible"], tf.float32),
            logits=answerable_logit)
        losses += answerable_loss * self.config.answerable_weight

    return losses, dict(
        loss=losses,
        start_logits=start_logits,
        end_logits=end_logits,
        answerable_logit=answerable_logit,
        start_positions=features[self.name + "_start_positions"],
        end_positions=features[self.name + "_end_positions"],
        start_top_log_probs=start_top_log_probs,
        start_top_index=start_top_index,
        end_top_log_probs=end_top_log_probs,
        end_top_index=end_top_index,
        eid=features[self.name + "_eid"],
    )
def run_train(scope):
  """Builds the homography training graph and runs slim training on it.

  Depending on `FLAGS.loss` the objective is either a per-level mean squared
  error ('hier_l2'), a per-level mean corner distance ('hier_ld'), or a single
  mean squared error on the last level's prediction (any other value).

  Args:
    scope: the scope of variables in this function
  """
  with tf.Graph().as_default():
    with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)):
      # Grayscale conversion is switched off when the network id contains 'sem'.
      to_gray = 'sem' not in FLAGS.network_id

      batch_frames, batch_labels = get_samples(to_gray, 'train')
      batch_hmg_prediction, _ = predict_homography(
          batch_frames,
          network_id=FLAGS.network_id,
          is_training=True,
          scope=scope)

      if FLAGS.loss in ('hier_l2', 'hier_ld'):
        # One loss term per level; labels are divided by 2**(levels remaining).
        for level in range(FLAGS.num_level):
          delta_level = FLAGS.num_level - level - 1
          scale = 2**delta_level
          if FLAGS.loss == 'hier_l2':
            # tf.losses.* registers the loss in the losses collection itself.
            level_loss = tf.losses.mean_squared_error(
                batch_labels / scale, batch_hmg_prediction[level])
          else:
            corner_diff = tf.reshape(
                batch_labels / scale - batch_hmg_prediction[level],
                [FLAGS.batch_size, 4, 2])
            squared_dist = tf.reduce_sum(tf.square(corner_diff), 2)
            level_loss = tf.reduce_mean(tf.sqrt(squared_dist))
            # A hand-built loss has to be registered explicitly.
            tf.losses.add_loss(level_loss)
          slim.summaries.add_scalar_summary(
              level_loss, 'l2%d' % delta_level, 'losses')
      else:
        tf.losses.mean_squared_error(
            batch_labels, batch_hmg_prediction[FLAGS.num_level - 1])

      slim.summaries.add_scalar_summary(
          slim.losses.get_total_loss(), 'loss', 'losses')

      global_step = slim.get_or_create_global_step()
      learning_rate_decay = tf.train.exponential_decay(
          learning_rate=FLAGS.learning_rate,
          global_step=global_step,
          decay_steps=FLAGS.lr_decay_steps,
          decay_rate=FLAGS.lr_decay_rate,
          staircase=True)
      optimizer = tf.train.AdamOptimizer(learning_rate_decay)

      is_chief = (FLAGS.task == 0)
      train_op = slim.learning.create_train_op(
          slim.losses.get_total_loss(), optimizer=optimizer)
      saver = tf.train.Saver(max_to_keep=20)

      # Choose what (if anything) is warm-started from a checkpoint.
      if FLAGS.level_wise == 0:
        # Restore every level except the last one from FLAGS.model_path.
        variables_to_restore = []
        for i in range(FLAGS.num_level - 1):
          variables_to_restore.extend(
              slim.get_variables(scope='%s/level%d' % (scope, i)))
        init_fn = slim.assign_from_checkpoint_fn(
            FLAGS.model_path, variables_to_restore)
      elif 'sem' in FLAGS.network_id:
        init_fn = slim.assign_from_checkpoint_fn(
            FLAGS.vgg_model_path, slim.get_variables(scope='vgg_16'))
      else:
        init_fn = None

      slim.learning.train(
          train_op=train_op,
          logdir=FLAGS.train_dir,
          save_summaries_secs=60,
          save_interval_secs=600,
          saver=saver,
          number_of_steps=FLAGS.max_step,
          master=FLAGS.master,
          is_chief=is_chief,
          init_fn=init_fn)
tf.disable_v2_behavior() env = gym.make('FrozenLake-v0') tf.reset_default_graph() #These lines establish the feed-forward part of the network used to choose actions inputs1 = tf.placeholder(shape=[1, 16], dtype=tf.float32) W = tf.Variable(tf.random_uniform([16, 4], 0, 0.01)) Qout = tf.matmul(inputs1, W) predict = tf.argmax(Qout, 1) #Below we obtain the loss by taking the sum of squares difference between the target and prediction Q values. nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32) loss = tf.reduce_sum(tf.square(nextQ - Qout)) trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1) updateModel = trainer.minimize(loss) init = tf.initialize_all_variables() # Set learning parameters y = .99 e = 0.1 num_episodes = 2000 #create lists to contain total rewards and steps per episode jList = [] rList = [] with tf.Session() as sess: sess.run(init) for i in range(num_episodes):
h_pool2_flat = tf.reshape(h_pool2, [-1, 20 * 9 * 32]) h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) # Dropout with tf.name_scope('Dropout'): keep_prob = tf.placeholder("float") h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) # Readout Layer with tf.name_scope('Softmax'): W_fc2 = weight_variable([128, 10]) b_fc2 = bias_variable([10]) y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2) with tf.name_scope('Loss'): cross_entropy = -tf.reduce_sum( y_ * tf.log(tf.clip_by_value(y_conv, 1e-8, 1))) with tf.name_scope('Train'): train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) with tf.name_scope('Accuracy'): correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) tf.initialize_all_variables().run() for i in range(15000): if i % 20 == 0: train_accuracy = accuracy.eval(feed_dict={ x: data[i],
def cat_entropy_softmax(p0):
    """Returns the entropy of `p0` summed over axis 1.

    Computes -sum(p0 * log(p0 + 1e-6)) along axis 1. The 1e-6 offset keeps the
    logarithm finite where an entry of `p0` is exactly zero.
    """
    log_p0 = tf.log(p0 + 1e-6)
    weighted = p0 * log_p0
    return -tf.reduce_sum(weighted, axis=1)
def selective_crop_and_resize(features, boxes, box_levels, boundaries, output_size=7, sample_offset=0.5, use_einsum_gather=False):
  """Crop and resize boxes on a set of feature maps.

  Given multiple features maps indexed by different levels, and a set of boxes
  where each box is mapped to a certain level, it selectively crops and resizes
  boxes from the corresponding feature maps to generate the box features.

  We follow the ROIAlign technique (see https://arxiv.org/pdf/1703.06870.pdf,
  figure 3 for reference). Specifically, for each feature map, we select an
  (output_size, output_size) set of pixels corresponding to the box location,
  and then use bilinear interpolation to select the feature value for each
  pixel.

  For performance, we perform the gather and interpolation on all layers as a
  single operation. In this op the multi-level features are first stacked and
  gathered into [2*output_size, 2*output_size] feature points. Then bilinear
  interpolation is performed on the gathered feature points to generate
  [output_size, output_size] RoIAlign feature map.

  Here is the step-by-step algorithm:
    1. The multi-level features are gathered into a
       [batch_size, num_boxes, output_size*2, output_size*2, num_filters]
       Tensor. The Tensor contains four neighboring feature points for each
       vertex in the output grid.
    2. Compute the interpolation kernel of shape
       [batch_size, num_boxes, output_size*2, output_size*2]. The last 2 axes
       can be seen as stacking 2x2 interpolation kernels for all vertices in
       the output grid.
    3. Element-wise multiply the gathered features and interpolation kernel.
       Then apply 2x2 average pooling to reduce spatial dimension to
       output_size.

  Args:
    features: a 5-D tensor of shape [batch_size, num_levels, max_height,
      max_width, num_filters] where cropping and resizing are based.
    boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
      information of each box w.r.t. the corresponding feature map.
      boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left
      corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float)
      in terms of the number of pixels of the corresponding feature map size.
    box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1] representing
      the 0-based corresponding feature level index of each box. Only read by
      the gather path (use_einsum_gather=False).
    boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing
      the boundary (in (y, x)) of the corresponding feature map for each box.
      Any resampled grid points that go beyond the boundary will be clipped.
    output_size: a scalar indicating the output crop size.
    sample_offset: a float number in [0, 1] indicates the subpixel sample offset
      from grid point.
    use_einsum_gather: whether to use einsum in place of gather. Per the
      original author's note, einsum can improve performance when the feature
      size is not large and is friendly with model partition; gather performs
      better when the feature size is very large and there are multiple box
      levels.

  Returns:
    features_per_box: a 5-D tensor of shape
      [batch_size, num_boxes, output_size, output_size, num_filters]
      representing the cropped features.
  """
  (batch_size, num_levels, max_feature_height, max_feature_width,
   num_filters) = features.get_shape().as_list()
  # Fall back to the dynamic batch size when the static one is unknown.
  if batch_size is None:
    batch_size = tf.shape(features)[0]
  _, num_boxes, _ = boxes.get_shape().as_list()

  kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = compute_grid_positions(
      boxes, boundaries, output_size, sample_offset)
  # Integer grid coordinates; these are only consumed by the gather path below.
  x_indices = tf.cast(
      tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]),
      dtype=tf.int32)
  y_indices = tf.cast(
      tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2]),
      dtype=tf.int32)

  if use_einsum_gather:
    # Bilinear interpolation is done during the last two gathers:
    #        f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
    #                              [f10, f11]]
    # [[f00, f01],
    #  [f10, f11]] = tf.einsum(tf.einsum(features, y_one_hot), x_one_hot)
    # where [hy, ly] and [hx, lx] are the bilinear interpolation kernel.
    #
    # NOTE(review): both einsum equations share index `m` between axis 1 of
    # `features` (documented as num_levels) and axis 1 of the weights
    # (num_boxes), and `box_levels` is not used on this path. Presumably callers
    # arrange `features` so these axes agree -- confirm before relying on it.

    # shape is [batch_size, boxes, output_size, 2, 1]
    grid_y_one_hot, grid_x_one_hot = get_grid_one_hot(box_gridy0y1,
                                                      box_gridx0x1,
                                                      max_feature_height,
                                                      max_feature_width)

    # shape is [batch_size, num_boxes, output_size, height]
    grid_y_weight = tf.reduce_sum(
        tf.multiply(grid_y_one_hot, kernel_y), axis=-2)
    # shape is [batch_size, num_boxes, output_size, width]
    grid_x_weight = tf.reduce_sum(
        tf.multiply(grid_x_one_hot, kernel_x), axis=-2)

    # Gather for y_axis.
    # shape is [batch_size, num_boxes, output_size, width, features]
    features_per_box = tf.einsum('bmhwf,bmoh->bmowf', features,
                                 tf.cast(grid_y_weight, features.dtype))
    # Gather for x_axis.
    # shape is [batch_size, num_boxes, output_size, output_size, features]
    features_per_box = tf.einsum('bmhwf,bmow->bmhof', features_per_box,
                                 tf.cast(grid_x_weight, features.dtype))
  else:
    # Strides of a row, a level and a batch element in the flattened
    # [batch * level * height * width, num_filters] feature table built below.
    height_dim_offset = max_feature_width
    level_dim_offset = max_feature_height * height_dim_offset
    batch_dim_offset = num_levels * level_dim_offset

    # Each *_offset tensor is tiled to
    # [batch_size, num_boxes, output_size * 2, output_size * 2] so that their
    # sum is the flat row index of every sampled feature point.
    batch_size_offset = tf.tile(
        tf.reshape(
            tf.range(batch_size) * batch_dim_offset, [batch_size, 1, 1, 1]),
        [1, num_boxes, output_size * 2, output_size * 2])
    box_levels_offset = tf.tile(
        tf.reshape(box_levels * level_dim_offset,
                   [batch_size, num_boxes, 1, 1]),
        [1, 1, output_size * 2, output_size * 2])
    y_indices_offset = tf.tile(
        tf.reshape(y_indices * height_dim_offset,
                   [batch_size, num_boxes, output_size * 2, 1]),
        [1, 1, 1, output_size * 2])
    x_indices_offset = tf.tile(
        tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
        [1, 1, output_size * 2, 1])

    indices = tf.reshape(
        batch_size_offset + box_levels_offset + y_indices_offset +
        x_indices_offset, [-1])

    features = tf.reshape(features, [-1, num_filters])
    # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar
    # performance.
    features_per_box = tf.reshape(
        tf.gather(features, indices),
        [batch_size, num_boxes, output_size * 2, output_size * 2, num_filters])
    # The gather path interpolates explicitly; the einsum path has already
    # folded the kernels into its weights.
    features_per_box = feature_bilinear_interpolation(features_per_box,
                                                      kernel_y, kernel_x)

  return features_per_box
def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cost, value_cost, l2_cost, combined_cost, step, est_mode=tf.estimator.ModeKeys.TRAIN):
    """Builds streaming metrics and, outside EVAL mode, their summary ops.

    `params` and `FLAGS` are read from the enclosing scope.

    Returns:
        In EVAL mode, a dict mapping metric name to the (value, update_op) pair
        produced by `tf.metrics.mean`. Otherwise, the list of all summary ops
        followed by an op that conditionally resets the metric variables.
    """
    per_example_neg_entropy = tf.reduce_sum(
        policy_output * tf.log(policy_output), axis=1)
    policy_entropy = -tf.reduce_mean(per_example_neg_entropy)

    # pi_tensor is one_hot when generated from sgfs (for supervised learning)
    # and soft-max when using self-play records. argmax normalizes the two.
    target_move = tf.argmax(pi_tensor, axis=1)

    hit_top1 = tf.to_float(tf.nn.in_top_k(policy_output, target_move, k=1))
    hit_top3 = tf.to_float(tf.nn.in_top_k(policy_output, target_move, k=3))

    top1_confidence = tf.reduce_max(policy_output, axis=1)
    # policy_target_top_1_confidence = tf.boolean_mask(
    #     policy_output,
    #     tf.one_hot(policy_target_top_1, tf.shape(policy_output)[1]))

    normalized_value_cost = value_cost / params['value_cost_weight']

    # All metric variables live under "metrics" so they can be reset together.
    with tf.variable_scope("metrics"):
        metric_ops = {
            'policy_cost': tf.metrics.mean(policy_cost),
            'value_cost': tf.metrics.mean(value_cost),
            'value_cost_normalized': tf.metrics.mean(normalized_value_cost),
            'l2_cost': tf.metrics.mean(l2_cost),
            'policy_entropy': tf.metrics.mean(policy_entropy),
            'combined_cost': tf.metrics.mean(combined_cost),
            'policy_accuracy_top_1': tf.metrics.mean(hit_top1),
            'policy_accuracy_top_3': tf.metrics.mean(hit_top3),
            'policy_top_1_confidence': tf.metrics.mean(top1_confidence),
            # 'policy_target_top_1_confidence': tf.metrics.mean(
            #     policy_target_top_1_confidence),
            'value_confidence': tf.metrics.mean(tf.abs(value_output)),
        }

    if est_mode == tf.estimator.ModeKeys.EVAL:
        return metric_ops

    # NOTE: global_step is rounded to a multiple of FLAGS.summary_steps.
    eval_step = tf.reduce_min(step)
    summary_interval = params['summary_steps']

    # Create summary ops so that they show up in SUMMARIES collection
    # That way, they get logged automatically during training
    writer = summary.create_file_writer(FLAGS.work_dir)
    with writer.as_default(), \
            summary.record_summaries_every_n_global_steps(
                summary_interval, eval_step):
        for metric_name, (_, update_op) in metric_ops.items():
            # Summarise the update op (second element of the metric pair).
            summary.scalar(metric_name, update_op, step=eval_step)

    # Reset metrics occasionally so that they are mean of recent batches.
    reset_op = tf.variables_initializer(tf.local_variables("metrics"))
    should_reset = tf.equal(eval_step % summary_interval, tf.to_int64(1))
    cond_reset_op = tf.cond(should_reset, lambda: reset_op, tf.no_op)

    return summary.all_summary_ops() + [cond_reset_op]
def train_function(training_method, loss, cross_loss, reg_loss, output_dir, use_tpu):
  """Training script for resnet model.

  Builds the base optimizer (Adam or Nesterov momentum, chosen by
  `FLAGS.use_adam`), optionally wraps it for TPU and for the requested sparse
  training method, creates the train op and collects the tensors to log.

  Args:
    training_method: string indicating pruning method used to compress model.
      One of 'set', 'static', 'momentum', 'rigl', 'snip', 'scratch' or
      'baseline'.
    loss: tensor float32 of the cross entropy + regularization losses.
    cross_loss: tensor, only cross entropy loss, passed for logging.
    reg_loss: tensor, only regularization loss, passed for logging.
    output_dir: string tensor indicating the directory to save summaries.
    use_tpu: boolean indicating whether to run script on a tpu.

  Returns:
    host_call: summary tensors to be computed at each training step.
    train_op: the optimization term.

  Raises:
    ValueError: if `training_method` is not one of the supported methods.
  """
  global_step = tf.train.get_global_step()

  steps_per_epoch = FLAGS.num_train_images / FLAGS.train_batch_size
  current_epoch = (tf.cast(global_step, tf.float32) / steps_per_epoch)
  learning_rate = lr_schedule(current_epoch)
  if FLAGS.use_adam:
    # We don't use step decrease for the learning rate.
    learning_rate = FLAGS.base_learning_rate * (FLAGS.train_batch_size / 256.0)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
  else:
    optimizer = tf.train.MomentumOptimizer(
        learning_rate=learning_rate,
        momentum=FLAGS.momentum,
        use_nesterov=True)

  if use_tpu:
    # use CrossShardOptimizer when using TPU.
    optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

  if training_method == 'set':
    # We override the train op to also update the mask.
    optimizer = sparse_optimizers.SparseSETOptimizer(
        optimizer,
        begin_step=FLAGS.maskupdate_begin_step,
        end_step=FLAGS.maskupdate_end_step,
        grow_init=FLAGS.grow_init,
        frequency=FLAGS.maskupdate_frequency,
        drop_fraction=FLAGS.drop_fraction,
        drop_fraction_anneal=FLAGS.drop_fraction_anneal,
        stateless_seed_offset=FLAGS.seed)
  elif training_method == 'static':
    # We override the train op to also update the mask.
    optimizer = sparse_optimizers.SparseStaticOptimizer(
        optimizer,
        begin_step=FLAGS.maskupdate_begin_step,
        end_step=FLAGS.maskupdate_end_step,
        grow_init=FLAGS.grow_init,
        frequency=FLAGS.maskupdate_frequency,
        drop_fraction=FLAGS.drop_fraction,
        drop_fraction_anneal=FLAGS.drop_fraction_anneal,
        stateless_seed_offset=FLAGS.seed)
  elif training_method == 'momentum':
    # We override the train op to also update the mask.
    optimizer = sparse_optimizers.SparseMomentumOptimizer(
        optimizer,
        begin_step=FLAGS.maskupdate_begin_step,
        end_step=FLAGS.maskupdate_end_step,
        momentum=FLAGS.s_momentum,
        frequency=FLAGS.maskupdate_frequency,
        drop_fraction=FLAGS.drop_fraction,
        grow_init=FLAGS.grow_init,
        stateless_seed_offset=FLAGS.seed,
        drop_fraction_anneal=FLAGS.drop_fraction_anneal,
        use_tpu=use_tpu)
  elif training_method == 'rigl':
    # We override the train op to also update the mask.
    optimizer = sparse_optimizers.SparseRigLOptimizer(
        optimizer,
        begin_step=FLAGS.maskupdate_begin_step,
        end_step=FLAGS.maskupdate_end_step,
        grow_init=FLAGS.grow_init,
        frequency=FLAGS.maskupdate_frequency,
        drop_fraction=FLAGS.drop_fraction,
        stateless_seed_offset=FLAGS.seed,
        drop_fraction_anneal=FLAGS.drop_fraction_anneal,
        initial_acc_scale=FLAGS.rigl_acc_scale,
        use_tpu=use_tpu)
  elif training_method == 'snip':
    optimizer = sparse_optimizers.SparseSnipOptimizer(
        optimizer,
        mask_init_method=FLAGS.mask_init_method,
        custom_sparsity_map=CUSTOM_SPARSITY_MAP,
        default_sparsity=FLAGS.end_sparsity,
        use_tpu=use_tpu)
  elif training_method in ('scratch', 'baseline'):
    # Dense / statically-masked training: the base optimizer is used as is.
    pass
  else:
    # Report the argument that was actually rejected; it is not guaranteed to
    # match FLAGS.training_method.
    raise ValueError('Unsupported pruning method: %s' % training_method)

  # UPDATE_OPS needs to be added as a dependency due to batch norm
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops), tf.name_scope('train'):
    train_op = optimizer.minimize(loss, global_step)

  metrics = {
      'global_step': tf.train.get_or_create_global_step(),
      'loss': loss,
      'cross_loss': cross_loss,
      'reg_loss': reg_loss,
      'learning_rate': learning_rate,
      'current_epoch': current_epoch,
  }

  # Logging drop_fraction if dynamic sparse training.
  if training_method in ('set', 'momentum', 'rigl', 'static'):
    metrics['drop_fraction'] = optimizer.drop_fraction

  # Let's log some statistics from a single parameter-mask couple.
  # This is useful for debugging.
  test_var = pruning.get_weights()[0]
  test_var_mask = pruning.get_masks()[0]
  metrics.update({
      'fw_nz_weight': tf.count_nonzero(test_var),
      'fw_nz_mask': tf.count_nonzero(test_var_mask),
      'fw_l1_weight': tf.reduce_sum(tf.abs(test_var))
  })

  masks = pruning.get_masks()
  global_sparsity = sparse_utils.calculate_sparsity(masks)
  metrics['global_sparsity'] = global_sparsity
  # Summarise only the first four masks and the last one.
  metrics.update(
      utils.mask_summaries(
          masks[:4] + masks[-1:],
          with_img=FLAGS.log_mask_imgs_each_iteration))

  host_call = (functools.partial(utils.host_call_fn, output_dir),
               utils.format_tensors(metrics))

  return host_call, train_op
def base_conditional(Kmn, Kmm, Knn, f, *, full_cov=False, q_sqrt=None, white=False): """ Given a g1 and g2, and distribution p and q such that p(g2) = N(g2;0,Kmm) p(g1) = N(g1;0,Knn) p(g1|g2) = N(g1;0,Knm) And q(g2) = N(g2;f,q_sqrt*q_sqrt^T) This method computes the mean and (co)variance of q(g1) = \int q(g2) p(g1|g2) :param Kmn: M x N :param Kmm: M x M :param Knn: N x N or N :param f: M x R :param full_cov: bool :param q_sqrt: None or R x M x M (lower triangular) :param white: bool :return: N x R or R x N x N """ # compute kernel stuff num_func = tf.shape(f)[1] # R Lm = tf.cholesky(Kmm) # Compute the projection matrix A A = tf.matrix_triangular_solve(Lm, Kmn, lower=True) # compute the covariance due to the conditioning if full_cov: fvar = Knn - tf.matmul(A, A, transpose_a=True) fvar = tf.tile(fvar[None, :, :], [num_func, 1, 1]) # R x N x N else: fvar = Knn - tf.reduce_sum(tf.square(A), 0) fvar = tf.tile(fvar[None, :], [num_func, 1]) # R x N # another backsubstitution in the unwhitened case if not white: A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False) # construct the conditional mean fmean = tf.matmul(A, f, transpose_a=True) if q_sqrt is not None: if q_sqrt.get_shape().ndims == 2: LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2) # R x M x N elif q_sqrt.get_shape().ndims == 3: L = q_sqrt A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([num_func, 1, 1])) LTA = tf.matmul(L, A_tiled, transpose_a=True) # R x M x N else: # pragma: no cover raise ValueError("Bad dimension for q_sqrt: %s" % str(q_sqrt.get_shape().ndims)) if full_cov: fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True) # R x N x N else: fvar = fvar + tf.reduce_sum(tf.square(LTA), 1) # R x N if not full_cov: fvar = tf.transpose(fvar) # N x R return fmean, fvar # N x R, R x N x N or N x R
def train(train_list, val_list, debug_mode=True):
    """Trains PRLNet and periodically validates and checkpoints it.

    Hyper-parameters (`in_channels`, `out_channels`, `batch_size`,
    `learning_rate`, `n_epochs`, `beta1`, `display_steps`, `save_epochs`) are
    read from module scope.

    Args:
        train_list: training data list handed to `input_producer`.
        val_list: validation data list handed to `input_producer`; only used
            when `debug_mode` is True.
        debug_mode: if True, also build the validation graph and report PSNR
            every `save_epochs` epochs.

    Returns:
        None. Checkpoints are written to './model' and the loss / PSNR
        histories to './output'.
    """
    print('Running PRLNet -Training!')
    # create folders to save trained model and results
    graph_dir = './graph'
    checkpt_dir = './model'
    ouput_dir = './output'
    exists_or_mkdir(graph_dir, need_remove=True)
    exists_or_mkdir(ouput_dir)
    exists_or_mkdir(checkpt_dir)

    # --------------------------------- load data ---------------------------------
    # data fetched at range: [-1,1]
    input_imgs, target_imgs, num = input_producer(train_list, in_channels, batch_size, need_shuffle=True)
    if debug_mode:
        input_val, target_val, num_val = input_producer(val_list, in_channels, batch_size, need_shuffle=False)

    pred_content, pred_detail, pred_imgs = gen_PRLNet(input_imgs, out_channels, is_train=True, reuse=False)
    if debug_mode:
        # Validation shares the training weights (reuse=True).
        _, _, pred_val = gen_PRLNet(input_val, out_channels, is_train=False, reuse=True)

    # --------------------------------- loss terms ---------------------------------
    with tf.name_scope('Loss'):
        target_224 = tf.image.resize_images(target_imgs, size=[224, 224], method=0, align_corners=False)
        predict_224 = tf.image.resize_images(pred_imgs, size=[224, 224], method=0, align_corners=False)
        vgg19_api = VGG19("vgg19.npy")
        # Images are shifted from [-1, 1] to [0, 1] before entering VGG.
        vgg_map_targets = vgg19_api.build((target_224 + 1) / 2, is_rgb=(in_channels == 3))
        vgg_map_predict = vgg19_api.build((predict_224 + 1) / 2, is_rgb=(in_channels == 3))

        content_loss = tf.losses.mean_squared_error(target_imgs, pred_content)
        vgg_loss = 2e-6 * tf.losses.mean_squared_error(vgg_map_targets, vgg_map_predict)
        l1_loss = tf.reduce_mean(tabs_diff) if False else tf.reduce_mean(tf.abs(target_imgs - pred_imgs))
        # Not part of loss_op; kept so the graph and the losses collection are
        # unchanged.
        mse_loss = tf.losses.mean_squared_error(target_imgs, pred_imgs)
        loss_op = content_loss + 2 * vgg_loss + l1_loss

    # --------------------------------- solver definition ---------------------------------
    global_step = tf.Variable(0, name='global_step', trainable=False)
    iters_per_epoch = np.floor_divide(num, batch_size)
    lr_decay = tf.train.polynomial_decay(learning_rate=learning_rate,
                                         global_step=global_step,
                                         decay_steps=iters_per_epoch * n_epochs,
                                         end_learning_rate=learning_rate / 100.0,
                                         power=0.9)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.name_scope('optimizer'):
        with tf.control_dependencies(update_ops):
            gen_vars = [var for var in tf.trainable_variables() if var.name.startswith("PRLNet")]
            gen_optim = tf.train.AdamOptimizer(lr_decay, beta1)
            gen_grads_and_vars = gen_optim.compute_gradients(loss_op, var_list=gen_vars)
            train_op = gen_optim.apply_gradients(gen_grads_and_vars, global_step=global_step)

    # --------------------------------- model training ---------------------------------
    # Disabled summary code, kept for reference:
    # if debug_mode:
    #     with tf.name_scope('summarise') as sum_scope:
    #         tf.summary.scalar('loss', loss_op)
    #         tf.summary.scalar('learning rate', lr_decay)
    #         tf.summary.image('predicts', pred_imgs, max_outputs=9)
    #         summary_op = tf.summary.merge_all()

    with tf.name_scope("parameter_count"):
        num_parameters = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    # set GPU resources
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = 0.45

    saver = tf.train.Saver(max_to_keep=1)
    loss_list = []
    psnr_list = []
    with tf.Session(config=config) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # Once the queue-runner threads exist they must be stopped and joined on
        # every exit path, otherwise a failure mid-training leaks them.
        try:
            sess.run(tf.global_variables_initializer())
            print(">>------------>>> [Training_Num] =%d" % num)
            print(">>------------>>> [Parameter_Num] =%d" % sess.run(num_parameters))
            # Disabled summary code, kept for reference:
            # if debug_mode:
            #     with tf.name_scope(sum_scope):
            #         summary_writer = tf.summary.FileWriter(graph_dir, graph=sess.graph)

            for epoch in range(0, n_epochs):
                start_time = time.time()
                epoch_loss, n_iters = 0, 0
                for step in range(0, num, batch_size):
                    _, loss = sess.run([train_op, loss_op])
                    epoch_loss += loss
                    n_iters += 1
                    # iteration information
                    if n_iters % display_steps == 0:
                        tm = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
                        print("%s >> [%d/%d] iter: %d loss: %4.4f" % (tm, epoch, n_epochs, n_iters, loss))
                        # Disabled summary code, kept for reference:
                        # if debug_mode:
                        #     summary_str = sess.run(summary_op)
                        #     summary_writer.add_summary(summary_str, step)

                # epoch information
                epoch_loss = epoch_loss / n_iters
                loss_list.append(epoch_loss)
                print("[*] ----- Epoch: %d/%d | Loss: %4.4f | Time-consumed: %4.3f -----" %
                      (epoch, n_epochs, epoch_loss, (time.time() - start_time)))

                if (epoch + 1) % save_epochs == 0:
                    if debug_mode:
                        print("----- validating model ...")
                        mean_psnr, nn = 0, 0
                        for idx in range(0, num_val, batch_size):
                            predicts, groundtruths = sess.run([pred_val, target_val])
                            save_images_from_batch(predicts, ouput_dir, idx)
                            psnr = measure_psnr(predicts, groundtruths)
                            mean_psnr += psnr
                            nn += 1
                        psnr_list.append(mean_psnr / nn)
                        print("----- psnr:%4.4f" % (mean_psnr / nn))

                    print("----- saving model ...")
                    saver.save(sess, os.path.join(checkpt_dir, "model.cpkt"), global_step=global_step)
                    save_list(os.path.join(ouput_dir, "loss"), loss_list)
                    save_list(os.path.join(ouput_dir, "psnr"), psnr_list)
        finally:
            # stop data queue
            coord.request_stop()
            coord.join(threads)

    # write out the loss list
    save_list(os.path.join(ouput_dir, "loss"), loss_list)
    save_list(os.path.join(ouput_dir, "psnr"), psnr_list)
    print("Training finished!")
    return None
def parse_and_preprocess(self, example_proto):
    """
    Parses one serialized example and builds per-person heatmap crops and
    binary keypoint label maps.

    Arguments:
        example_proto: a serialized example with features
            'image', 'num_persons', 'boxes' and 'keypoints'.
    Returns:
        crops: a float tensor with shape [num_persons, height, width, 17].
        labels: a float tensor with shape [num_persons, height, width, 17].
        Here (height, width) is CROP_SIZE, and num_persons is the count that
        remains after the optional `max_keypoints` filtering.
    """
    features = {
        'image': tf.FixedLenFeature([], tf.string),
        'num_persons': tf.FixedLenFeature([], tf.int64),
        'boxes': tf.FixedLenSequenceFeature([], tf.float32, allow_missing=True),
        'keypoints': tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True)
    }
    parsed_features = tf.parse_single_example(example_proto, features)

    # get size of the image (read from the jpeg header, no pixel decode here)
    shape = tf.image.extract_jpeg_shape(parsed_features['image'])
    image_height, image_width = shape[0], shape[1]
    # [height, width, height, width], used to normalize the boxes below
    scaler = tf.to_float(tf.stack(2 * [image_height, image_width]))

    # get number of people on the image
    num_persons = tf.to_int32(parsed_features['num_persons'])
    # it is assumed that num_persons > 0

    # get groundtruth boxes, they are in absolute coordinates
    boxes = tf.reshape(parsed_features['boxes'], [num_persons, 4])

    # get keypoints, they are in absolute coordinates
    keypoints = tf.to_int32(parsed_features['keypoints'])
    keypoints = tf.reshape(keypoints, [num_persons, 17, 3])

    if self.max_keypoints is not None:
        # curriculum learning by sorting
        # annotations based on number of keypoints:
        # only persons with at most `max_keypoints` visible keypoints are kept
        is_visible = tf.to_int32(keypoints[:, :, 2] > 0)  # shape [num_persons, 17]
        is_good = tf.less_equal(tf.reduce_sum(is_visible, axis=1), self.max_keypoints)
        # it has shape [num_persons]

        keypoints = tf.boolean_mask(keypoints, is_good)
        boxes = tf.boolean_mask(boxes, is_good)
        num_persons = tf.shape(boxes)[0]

    # heatmaps are computed outside the graph by `get_heatmaps`
    heatmaps = tf.py_func(
        lambda k, b, w, h: get_heatmaps(k, b, w, h, DOWNSAMPLE),
        [keypoints, boxes, image_width, image_height],
        tf.float32, stateful=False
    )
    # py_func loses static shape information, only the channel count is restored
    heatmaps.set_shape([None, None, 17])

    # every box is cropped from the single heatmap image at batch index 0
    box_indices = tf.zeros([num_persons], dtype=tf.int32)
    crops = tf.image.crop_and_resize(
        tf.expand_dims(heatmaps, 0), boxes / scaler,
        box_indices, crop_size=CROP_SIZE
    )

    def fn(x):
        """
        Builds the binary keypoint map of one person.

        Arguments:
            x: a tuple (keypoints, box), where
                keypoints: a float tensor with shape [17, 3],
                    unpacked below as columns (y, x, v).
                box: a float tensor with shape [4],
                    unpacked below as (ymin, xmin, ymax, xmax).
        Returns:
            a float tensor with shape [height, width, 17].
        """
        keypoints, box = x
        ymin, xmin, ymax, xmax = tf.unstack(box, axis=0)

        # note: from here on `x` is the coordinate column, not the input tuple
        y, x, v = tf.unstack(keypoints, axis=1)
        keypoints = tf.stack([y, x], axis=1)

        # indices of the keypoints with v > 0
        part_id = tf.where(v > 0.0)  # shape [num_visible, 1]
        part_id = tf.to_int32(part_id)
        num_visible = tf.shape(part_id)[0]
        keypoints = tf.gather(keypoints, tf.squeeze(part_id, 1))
        # it has shape [num_visible, 2], they have absolute coordinates

        # transform keypoints coordinates
        # to be relative to the box
        h, w = ymax - ymin, xmax - xmin
        height, width = CROP_SIZE
        translation = tf.stack([ymin, xmin])
        scaler = tf.to_float(tf.stack([height / h, width / w], axis=0))

        keypoints -= translation
        keypoints *= scaler
        keypoints = tf.to_int32(tf.round(keypoints))
        # it has shape [num_visible, 2]

        # keep the rounded coordinates inside the crop
        y, x = tf.unstack(keypoints, axis=1)
        y = tf.clip_by_value(y, 0, height - 1)
        x = tf.clip_by_value(x, 0, width - 1)
        keypoints = tf.stack([y, x], axis=1)

        # one nonzero entry (y, x, part) per visible keypoint
        indices = tf.to_int64(tf.concat([keypoints, part_id], axis=1))
        values = tf.ones([num_visible], dtype=tf.float32)
        binary_map = tf.sparse.SparseTensor(indices, values, dense_shape=[height, width, 17])
        # index validation is switched off (presumably because the indices
        # are not in sorted order -- TODO confirm)
        binary_map = tf.sparse.to_dense(binary_map, default_value=0, validate_indices=False)
        return binary_map

    labels = tf.map_fn(
        fn, (tf.to_float(keypoints), boxes),
        dtype=tf.float32, back_prop=False,
    )

    if self.is_training:
        crops, labels = random_flip_left_right(crops, labels)

    return crops, labels
import functools

import tensorflow.compat.v1 as tf

# tf.logging.set_verbosity(tf.compat.v1.logging.ERROR)
tf.disable_v2_behavior()


@functools.lru_cache()
def _count_cached(data_x, function):
    """Evaluates `function`, its gradient and its Hessian at `data_x`.

    Args:
        data_x: tuple of numbers (must be hashable so the call can be cached).
        function: callable mapping a 1-D float32 tensor to a tensor.

    Returns:
        A list `[value, gradients, hessians]` as returned by `Session.run`.
    """
    x = tf.placeholder(shape=[len(data_x)], dtype=tf.float32, name="x")
    func = function(x)
    gradients = tf.gradients(func, x)
    hessians = tf.hessians(func, x)
    # Close the session deterministically instead of leaking it.
    with tf.Session() as sess:
        # The initializer is run first and its (None) result is dropped.
        return sess.run(
            [tf.global_variables_initializer(), func, gradients, hessians],
            feed_dict={x: data_x})[1:]


def count(data_x, function):
    """Returns `[value, gradients, hessians]` of `function` at `data_x`.

    Results are memoized per `(data_x, function)` pair. `data_x` may be any
    sequence: it is converted to a tuple before the cached call, because
    `functools.lru_cache` requires hashable arguments and would raise
    `TypeError` for a list.

    Args:
        data_x: sequence of numbers, the point at which to evaluate.
        function: callable mapping a 1-D float32 tensor to a tensor.

    Returns:
        A new list `[value, gradients, hessians]`; the cached entry itself is
        not handed out, so callers may modify the returned list.
    """
    return list(_count_cached(tuple(data_x), function))


if __name__ == '__main__':
    print(*count([-10., 20., 30.], lambda x: tf.reduce_sum(x * x * x) / 3),
          sep="\n")
def benchmark_model(self, warmup_runs, bm_runs, num_threads, trace_filename=None):
  """Benchmark model.

  Builds the model, freezes it into a constant graph, then times `bm_runs`
  inference calls on a random input after `warmup_runs` warm-up calls.

  Args:
    warmup_runs: number of untimed-for-FPS warm-up iterations.
    bm_runs: number of timed iterations.
    num_threads: if > 0, used as intra-op parallelism (inter-op is set to 1).
    trace_filename: if set, one extra fully-traced run is executed and its
      chrome trace is written to this path.
  """
  if self.tensorrt:
    print('Using tensorrt ', self.tensorrt)
    graphdef = self.freeze_model()

  if num_threads > 0:
    print('num_threads for benchmarking: {}'.format(num_threads))
    session_kwargs = dict(
        intra_op_parallelism_threads=num_threads,
        inter_op_parallelism_threads=1)
  else:
    session_kwargs = {}
  sess_config = tf.ConfigProto(**session_kwargs)

  graph_options = sess_config.graph_options
  # rewriter_config_pb2.RewriterConfig.OFF
  graph_options.rewrite_options.dependency_optimization = 2
  if self.use_xla:
    graph_options.optimizer_options.global_jit_level = (
        tf.OptimizerOptions.ON_2)

  # Phase 1: build the model and freeze its variables into constants.
  with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
    inputs = tf.placeholder(tf.float32, name='input', shape=self.inputs_shape)
    output = self.build_model(inputs)

    img = np.random.uniform(size=self.inputs_shape)

    sess.run(tf.global_variables_initializer())
    if self.tensorrt:
      fetches = [inputs.name]
      fetches += [tensor.name for tensor in output]
      converted = self.convert_tr(graphdef, fetches)
      inputs, output = converted[0], converted[1:]

    if self.use_xla:
      # Don't use tf.group because XLA removes the whole graph for tf.group.
      output = tf.add_n([tf.reduce_sum(x) for x in output])
    else:
      output = tf.group(*output)

    output_name = [output.name]
    input_name = inputs.name
    graphdef = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph_def, output_name)

  # Phase 2: run the frozen graph in a fresh graph and session.
  with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
    tf.import_graph_def(graphdef, name='')
    feed = {input_name: img}

    for i in range(warmup_runs):
      start_time = time.time()
      sess.run(output_name, feed_dict=feed)
      logging.info('Warm up: {} {:.4f}s'.format(i, time.time() - start_time))

    print('Start benchmark runs total={}'.format(bm_runs))
    start = time.perf_counter()
    for _ in range(bm_runs):
      sess.run(output_name, feed_dict=feed)
    elapsed = time.perf_counter() - start
    inference_time = elapsed / bm_runs
    print('Per batch inference time: ', inference_time)
    print('FPS: ', self.batch_size / inference_time)

    if trace_filename:
      run_options = tf.RunOptions()
      run_options.trace_level = tf.RunOptions.FULL_TRACE
      run_metadata = tf.RunMetadata()
      sess.run(
          output_name,
          feed_dict=feed,
          options=run_options,
          run_metadata=run_metadata)
      logging.info('Dumping trace to %s', trace_filename)
      trace_dir = os.path.dirname(trace_filename)
      if not tf.io.gfile.exists(trace_dir):
        tf.io.gfile.makedirs(trace_dir)
      with tf.io.gfile.GFile(trace_filename, 'w') as trace_file:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        trace_file.write(trace.generate_chrome_trace_format(show_memory=True))
def is_nonzero_chunk(example):
  """Returns a boolean tensor: True unless every value in "targets" is 0.

  The check is done on the sum of absolute values, which is positive as soon
  as any target is nonzero.
  """
  abs_total = tf.reduce_sum(tf.abs(example["targets"]))
  return tf.less(0, abs_total)