def l2loss(params):
    """Return the total sum of squared entries over a collection of tensors.

    Args:
        params: Iterable of tensors (for example model variables).

    Returns:
        A scalar tensor: ``tf.constant(0.0)`` when ``params`` is empty,
        otherwise the sum, over every tensor, of the sum of its squared
        entries.
    """
    # Materialize once so generators are accepted and emptiness can be tested.
    params = list(params)
    if not params:
        return tf.constant(0.0)
    # tf.reduce_sum collapses every axis to a scalar.  The Python builtin
    # ``sum`` only folds the leading axis, which leaves non-scalar partial
    # sums of differing shapes that tf.add_n cannot combine.
    return tf.add_n([tf.reduce_sum(tf.square(p)) for p in params])
def model_fn(features, labels, mode, params):
    """Create the model for the estimator API.

    Args:
        features: tensor with shape
            [BATCH_SIZE, go.N, go.N, features_lib.NEW_FEATURES_PLANES]
        labels: one of
            * a dict from string to tensor with shape
                'pi_tensor': [BATCH_SIZE, go.N * go.N + 1]
                'value_tensor': [BATCH_SIZE]
            * a single ``tf.Tensor``; it is split along its leading axis:
              everything but the last entry becomes ``pi_tensor`` and the
              last entry becomes ``value_tensor``
            * ``None``; float32 placeholders are created for both targets
            * any other object, which is used unchanged and must expose
              ``pi_tensor`` and ``value_tensor`` attributes
        mode: a tf.estimator.ModeKeys (batchnorm params update for TRAIN only)
        params: A dictionary (typically derived from the FLAGS object).  Keys
            read here: 'value_cost_weight', 'l2_strength', 'lr_boundaries',
            'lr_rates', 'quantize', 'quant_delay' (TRAIN with quantize only),
            'sgd_momentum', 'use_tpu', 'use_ipu' (only when 'use_tpu' is
            false) and 'summary_steps'.  The dict is also forwarded to
            ``model_inference_fn``.

    Returns:
        A ``TPUEstimatorSpec`` when ``params['use_tpu']`` is true, otherwise
        the result of its ``as_estimator_spec()``, built with
            mode: same as mode arg
            predictions: dict of tensors
                'policy_output': identity of the policy head output
                'value_output': identity of the value head output
            loss: a single value tensor (policy + value + L2 cost)
            train_op: train op
            eval_metrics: (metric function, list of metric input tensors)
    """
    # Local import, as in the original block, so no module-level import of
    # namedtuple is assumed.
    from collections import namedtuple

    policy_output, value_output, logits = model_inference_fn(
        features, mode == tf.estimator.ModeKeys.TRAIN, params)

    # Normalise every accepted label format to an object with `.pi_tensor`
    # and `.value_tensor` so the loss code below has a single access path.
    Label = namedtuple('Label', 'pi_tensor value_tensor')
    if isinstance(labels, dict):
        labels = Label(pi_tensor=labels['pi_tensor'],
                       value_tensor=labels['value_tensor'])
    elif isinstance(labels, tf.Tensor):
        # NOTE(review): this slices the leading axis of the packed tensor;
        # confirm that layout against the input pipeline.
        labels = Label(pi_tensor=labels[:-1], value_tensor=labels[-1])
    elif labels is None:
        labels = Label(
            pi_tensor=tf.placeholder(tf.float32, [None, go.N * go.N + 1]),
            value_tensor=tf.placeholder(tf.float32, [None]))

    # train ops
    policy_cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=tf.stop_gradient(labels.pi_tensor)))

    value_cost = params['value_cost_weight'] * tf.reduce_mean(
        tf.square(value_output - labels.value_tensor))

    # Variables whose name contains 'bias' or 'beta' are excluded from the
    # L2 penalty.
    reg_vars = [
        v for v in tf.trainable_variables()
        if 'bias' not in v.name and 'beta' not in v.name
    ]
    l2_cost = params['l2_strength'] * \
        tf.add_n([tf.nn.l2_loss(v) for v in reg_vars])

    combined_cost = policy_cost + value_cost + l2_cost

    global_step = tf.train.get_or_create_global_step().value()
    learning_rate = tf.train.piecewise_constant(global_step,
                                                params['lr_boundaries'],
                                                params['lr_rates'])
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Insert quantization ops if requested
    if params['quantize']:
        if mode == tf.estimator.ModeKeys.TRAIN:
            tf.contrib.quantize.create_training_graph(
                quant_delay=params['quant_delay'])
        else:
            tf.contrib.quantize.create_eval_graph()

    optimizer = tf.train.MomentumOptimizer(learning_rate,
                                           params['sgd_momentum'])
    if params['use_tpu']:
        optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)
    elif params['use_ipu']:
        optimizer = CrossReplicaOptimizer(optimizer)
    with tf.control_dependencies(update_ops):
        # NOTE(review): no global_step is passed to minimize(), so this
        # function does not increment the global step itself; presumably the
        # surrounding estimator does -- confirm.
        train_op = optimizer.minimize(combined_cost)

    # Computations to be executed on CPU, outside of the main TPU queues.
    def eval_metrics_host_call_fn(policy_output,
                                  value_output,
                                  pi_tensor,
                                  policy_cost,
                                  value_cost,
                                  l2_cost,
                                  combined_cost,
                                  step,
                                  est_mode=tf.estimator.ModeKeys.TRAIN):
        policy_entropy = -tf.reduce_mean(
            tf.reduce_sum(policy_output * tf.log(policy_output), axis=1))
        # pi_tensor is one_hot when generated from sgfs (for supervised
        # learning) and soft-max when using self-play records. argmax
        # normalizes the two.
        policy_target_top_1 = tf.argmax(pi_tensor, axis=1)

        policy_output_in_top1 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=1))
        policy_output_in_top3 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=3))

        policy_top_1_confidence = tf.reduce_max(policy_output, axis=1)
        # policy_target_top_1_confidence = tf.boolean_mask(
        #     policy_output,
        #     tf.one_hot(policy_target_top_1, tf.shape(policy_output)[1]))

        value_cost_normalized = value_cost / params['value_cost_weight']

        with tf.variable_scope("metrics"):
            metric_ops = {
                'policy_cost': tf.metrics.mean(policy_cost),
                'value_cost': tf.metrics.mean(value_cost),
                'value_cost_normalized':
                    tf.metrics.mean(value_cost_normalized),
                'l2_cost': tf.metrics.mean(l2_cost),
                'policy_entropy': tf.metrics.mean(policy_entropy),
                'combined_cost': tf.metrics.mean(combined_cost),
                'policy_accuracy_top_1':
                    tf.metrics.mean(policy_output_in_top1),
                'policy_accuracy_top_3':
                    tf.metrics.mean(policy_output_in_top3),
                'policy_top_1_confidence':
                    tf.metrics.mean(policy_top_1_confidence),
                # 'policy_target_top_1_confidence': tf.metrics.mean(
                #     policy_target_top_1_confidence),
                'value_confidence': tf.metrics.mean(tf.abs(value_output)),
            }

        if est_mode == tf.estimator.ModeKeys.EVAL:
            return metric_ops

        # NOTE: global_step is rounded to a multiple of FLAGS.summary_steps.
        eval_step = tf.reduce_min(step)

        # Create summary ops so that they show up in SUMMARIES collection
        # That way, they get logged automatically during training
        summary_writer = summary.create_file_writer(FLAGS.work_dir)
        with summary_writer.as_default(), \
                summary.record_summaries_every_n_global_steps(
                    params['summary_steps'], eval_step):
            for metric_name, metric_op in metric_ops.items():
                # metric_op is the (value, update_op) pair; index 1 is logged.
                summary.scalar(metric_name, metric_op[1], step=eval_step)

        # Reset metrics occasionally so that they are mean of recent batches.
        reset_op = tf.variables_initializer(tf.local_variables("metrics"))
        cond_reset_op = tf.cond(
            tf.equal(eval_step % params['summary_steps'], tf.to_int64(1)),
            lambda: reset_op,
            lambda: tf.no_op())

        return summary.all_summary_ops() + [cond_reset_op]

    # Scalars are reshaped to [1] before being handed to the metric function.
    metric_args = [
        policy_output,
        value_output,
        labels.pi_tensor,
        tf.reshape(policy_cost, [1]),
        tf.reshape(value_cost, [1]),
        tf.reshape(l2_cost, [1]),
        tf.reshape(combined_cost, [1]),
        tf.reshape(global_step, [1]),
    ]

    predictions = {
        'policy_output': tf.identity(policy_output, 'policy_output'),
        'value_output': tf.identity(value_output, 'value_output'),
    }

    eval_metrics_only_fn = functools.partial(
        eval_metrics_host_call_fn, est_mode=tf.estimator.ModeKeys.EVAL)
    # Kept so the disabled host_call below can be re-enabled without edits.
    host_call_fn = functools.partial(eval_metrics_host_call_fn,
                                     est_mode=tf.estimator.ModeKeys.TRAIN)

    tpu_estimator_spec = tpu_estimator.TPUEstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=combined_cost,
        train_op=train_op,
        eval_metrics=(eval_metrics_only_fn, metric_args),
        # host_call=(host_call_fn, metric_args)
    )
    if params['use_tpu']:
        return tpu_estimator_spec
    else:
        return tpu_estimator_spec.as_estimator_spec()
def _compute_losses_and_predictions_dicts(model,
                                          features,
                                          labels,
                                          add_regularization_loss=True):
    """Run `model` on `features` and collect its losses and predictions.

    Args:
        model: a DetectionModel (based on Keras).
        features: Dictionary of feature tensors from the input dataset, in
            the format output by `inputs.train_input` and
            `inputs.eval_input`:
            features[fields.InputDataFields.image] is a
                [batch_size, H, W, C] float32 tensor of preprocessed images.
            features[HASH_KEY] is a [batch_size] int32 tensor of unique
                image identifiers.
            features[fields.InputDataFields.true_image_shape] is a
                [batch_size, 3] int32 tensor with the true image shapes
                (preprocessed images may be padded).
            features[fields.InputDataFields.original_image] (optional) is a
                [batch_size, H, W, C] float32 tensor of original images.
        labels: Dictionary of groundtruth tensors post-unstacking, in the
            form returned by `inputs.train_input` / `inputs.eval_input`
            (shapes may have been changed by `model_lib.unstack_batch`).
            Keyed by fields.InputDataFields members:
            num_groundtruth_boxes: int32, number of valid boxes per image.
            groundtruth_boxes: float32, corners of the groundtruth boxes.
            groundtruth_classes: float32 one-hot classes.
            groundtruth_weights: float32 per-box weights.
            -- Optional --
            groundtruth_instance_masks: float32 binary instance masks.
            groundtruth_keypoints: float32 keypoints for each box.
            groundtruth_dp_num_points: int32, sampled DensePose points per
                object.
            groundtruth_dp_part_ids: int32 DensePose part ids (0-indexed).
            groundtruth_dp_surface_coords: float32 DensePose surface
                coordinates.
            groundtruth_group_of: tf.bool group_of annotations.
            groundtruth_labeled_classes: float32 k-hot classes.
            groundtruth_track_ids: int32 track IDs.
        add_regularization_loss: Whether to include the model's
            regularization loss in the losses dictionary.

    Returns:
        A tuple (losses_dict, prediction_dict).  `losses_dict` is the dict
        returned by `model.loss`, extended in place with 'Loss/total_loss'
        and, when a regularization loss was added,
        'Loss/regularization_loss'.  `prediction_dict` is the output of
        `model.predict` after bfloat16 -> float32 conversion.
    """
    model_lib.provide_groundtruth(model, labels)

    input_fields = fields.InputDataFields
    images = features[input_fields.image]
    prediction_dict = ops.bfloat16_to_float32_nested(
        model.predict(images,
                      features[input_fields.true_image_shape],
                      **model.get_side_inputs(features)))

    losses_dict = model.loss(prediction_dict,
                             features[input_fields.true_image_shape])
    loss_terms = list(losses_dict.values())

    # TODO(kaftan): As we figure out mixed precision & bfloat 16, we may
    # need to convert these regularization losses from bfloat16 to float32
    # as well.
    reg_terms = (model.regularization_losses()
                 if add_regularization_loss else None)
    if reg_terms:
        reg_terms = ops.bfloat16_to_float32_nested(reg_terms)
        reg_total = tf.add_n(reg_terms, name='regularization_loss')
        loss_terms.append(reg_total)
        losses_dict['Loss/regularization_loss'] = reg_total

    losses_dict['Loss/total_loss'] = tf.add_n(loss_terms, name='total_loss')
    return losses_dict, prediction_dict
def apply_line_prediction(inputs,
                          features,
                          blur_steps,
                          learn_alpha=True,
                          name=None):
    """Applies "Line Prediction" layer to input images.

    `inputs` must be compatible with shape [None, None, None, 6] and is split
    into two 3-channel frames.  For each frame a 2-channel flow (and, when
    `learn_alpha` is true, `blur_steps` softmax weights) is predicted from
    `features` with a 1x1 convolution.  The frame is resampled at
    `identity + flow * k / (blur_steps - 1)` for k = 1..blur_steps - 1
    (clipped to the image bounds), stacked behind the unwarped frame, and
    reduced over the step axis using the learned weights or a uniform
    `1 / blur_steps`.  The two per-frame results are averaged.

    Args:
        inputs: image tensor holding two frames along the channel axis.
        features: tensor fed to the 1x1 prediction convolutions.
        blur_steps: number of samples blended per pixel.
        learn_alpha: whether per-step blend weights are predicted.
        name: optional name scope (default 'blur_prediction').

    Returns:
        The averaged blurred frame tensor.
    """
    inputs.shape.assert_is_compatible_with([None, None, None, 6])

    with tf.name_scope(name, 'blur_prediction', values=[inputs, features]):

        with tf.name_scope(None, 'input_frames', values=[inputs]):
            frame_pair = [inputs[:, :, :, :3], inputs[:, :, :, 3:]]

        with tf.name_scope(None, 'frame_size', values=[inputs, features]):
            input_dims = tf.shape(inputs)
            height, width = input_dims[1], input_dims[2]

        with tf.name_scope(None, 'identity_warp', values=[]):
            # Per-pixel (x, y) coordinates, broadcastable over batch/steps.
            cols, rows = tf.meshgrid(tf.range(width), tf.range(height))
            pixel_grid = tf.to_float(tf.stack([cols, rows], axis=-1))
            identity_warp = pixel_grid[tf.newaxis, :, :, tf.newaxis, :]

            # Fractions 1/(n-1), ..., 1 of the predicted flow.
            step_fractions = (tf.to_float(tf.range(blur_steps - 1) + 1) /
                              (blur_steps - 1))
            warp_steps = step_fractions[tf.newaxis, tf.newaxis, tf.newaxis, :,
                                        tf.newaxis]

            # Largest valid (x, y) sampling coordinate.
            largest_xy = tf.to_float(tf.stack([width - 1, height - 1]))
            max_warps = largest_xy[tf.newaxis, tf.newaxis, tf.newaxis,
                                   tf.newaxis, :]

        blurred = []
        for frame in frame_pair:
            with tf.name_scope(None, 'predict_blurs', values=[features]):
                flow = tf.layers.conv2d(features, 2, 1, padding='same')

                if learn_alpha:
                    alpha = tf.layers.conv2d(features,
                                             blur_steps,
                                             1,
                                             padding='same',
                                             activation=tf.nn.softmax)

            with tf.name_scope(None, 'apply_blurs', values=[]):
                with tf.name_scope(None, 'warp', values=[frame, flow]):
                    sample_xy = (identity_warp +
                                 flow[:, :, :, tf.newaxis, :] * warp_steps)
                    sample_xy = tf.clip_by_value(sample_xy, 0.0, max_warps)
                    resampled = contrib_resampler.resampler(frame, sample_xy)
                    # Step 0 is the unwarped frame itself.
                    stacked = tf.concat(
                        [frame[:, :, :, tf.newaxis, :], resampled], axis=3)

                with tf.name_scope(None, 'apply_alpha', values=[frame, flow]):
                    weights = (alpha[:, :, :, :, tf.newaxis]
                               if learn_alpha else 1.0 / blur_steps)
                    blurred.append(tf.reduce_sum(stacked * weights, axis=3))

        with tf.name_scope(None, 'outputs', values=[blurred]):
            output = tf.add_n(blurred) / len(frame_pair)

        return output
def backword(mnist):
    """Build the training graph and run the training loop.

    Args:
        mnist: dataset object; `mnist.train.num_examples` and
            `mnist.train.next_batch(BATCH_SIZE)` are used.

    Side effects:
        Restores the latest checkpoint under MODEL_SAVE_PATH if one exists,
        trains for STEPS iterations and, every 1000 iterations, prints the
        batch loss and saves a checkpoint.
    """
    # Placeholders for the training inputs and their label vectors.
    inputs = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE])
    targets = tf.placeholder(tf.float32, [None, mnist_forward.OUTPUT_NODE])

    # Forward pass (with the configured regularizer) producing the logits.
    logits = mnist_forward.forward(inputs, REGULARIZER)

    # Step counter; not trainable.
    global_step = tf.Variable(0, trainable=False)

    # Cross-entropy loss.  sparse_softmax_cross_entropy_with_logits fuses the
    # softmax with the cross entropy: its first argument is the forward
    # result *without* a softmax layer, its second is the index of the
    # correct class, obtained here via argmax over the label vector.
    per_example_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.argmax(targets, 1))
    # Mean cross entropy over all examples in the current batch.
    mean_ce = tf.reduce_mean(per_example_ce)
    # Total loss = cross-entropy loss + everything in the 'losses' collection
    # (the regularization terms).
    total_loss = mean_ce + tf.add_n(tf.get_collection('losses'))

    # Exponentially decaying (staircase) learning rate.
    decayed_lr = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    # Back-propagation step.
    sgd_step = tf.train.GradientDescentOptimizer(decayed_lr).minimize(
        total_loss, global_step=global_step)

    # Moving average: keeps a shadow value of every trainable variable that
    # follows the variable as it changes.
    # MOVING_AVERAGE_DECAY: decay rate of the moving average.
    averager = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,
                                                 global_step)
    averages_op = averager.apply(tf.trainable_variables())

    # Each training iteration must both update the parameters through
    # back-propagation and refresh their moving averages, so both ops are
    # bundled behind a single no-op named 'train'.
    with tf.control_dependencies([sgd_step, averages_op]):
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()
    fetches = [train_op, total_loss, global_step]

    with tf.Session() as sess:
        # Initialize all variables.
        tf.global_variables_initializer().run()

        # Resume from the latest checkpoint when there is one.
        ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for iteration in range(STEPS):
            batch_x, batch_y = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run(
                fetches, feed_dict={inputs: batch_x, targets: batch_y})

            if iteration % 1000 != 0:
                continue
            print("After %d training steps,loss on training batch is %g." %
                  (step, loss_value))
            saver.save(sess,
                       os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                       global_step=global_step)
def add_n(input):
    """Sum an iterable of tensors element-wise with `tf.add_n`."""
    # tf.add_n is given a list, so the iterable is materialized first.
    tensors = list(input)
    return tf.add_n(tensors)
def loss_layer(self, predict, labels):
    """
    Define loss layer

    Parameters
    ----------
    predict: TensorFlow Tensor
        The predicted values for the batch of data
    labels: TensorFlow Tensor
        Ground truth labels for the batch of data

    Returns
    -------
    loss: TensorFlow Tensor
        Sum of the xy, wh, confidence and classification losses
    """
    pos_iou_threshold = 0.7
    neg_iou_threshold = 0.3

    def _config_value(key):
        # Read one hyper-parameter from the shared config as a numpy value.
        return _utils.convert_shared_float_array_to_numpy(
            self.config.get(key))

    rescore = int(_config_value("od_rescore"))
    lmb_coord_xy = _config_value("lmb_coord_xy")
    lmb_coord_wh = _config_value("lmb_coord_wh")
    lmb_obj = _config_value("lmb_obj")
    lmb_noobj = _config_value("lmb_noobj")
    lmb_class = _config_value("lmb_class")

    # Prediction values from model on the images, one
    # (x, y, w, h, conf, classes...) vector per grid cell and anchor.
    ypred = _tf.reshape(
        predict,
        [-1] + list(self.grid_shape) +
        [self.num_anchors, 5 + self.num_classes],
    )
    raw_xy, raw_wh = ypred[..., 0:2], ypred[..., 2:4]
    raw_conf = ypred[..., 4]
    class_scores = ypred[..., 5:]

    tf_anchors = _tf.constant(self.anchors)

    # Ground Truth info derived from ymap/labels
    gt_xy, gt_wh = labels[..., 0:2], labels[..., 2:4]
    gt_raw_wh = _tf.math.log(gt_wh / tf_anchors + 1e-5)
    gt_conf = labels[..., 4]
    gt_class = labels[..., 5:]

    # Decoded predicted boxes
    xy = _tf.sigmoid(raw_xy)
    wh = _tf.exp(raw_wh) * tf_anchors
    # Anchor sizes carrying the shape of the prediction (exp(0) == 1).
    wh_anchors = _tf.exp(raw_wh * 0.0) * tf_anchors
    box_lo = xy - wh / 2
    box_hi = xy + wh / 2

    gt_area = gt_wh[..., 0] * gt_wh[..., 1]
    gt_lo = gt_xy - gt_wh / 2
    gt_hi = gt_xy + gt_wh / 2

    # IoU between the anchor shapes and the ground truth, both centred.
    c_inter = _tf.maximum(2 * _tf.minimum(wh_anchors / 2, gt_wh / 2), 0)
    c_area = wh_anchors[..., 0] * wh_anchors[..., 1]
    c_inter_area = c_inter[..., 0] * c_inter[..., 1]
    c_iou = c_inter_area / (c_area + gt_area - c_inter_area)

    # IoU between the predicted boxes and the ground truth.
    inter = _tf.maximum(
        _tf.minimum(box_hi, gt_hi) - _tf.maximum(box_lo, gt_lo), 0)
    area = wh[..., 0] * wh[..., 1]
    inter_area = inter[..., 0] * inter[..., 1]
    iou = inter_area / (area + gt_area - inter_area)

    active_iou = c_iou

    # Object mask: ground-truth confidence is 1 AND the anchor is either the
    # best match along axis 3 or above the positive IoU threshold.
    has_gt = _tf.equal(gt_conf, _tf.constant(1.0))
    max_iou = _tf.reduce_max(active_iou, 3, keepdims=True)
    is_best = _tf.equal(active_iou, max_iou)
    is_above = c_iou > pos_iou_threshold
    is_obj = _tf.math.logical_and(has_gt,
                                  _tf.math.logical_or(is_best, is_above))
    kr_obj_ij = _tf.stop_gradient(_tf.cast(is_obj, dtype=_tf.float32))

    # No-object mask: below the negative IoU threshold and not an object.
    is_below = c_iou < neg_iou_threshold
    is_noobj = _tf.math.logical_and(is_below, _tf.math.logical_not(is_obj))
    kr_noobj_ij = _tf.stop_gradient(_tf.cast(is_noobj, dtype=_tf.float32))

    # Number of object cells, padded to avoid division by zero.
    eps_count = _tf.reduce_sum(kr_obj_ij) + _tf.constant(1e-4)

    scale_conf = 1 / (self.batch_size * self.grid_shape[0] *
                      self.grid_shape[1])

    kr_obj_ij_plus1 = _tf.expand_dims(kr_obj_ij, -1)

    # With rescoring the confidence target is the (non-differentiated) IoU.
    obj_gt_conf = (kr_obj_ij * _tf.stop_gradient(iou)
                   if rescore else kr_obj_ij)

    obj_w = kr_obj_ij * lmb_obj + kr_noobj_ij * lmb_noobj

    loss_xy = (lmb_coord_xy *
               _tf.reduce_sum(kr_obj_ij_plus1 * _tf.square(gt_xy - xy)) /
               eps_count)

    loss_wh = _tf.losses.huber_loss(
        labels=gt_raw_wh,
        predictions=raw_wh,
        weights=lmb_coord_wh * kr_obj_ij_plus1,
        delta=1.0,
    )

    conf_xent = _tf.nn.sigmoid_cross_entropy_with_logits(
        labels=obj_gt_conf, logits=raw_conf)
    loss_conf = scale_conf * _tf.reduce_sum(obj_w * conf_xent)

    cls_xent = _tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=gt_class, logits=class_scores)
    loss_cls = (lmb_class * _tf.reduce_sum(kr_obj_ij * cls_xent) /
                eps_count)

    return _tf.add_n([loss_xy, loss_wh, loss_conf, loss_cls])
bias3 = tf.Variable(tf.constant(0.1, shape=[1])) #输出y y = hidden_layer(x, weight1, bias1, weight2, bias2, weight3, bias3) #加入正则化 #regularizer = tf.contrib.layers.l2_regularizer(0.01) regularizer = tf.keras.regularizers.l2(0.001) regularization = regularizer(weight1) + regularizer(weight2) + regularizer( weight3) tf.add_to_collection("losses", regularization) #定义损失函数 error_loss = tf.reduce_sum(tf.pow(y_ - y, 2)) / sample_size tf.add_to_collection("losses", error_loss) loss = tf.add_n(tf.get_collection("losses")) #定义准确率 accuracy = tf.count_nonzero(tf.less(tf.pow(y_ - y, 2), 0.25)) / sample_size_yz #定义优化器 train_op = tf.train.AdamOptimizer(0.05).minimize(loss) #train_op = tf.train.GradientDescentOptimizer(0.05).minimize(loss) with tf.Session() as sess: tf.global_variables_initializer().run() for i in range(training_steps): sess.run(train_op, feed_dict={x: data, y_: label}) if i % 2000 == 0: #计算损失函数 loss_value = sess.run(loss, feed_dict={x: data, y_: label})
def _define_graph(model_type, input_shape, output_size, nn_width, nn_depth,
                  weight_decay, learning_rate):
    """Define the model graph.

    Builds, in the current default graph, a shared torso followed by a policy
    head and a value head, their losses, an L2 penalty and an Adam train op.
    Tensors are exposed through their graph names ("input", "legals_mask",
    "training", "policy_softmax", "policy_targets", "policy_loss",
    "value_out", "value_targets", "value_loss", "l2_reg_loss", "train");
    nothing is returned.

    Args:
        model_type: "mlp", "conv2d" or "resnet".
        input_shape: shape of one observation; flattened for the input
            placeholder and restored by a Reshape for non-MLP models.
        output_size: number of policy outputs.
        nn_width: dense units / conv filters per torso layer.
        nn_depth: number of torso layers.
        weight_decay: multiplier of the L2 loss of each non-bias variable.
        learning_rate: learning rate of the Adam optimizer.

    Raises:
        ValueError: if `model_type` is not one of the supported values.
    """
    # Inference inputs
    flat_size = int(np.prod(input_shape))
    observations = tf.placeholder(tf.float32, [None, flat_size],
                                  name="input")
    legals_mask = tf.placeholder(tf.bool, [None, output_size],
                                 name="legals_mask")
    training = tf.placeholder(tf.bool, name="training")

    # Batch-norm update ops are accumulated here and run with the train op.
    bn_updates = []
    is_mlp = model_type == "mlp"

    # Main torso of the network
    if is_mlp:
        # Ignore the input shape, treat it as a flat array.
        torso = observations
        for layer_idx in range(nn_depth):
            torso = cascade(torso, [
                tfkl.Dense(nn_width, name=f"torso_{layer_idx}_dense"),
                tfkl.Activation("relu"),
            ])
    elif model_type == "conv2d":
        torso = tfkl.Reshape(input_shape)(observations)
        for layer_idx in range(nn_depth):
            torso = cascade(torso, [
                conv_2d(nn_width, 3, name=f"torso_{layer_idx}_conv"),
                batch_norm(training, bn_updates,
                           f"torso_{layer_idx}_batch_norm"),
                tfkl.Activation("relu"),
            ])
    elif model_type == "resnet":
        torso = cascade(observations, [
            tfkl.Reshape(input_shape),
            conv_2d(nn_width, 3, name="torso_in_conv"),
            batch_norm(training, bn_updates, "torso_in_batch_norm"),
            tfkl.Activation("relu"),
        ])
        for layer_idx in range(nn_depth):
            torso = residual_layer(torso, nn_width, 3, training, bn_updates,
                                   f"torso_{layer_idx}")
    else:
        raise ValueError("Unknown model type.")

    # The policy head
    if is_mlp:
        policy_layers = [
            tfkl.Dense(nn_width, name="policy_dense"),
            tfkl.Activation("relu"),
        ]
    else:
        policy_layers = [
            conv_2d(filters=2, kernel_size=1, name="policy_conv"),
            batch_norm(training, bn_updates, "policy_batch_norm"),
            tfkl.Activation("relu"),
            tfkl.Flatten(),
        ]
    policy_head = cascade(torso, policy_layers)
    raw_policy_logits = tfkl.Dense(output_size, name="policy")(policy_head)
    # Illegal actions get a hugely negative logit.
    policy_logits = tf.where(legals_mask, raw_policy_logits,
                             -1e32 * tf.ones_like(raw_policy_logits))
    # Created only so the softmax is reachable by name in the graph.
    tf.identity(tfkl.Softmax()(policy_logits), name="policy_softmax")
    policy_targets = tf.placeholder(shape=[None, output_size],
                                    dtype=tf.float32,
                                    name="policy_targets")
    policy_xent = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=policy_logits, labels=policy_targets)
    policy_loss = tf.reduce_mean(policy_xent, name="policy_loss")

    # The value head
    if is_mlp:
        # Nothing specific before the shared value head.
        value_head = torso
    else:
        value_head = cascade(torso, [
            conv_2d(filters=1, kernel_size=1, name="value_conv"),
            batch_norm(training, bn_updates, "value_batch_norm"),
            tfkl.Activation("relu"),
            tfkl.Flatten(),
        ])
    value_dense_out = cascade(value_head, [
        tfkl.Dense(nn_width, name="value_dense"),
        tfkl.Activation("relu"),
        tfkl.Dense(1, name="value"),
        tfkl.Activation("tanh"),
    ])
    # Need the identity to name the single value output from the dense layer.
    value_out = tf.identity(value_dense_out, name="value_out")
    value_targets = tf.placeholder(shape=[None, 1],
                                   dtype=tf.float32,
                                   name="value_targets")
    value_mse = tf.losses.mean_squared_error(value_out, value_targets)
    value_loss = tf.identity(value_mse, name="value_loss")

    # L2 penalty over every trainable variable that is not a bias.
    l2_terms = [
        weight_decay * tf.nn.l2_loss(variable)
        for variable in tf.trainable_variables()
        if "/bias:" not in variable.name
    ]
    l2_reg_loss = tf.add_n(l2_terms, name="l2_reg_loss")

    total_loss = policy_loss + value_loss + l2_reg_loss
    optimizer = tf.train.AdamOptimizer(learning_rate)
    with tf.control_dependencies(bn_updates):
        # The op is looked up by its name ("train"); no binding is needed.
        optimizer.minimize(total_loss, name="train")
def finalize(self, optimizer, optimizer_args=None, *args, **kwargs):
    """Finalizes the network.

    Constructs every layer's variables, builds the training and prediction
    tensors, initializes the variables and optionally loads saved weights.

    Arguments:
        optimizer: (tf.train.Optimizer) An optimizer class from those
            available at tf.train.Optimizer.
        optimizer_args: (dict) A dictionary of arguments for the __init__
            method of the chosen optimizer.

    Returns: None

    Raises:
        RuntimeError: if the network has no layers or was already finalized.
    """
    layer_count = len(self.layers)
    if layer_count == 0:
        raise RuntimeError("Cannot finalize an empty network.")
    if self.finalized:
        raise RuntimeError("Can only finalize a network once.")

    if optimizer_args is None:
        optimizer_args = {}
    self.optimizer = optimizer(**optimizer_args)

    # Construct all variables.
    with self.sess.as_default(), tf.variable_scope(self.name):
        self.scaler = TensorStandardScaler(self.layers[0].get_input_dim())
        for index, layer in enumerate(self.layers):
            with tf.variable_scope("Layer%i" % index):
                layer.construct_vars()
                self.decays.extend(layer.get_decays())
                self.optvars.extend(layer.get_vars())
    self.nonoptvars.extend(self.scaler.get_vars())

    # Setup training
    with tf.variable_scope(self.name):
        self.optimizer = optimizer(**optimizer_args)
        self.sy_train_in = tf.placeholder(
            dtype=tf.float32,
            shape=[self.num_nets, None, self.layers[0].get_input_dim()],
            name="training_inputs")
        self.sy_train_targ = tf.placeholder(
            dtype=tf.float32,
            shape=[self.num_nets, None, self.layers[-1].get_output_dim()],
            name="training_targets")
        # Objective = summed per-net losses + weight-decay terms.
        summed_losses = tf.reduce_sum(
            self._compile_losses(self.sy_train_in, self.sy_train_targ))
        train_loss = summed_losses + tf.add_n(self.decays)
        self.mse_loss = self._compile_losses(self.sy_train_in,
                                             self.sy_train_targ)

        self.train_op = self.optimizer.minimize(train_loss,
                                                var_list=self.optvars)

    # Initialize all variables
    vars_to_init = (self.optvars + self.nonoptvars +
                    self.optimizer.variables())
    self.sess.run(tf.variables_initializer(vars_to_init))

    # Setup prediction
    with tf.variable_scope(self.name):
        self.sy_pred_in2d = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.layers[0].get_input_dim()],
            name="2D_training_inputs")
        factored_2d = self.create_prediction_tensors(self.sy_pred_in2d,
                                                     factored=True)
        self.sy_pred_mean2d_fac = factored_2d[0]
        # Mean and variance of the factored predictions over axis 0.
        self.sy_pred_mean2d = tf.reduce_mean(self.sy_pred_mean2d_fac, axis=0)
        self.sy_pred_var2d = tf.reduce_mean(
            tf.square(self.sy_pred_mean2d_fac - self.sy_pred_mean2d), axis=0)

        self.sy_pred_in3d = tf.placeholder(
            dtype=tf.float32,
            shape=[self.num_nets, None, self.layers[0].get_input_dim()],
            name="3D_training_inputs")
        factored_3d = self.create_prediction_tensors(self.sy_pred_in3d,
                                                     factored=True)
        self.sy_pred_mean3d_fac = factored_3d[0]

    # Load model if needed
    if self.model_loaded:
        with self.sess.as_default():
            saved = loadmat(
                os.path.join(self.model_dir, "%s.mat" % self.name))
            # Saved arrays are keyed by position in nonoptvars + optvars.
            for index, variable in enumerate(self.nonoptvars + self.optvars):
                variable.load(saved[str(index)])

    self.finalized = True
def create_dual_ibp_approx(num_layers,
                           batch_size,
                           action_max,
                           W_T_list,
                           b_T_list,
                           action_tensor_center,
                           return_full_info=False):
    """Build the dual objective from interval bounds around an input centre.

    The input box is `action_tensor_center +/- action_max`.  Interval bounds
    are propagated forward through the affine layers `W_T_list` / `b_T_list`,
    the dual variables `Nu_i` are then formed backwards from the last layer,
    and the pieces are combined into `J_tilde`.

    Args:
        num_layers: number of layers K; the loops below index layers relative
            to it.
        batch_size: leading dimension of the initial `Nu_list` zero tensors.
        action_max: half-width of the input box around the centre.
        W_T_list: per-layer weights, right-multiplied (`x @ W_T`).
        b_T_list: per-layer biases.
        action_tensor_center: centre of the input box.
            # assumes shape [batch, action_dim] -- TODO confirm
        return_full_info: when true, also return the intermediate quantities.

    Returns:
        `-J_tilde`, or, when `return_full_info` is true, the tuple
        `(-J_tilde, l_list, u_list, D_list, Nu_list, lv_sum, gamma_list,
        psi, Nu_hat_1)`.
    """
    # List of bounds (l_i,u_i) for i = 1,...,K-1
    l_list = [
        action_tensor_center - action_max * tf.ones_like(action_tensor_center)
    ]
    u_list = [
        action_tensor_center + action_max * tf.ones_like(action_tensor_center)
    ]

    # List of transition matrices D_i for i = 1,...,K-1
    D_list = [tf.zeros_like(action_tensor_center)]

    # Indicators of spanning ReLu neurons for i = 1,...,K-1
    I_list = [tf.zeros_like(action_tensor_center)]

    # Indicators of active ReLu neurons for i = 1,...,K-1
    # (collected below but not part of the returned values)
    Ip_list = [tf.zeros_like(action_tensor_center)]

    # Final list of duals nu_i for i = 1,...,K-1
    # Placeholders only; entries used later are overwritten in the backward
    # pass.
    Nu_list = [
        tf.zeros([batch_size, W_T_list[0].get_shape().as_list()[1], 1])
        for i in range(num_layers - 1)
    ]

    # Initialize Nu_K
    # The two negations cancel: this is a [1, 1, 1] tensor holding 1.0.
    Nu_K = -tf.expand_dims(-tf.eye(1), axis=-1)

    # Final list of b_i'*nu_{i+1} for i = 1,...,K-1
    gamma_list = [b_T_list[i] for i in range(num_layers - 1)]

    ################## get bounds for layers i = 2,...,K-1
    for i in range(2, num_layers):
        pre_l_i = l_list[-1]
        pre_u_i = u_list[-1]

        # Centre and radius of the previous layer's box.
        mu_i = 0.5 * (pre_l_i + pre_u_i)
        r_i = 0.5 * (pre_u_i - pre_l_i)

        # Interval arithmetic through the affine map: centre goes through W,
        # radius through |W|.
        l_i = tf.matmul(mu_i, W_T_list[i - 2]) - tf.matmul(
            r_i, tf.abs(W_T_list[i - 2])) + b_T_list[i - 2]

        u_i = tf.matmul(mu_i, W_T_list[i - 2]) + tf.matmul(
            r_i, tf.abs(W_T_list[i - 2])) + b_T_list[i - 2]

        l_list.append(l_i)
        u_list.append(u_i)

        # form Ip, I
        # NOTE(review): get_I / get_D are defined in dual_method; per the
        # comments above they presumably yield the active / spanning ReLU
        # indicators and the transition matrix -- confirm there.
        Ip_i, I_i = dual_method.get_I(l_list[-1], u_list[-1])
        I_list.append(I_i)
        Ip_list.append(Ip_i)

        # form D
        D_i = dual_method.get_D(l_list[-1], u_list[-1], Ip_i, I_i)
        D_list.append(D_i)

    ############## Go backward and form Nu_i

    # initialize Nu_{K-1} & gamma_{K-1}
    Nu_list[-1] = tf.einsum('ij,jk->ijk', D_list[-1], W_T_list[-1])
    # Tile Nu_K along axis 0 to the static leading size of Nu_{K-1}.
    Nu_K = tf.tile(Nu_K, [Nu_list[-1].get_shape().as_list()[0], 1, 1])
    Nu_list[-1] = tf.einsum('ijk,ikm->ijm', Nu_list[-1], Nu_K)

    gamma_list[-1] = tf.einsum('ij,ijm->im', gamma_list[-1], Nu_K)

    # initialize lv_sum
    lv_sum = tf.einsum('ij,ijm->im', l_list[-1] * I_list[-1],
                       tf.nn.relu(Nu_list[-1]))

    # update Nu_j for layers j = K-2,...,2
    # and gamma_j for layers j = K-2,...,2
    for j in range(num_layers - 2, 1, -1):
        Nu_hat_j = tf.einsum('jk,ikm->ijm', W_T_list[j - 1], Nu_list[j])

        gamma_list[j - 1] = tf.einsum('ij,ijm->im', b_T_list[j - 1],
                                      Nu_list[j])

        Nu_list[j - 1] = tf.einsum('ij,ijk->ijk', D_list[j - 1], Nu_hat_j)

        lv_sum = tf.add(
            lv_sum,
            tf.einsum('ij,ijm->im', l_list[j - 1] * I_list[j - 1],
                      tf.nn.relu(Nu_list[j - 1])))

    # update nu_hat_1 and gamma_1
    Nu_hat_1 = tf.einsum('jk,ikm->ijm', W_T_list[0], Nu_list[1])

    gamma_list[0] = tf.einsum('ij,ijm->im', b_T_list[0], Nu_list[1])

    # Compute J_tilde
    psi = tf.einsum('ij,ijm->im', action_tensor_center,
                    Nu_hat_1) + tf.add_n(gamma_list)

    # L1 norm of Nu_hat_1 along axis 1.
    Nu_hat_1_norm = tf.norm(Nu_hat_1, 1, axis=1, keepdims=False)

    J_tilde = -psi - action_max * Nu_hat_1_norm + lv_sum

    if return_full_info:
        return (-J_tilde, l_list, u_list, D_list, Nu_list, lv_sum, gamma_list,
                psi, Nu_hat_1)
    else:
        return -J_tilde
def bilinear_sampler(img, x, y):
    """
    Performs bilinear sampling of the input images according to the
    normalized coordinates provided by the sampling grid. Note that
    the sampling is done identically for each channel of the input.

    Input
    -----
    - img: batch of images in (B, H, W, C) layout.
    - x, y: normalized sampling coordinates (the two halves of the sampling
      grid, e.g. the output of affine_grid_generator). They are mapped with
      0.5 * (coord + 1) * size, so -1 maps to 0 and +1 maps to W (for x) or
      H (for y).
      # assumes x and y are rank-3, (B, H_out, W_out) -- TODO confirm; the
      # weights get a new axis 3 to broadcast over channels.

    Returns
    -------
    - interpolated images: the weighted sum of the four neighbouring pixels
      of every sampling location.

    Notes
    -----
    The interpolation weights are computed from the corner indices *after*
    they are clipped to the image. Where both corners of an axis clip to the
    same index (locations at or beyond the last row/column), the paired
    weights cancel and the sampled value is zero.
    """
    # Only height and width are needed; the batch and channel sizes used to
    # be fetched here as well but were never read.
    H = tf.shape(img)[1]
    W = tf.shape(img)[2]
    max_y = tf.cast(H - 1, 'int32')
    max_x = tf.cast(W - 1, 'int32')
    zero = tf.zeros([], dtype='int32')

    # cast indices as float32 (for rescaling)
    x = tf.cast(x, 'float32')
    y = tf.cast(y, 'float32')

    # rescale x and y from [-1, 1] to [0, W] / [0, H]
    x = 0.5 * ((x + 1.0) * tf.cast(W, 'float32'))
    y = 0.5 * ((y + 1.0) * tf.cast(H, 'float32'))

    # grab 4 nearest corner points for each (x_i, y_i)
    # i.e. we need a rectangle around the point of interest
    x0 = tf.cast(tf.floor(x), 'int32')
    x1 = x0 + 1
    y0 = tf.cast(tf.floor(y), 'int32')
    y1 = y0 + 1

    # clip to the valid index range so lookups never leave the image
    x0 = tf.clip_by_value(x0, zero, max_x)
    x1 = tf.clip_by_value(x1, zero, max_x)
    y0 = tf.clip_by_value(y0, zero, max_y)
    y1 = tf.clip_by_value(y1, zero, max_y)

    # get pixel value at corner coords
    Ia = get_pixel_value(img, x0, y0)
    Ib = get_pixel_value(img, x0, y1)
    Ic = get_pixel_value(img, x1, y0)
    Id = get_pixel_value(img, x1, y1)

    # recast as float for delta calculation
    x0 = tf.cast(x0, 'float32')
    x1 = tf.cast(x1, 'float32')
    y0 = tf.cast(y0, 'float32')
    y1 = tf.cast(y1, 'float32')

    # calculate deltas: each corner is weighted by the area of the rectangle
    # spanned by the sample point and the opposite corner
    wa = (x1 - x) * (y1 - y)
    wb = (x1 - x) * (y - y0)
    wc = (x - x0) * (y1 - y)
    wd = (x - x0) * (y - y0)

    # add a trailing dimension so the weights broadcast over channels
    wa = tf.expand_dims(wa, axis=3)
    wb = tf.expand_dims(wb, axis=3)
    wc = tf.expand_dims(wc, axis=3)
    wd = tf.expand_dims(wd, axis=3)

    # compute output
    out = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id])

    return out
def generator(z,
              progress,
              num_filters_fn,
              resolution_schedule,
              num_blocks=None,
              kernel_size=3,
              colors=3,
              to_rgb_activation=None,
              scope='progressive_gan_generator',
              reuse=None):
    """Generator network for the progressive GAN model.

    Args:
        z: A `Tensor` of latent vector. The first dimension must be batch
            size.
        progress: A scalar float `Tensor` of training progress.
        num_filters_fn: A function that maps `block_id` to # of filters for
            the block.
        resolution_schedule: An object of `ResolutionSchedule`.
        num_blocks: An integer of number of blocks. None means maximum number
            of blocks, i.e. `resolution_schedule.num_resolutions`. Defaults
            to None.
        kernel_size: An integer of convolution kernel size.
        colors: Number of output color channels. Defaults to 3.
        to_rgb_activation: Activation function applied when output rgb.
        scope: A string or variable scope.
        reuse: Whether to reuse `scope`. Defaults to None which means to
            inherit the reuse option of the parent scope.

    Returns:
        A `Tensor` of model output and a dictionary of model end points.
    """
    if num_blocks is None:
        num_blocks = resolution_schedule.num_resolutions

    start_h, start_w = resolution_schedule.start_resolutions
    final_h, final_w = resolution_schedule.final_resolutions

    def _leaky_relu_pixel_norm(t):
        # Activation used by every feature convolution.
        return layers.pixel_norm(tf.nn.leaky_relu(t))

    def _feature_conv(name, inputs, ksize, n_filters, padding='SAME'):
        # Convolution followed by leaky-ReLU and pixel normalization.
        return layers.custom_conv2d(
            x=inputs,
            filters=n_filters,
            kernel_size=ksize,
            padding=padding,
            activation=_leaky_relu_pixel_norm,
            he_initializer_slope=0.0,
            scope=name)

    def _rgb_head(inputs):
        # 1x1 convolution projecting features to `colors` channels.
        return layers.custom_conv2d(
            x=inputs,
            filters=colors,
            kernel_size=1,
            padding='SAME',
            activation=to_rgb_activation,
            scope='to_rgb')

    end_points = {}

    with tf.variable_scope(scope, reuse=reuse):
        with tf.name_scope('input'):
            net = tf.layers.flatten(z)
            end_points['latent_vector'] = net

        # First block: turn the latent vector into a start_h x start_w map.
        with tf.variable_scope(block_name(1)):
            net = tf.expand_dims(tf.expand_dims(net, 1), 1)
            net = layers.pixel_norm(net)
            # Pad the 1 x 1 image to 2 * (start_h - 1) x 2 * (start_w - 1)
            # with zeros for the next conv.
            pad_spec = [
                [0] * 2,
                [start_h - 1] * 2,
                [start_w - 1] * 2,
                [0] * 2,
            ]
            net = tf.pad(tensor=net, paddings=pad_spec)
            # The output is start_h x start_w x num_filters_fn(1).
            net = _feature_conv('conv0', net, (start_h, start_w),
                                num_filters_fn(1), 'VALID')
            net = _feature_conv('conv1', net, kernel_size, num_filters_fn(1))
            block_features = [net]

        # Remaining blocks: upscale, then two convolutions each.
        for block_id in range(2, num_blocks + 1):
            with tf.variable_scope(block_name(block_id)):
                net = layers.upscale(net, resolution_schedule.scale_base)
                net = _feature_conv('conv0', net, kernel_size,
                                    num_filters_fn(block_id))
                net = _feature_conv('conv1', net, kernel_size,
                                    num_filters_fn(block_id))
                block_features.append(net)

        # Project every block to RGB at the final resolution and blend.
        weighted_rgbs = []
        block_ids = range(1, num_blocks + 1)
        for block_id, features in zip(block_ids, block_features):
            with tf.variable_scope(block_name(block_id)):
                rgb = _rgb_head(features)
                factor = resolution_schedule.scale_factor(block_id)
                rgb = layers.upscale(rgb, factor)
                end_points['upscaled_rgb_{}'.format(block_id)] = rgb

                # alpha_i is used to replace lod_select. Note sum(alpha_i) is
                # guaranteed to be 1.
                blend = _generator_alpha(block_id, progress)
                end_points['alpha_{}'.format(block_id)] = blend

                weighted_rgbs.append(rgb * blend)

        predictions = tf.add_n(weighted_rgbs)
        batch_size = tf.compat.dimension_value(z.shape[0])
        predictions.set_shape([batch_size, final_h, final_w, colors])
        end_points['predictions'] = predictions

    return predictions, end_points
def get_regularization_loss(self, scope_name, name='regularization_loss'):
    """Return the sum of the regularization losses found for `scope_name`.

    Args:
        scope_name: passed unchanged to `self.get_regularization_losses`.
        name: name given to the returned op.

    Returns:
        The `tf.add_n` sum of the collected losses, or a scalar constant
        0.0 when no loss was collected.
    """
    losses = self.get_regularization_losses(scope_name)
    if not losses:
        # tf.add_n rejects an empty list; an empty collection simply means
        # there is nothing to penalize.
        return tf.constant(0.0, name=name)
    return tf.add_n(losses, name=name)
# Builds the training graph pieces for a 3-D, `nClasses`-way voxel classifier:
# a label input cropped to the prediction's spatial size, a mean per-class
# accuracy, a cross-entropy loss and the Adam training op.
# NOTE(review): `sm` is defined outside this fragment; it is used below as
# per-class scores with classes on axis 4 (presumably a softmax output).

# Label input; center-crop it so its spatial size matches that of `sm`.
y0 = Input((imSize,imSize,imSize,nClasses))
toCrop = int((y0.shape[1]-sm.shape[1])//2)
y = Cropping3D(toCrop)(y0)
cropSize = y.shape[1]

# Per-class ratio: (# voxels labelled AND predicted as the class) /
# (# voxels labelled as the class).
l = []
# nl = []
for iClass in range(nClasses):
    # Channel `iClass` of the labels as an int32 mask.
    labels0 = tf.reshape(tf.to_int32(tf.slice(y,[0,0,0,0,iClass],[-1,-1,-1,-1,1])),[batchSize,cropSize,cropSize,cropSize])
    # int32 mask of voxels whose arg-max class is `iClass`.
    predict0 = tf.reshape(tf.to_int32(tf.equal(tf.argmax(sm,4),iClass)),[batchSize,cropSize,cropSize,cropSize])
    correct = tf.multiply(labels0,predict0)
    nCorrect0 = tf.reduce_sum(correct)
    nLabels0 = tf.reduce_sum(labels0)
    # NOTE(review): if a class has no labelled voxel in the batch,
    # nLabels0 is 0 and this ratio is not finite.
    l.append(tf.to_float(nCorrect0)/tf.to_float(nLabels0))
    # nl.append(nLabels0)
# Unweighted mean of the per-class ratios.
acc = tf.add_n(l)/nClasses

# Cross-entropy summed over every voxel and class (not averaged).
loss = -tf.reduce_sum(tf.multiply(y,tf.log(sm)))

# Run the ops in the UPDATE_OPS collection before each optimizer step
# (see the batch-normalization links below).
updateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
optimizer = tf.train.AdamOptimizer(learningRate)
# optimizer = tf.train.MomentumOptimizer(0.00001,0.9)
with tf.control_dependencies(updateOps):
    optOp = optimizer.minimize(loss)
# optOp = optimizer.minimize(loss)
# optOp = tf.group([optOp, updateOps])
# https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/layers/batch_normalization
# https://towardsdatascience.com/batch-normalization-theory-and-how-to-use-it-with-tensorflow-1892ca0173ad

# Only log the loss summary when training.
if train:
    tf.summary.scalar('loss', loss)
def reg_l2_loss(weight_decay, regex=r'.*(kernel|weight):0$'):
    """Return the weight-decay-scaled L2 loss of the matching variables.

    Args:
        weight_decay: multiplier applied to the summed L2 loss.
        regex: pattern matched (with `re.match`) against each trainable
            variable's name; only matching variables are regularized.

    Returns:
        `weight_decay` times the sum of `tf.nn.l2_loss(v)` over the matching
        trainable variables, or a scalar constant 0.0 when none matches.
    """
    var_match = re.compile(regex)
    l2_terms = [
        tf.nn.l2_loss(v)
        for v in tf.trainable_variables()
        if var_match.match(v.name)
    ]
    if not l2_terms:
        # tf.add_n rejects an empty list; nothing matched, so no penalty.
        return tf.constant(0.0)
    return weight_decay * tf.add_n(l2_terms)
def model_fn(features, labels, mode, params):
    """Build the TPUEstimatorSpec for the VGG-16 / ResNet-v2-50 classifier.

    Args:
        features: (Tensor) of input features, i.e. images.
        labels: (Tensor) of ground truth class indices. Not used in PREDICT
            mode.
        mode: (String) train/eval/predict modes.
        params: (Dict) of model training parameters. Reads "model_name" and,
            when training, "num_train_steps_per_epoch", "model_dir" and
            "iterations_per_loop".

    Returns:
        A `tf.estimator.tpu.TPUEstimatorSpec` for the requested mode.

    Raises:
        ValueError: if `params["model_name"]` starts with neither "vgg_16"
            nor "resnet_v2_50".
    """
    num_classes = 1000
    eval_metrics, train_op, loss = None, None, None
    host_call = None
    training = mode == tf.estimator.ModeKeys.TRAIN

    model_name = params["model_name"]
    if model_name.startswith("vgg_16"):
        cfg = vgg_config(add_v1net_early=FLAGS.add_v1net_early,
                         add_v1net=FLAGS.add_v1net)
        model = VGG(cfg)
    elif model_name.startswith("resnet_v2_50"):
        cfg = resnet_v2_config(add_v1net_early=FLAGS.add_v1net_early,
                               compact=FLAGS.compact)
        model = ResNetV2(cfg)
    else:
        # Fail with a clear message instead of an unbound `model` below.
        raise ValueError("Unsupported model_name: %r" % (model_name,))

    predictions, _ = model.build_model(images=features,
                                       is_training=training,
                                       preprocess=True,
                                       )

    # output predictions
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            "predictions": tf.argmax(predictions, axis=1),
            "probabilities": tf.nn.softmax(predictions),
        }
        return tf.estimator.tpu.TPUEstimatorSpec(mode,
                                                 predictions=predictions)

    # Labels are only touched past the PREDICT return, since they are not
    # needed (and may be absent) when predicting.
    one_hot_labels = tf.one_hot(labels, depth=num_classes)
    loss_fn = tf.nn.softmax_cross_entropy_with_logits
    loss_xent = tf.reduce_mean(loss_fn(logits=predictions,
                                       labels=one_hot_labels,
                                       ))
    # TODO(vveeraba): Test if layer normalization is taken into account below
    loss = loss_xent + FLAGS.weight_decay * tf.add_n(
        [tf.nn.l2_loss(v) for v in tf.trainable_variables()
         if 'batch_normalization' not in v.name])

    if training:
        global_step = tf.train.get_global_step()
        steps_per_epoch = params["num_train_steps_per_epoch"]
        # Starting lr for fast minimum of lr*100, 1e-4
        fast_start = min(FLAGS.learning_rate*100, 1e-4)
        fast_learning_rate = build_learning_rate(fast_start,
                                                 global_step,
                                                 steps_per_epoch,
                                                 decay_factor=0.1,
                                                 decay_epochs=1)
        learning_rate = build_learning_rate(FLAGS.learning_rate,
                                            global_step,
                                            steps_per_epoch,
                                            decay_factor=0.1,
                                            decay_epochs=1)

        # Variables whose name contains "v1net" are trained with the "fast"
        # learning rate; every other model variable with the regular one.
        optimizer = get_optimizer(learning_rate,
                                  FLAGS.optimizer,
                                  FLAGS.use_tpu)
        slow_vars = [var for var in model.model_vars
                     if "v1net" not in var.name]
        fast_vars = list(set(model.model_vars).difference(set(slow_vars)))
        fast_optimizer = get_optimizer(fast_learning_rate,
                                       FLAGS.optimizer,
                                       FLAGS.use_tpu)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # NOTE(review): both minimize() calls receive `global_step`, so it is
        # presumably incremented twice per training step - confirm that the
        # learning-rate schedules expect this.
        train_op = optimizer.minimize(loss, global_step, var_list=slow_vars)
        fast_train_op = fast_optimizer.minimize(loss, global_step,
                                                var_list=fast_vars)
        train_op = tf.group([train_op, update_ops, fast_train_op])

        # Everything handed to the host call is reshaped to shape [1].
        gs_t = tf.reshape(global_step, [1])
        lr_t = tf.reshape(learning_rate, [1])
        fast_lr_t = tf.reshape(fast_learning_rate, [1])
        loss_t = tf.reshape(loss, [1])
        predicted_labels = tf.argmax(predictions, axis=1)
        _, top_1_acc = tf.metrics.accuracy(predictions=predicted_labels,
                                           labels=labels)
        _, top_5_acc = tf.metrics.mean(
            tf.cast(tf.nn.in_top_k(predictions, labels, k=5), tf.float32))
        top_1_acc = tf.reshape(top_1_acc, [1])
        top_5_acc = tf.reshape(top_5_acc, [1])

        def host_call_fn(gs, lr, fast_lr, loss, top_1, top_5):
            """Training host call. Creates scalar summaries for training metrics.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `host_call`. See
            https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `host_call`.

            Args:
                gs: `Tensor` with shape `[1]` for the global_step.
                lr: `Tensor` with shape `[1]` for the learning rate.
                fast_lr: `Tensor` with shape `[1]` for the fast learning
                    rate.
                loss: `Tensor` with shape `[1]` for the training loss.
                top_1: `Tensor` with shape `[1]` for top-1 accuracy.
                top_5: `Tensor` with shape `[1]` for top-5 accuracy.

            Returns:
                List of summary ops to run on the CPU host.
            """
            gs = gs[0]
            with tf.compat.v2.summary.create_file_writer(
                    params['model_dir'],
                    max_queue=params['iterations_per_loop']).as_default():
                with tf.compat.v2.summary.record_if(True):
                    tf.compat.v2.summary.scalar(
                        'training/total_loss', loss[0], step=gs)
                    tf.compat.v2.summary.scalar(
                        'training/learning_rate', lr[0], step=gs)
                    tf.compat.v2.summary.scalar(
                        'training/fast_learning_rate', fast_lr[0], step=gs)
                    tf.compat.v2.summary.scalar(
                        'training/top_1_accuracy', top_1[0], step=gs)
                    tf.compat.v2.summary.scalar(
                        'training/top_5_accuracy', top_5[0], step=gs)
                    return tf.summary.all_v2_summary_ops()

        host_call_args = [gs_t, lr_t, fast_lr_t, loss_t,
                          top_1_acc, top_5_acc]
        host_call = (host_call_fn, host_call_args)

    if mode == tf.estimator.ModeKeys.EVAL:
        # Define evaluation metrics:
        def metric_fn(labels, logits):
            """Return streaming eval metrics as (value, update_op) pairs.

            Only uses its own arguments: it must not reference tensors from
            the enclosing `model_fn`.
            """
            one_hot = tf.one_hot(labels, depth=num_classes)
            xentropy = tf.metrics.mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=one_hot,
                                                        logits=logits))
            # in_top_k yields booleans; cast them before averaging.
            top_1_accuracy = tf.metrics.mean(
                tf.cast(tf.nn.in_top_k(logits, labels, k=1), tf.float32))
            top_5_accuracy = tf.metrics.mean(
                tf.cast(tf.nn.in_top_k(logits, labels, k=5), tf.float32))
            return {
                'xentropy': xentropy,
                'top_1_accuracy': top_1_accuracy,
                'top_5_accuracy': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, predictions])

    return tf.estimator.tpu.TPUEstimatorSpec(train_op=train_op,
                                             mode=mode,
                                             loss=loss,
                                             eval_metrics=eval_metrics,
                                             host_call=host_call,
                                             )
def body(self,
         features,
         decode_step=None,
         cache=None,
         decoding_stats=None,
         add_summary=True):
    """Run the optional encoder and the decoder; return logits and losses.

    Args:
        features: dict of tensors. "targets" is required; "inputs",
            "token_bias_inputs" and "token_bias_targets" are optional.
        decode_step: forwarded to the decoder layers; reset to None unless
            `self.hparams.fast_decode` is set.
        cache: optional mapping forwarded to the decoder layers. Its key -1
            holds the encoder output, which is reused when present and
            stored when absent.
        decoding_stats: forwarded to the decoder layers.
        add_summary: whether to record the loss scalar summaries.

    Returns:
        When `self.is_decode` is set, the logits alone. Otherwise a pair
        `(logits, loss_dict)`: the logits get two size-1 axes inserted before
        their last axis, and `loss_dict` maps "training" to the weighted
        cross-entropy and, if the layers reported any, "extra_loss" to the
        sum of the extra losses.
    """
    aux_losses = []
    encoder_output = None
    padding_bias = None
    if not self.hparams.fast_decode:
        decode_step = None

    if "inputs" in features:
        # remove the last two dimensions that are always 1.
        source = features["inputs"]
        source_shape = utils.shape_list(source)[:2] + [self.hidden_size]
        source = tf.reshape(source, source_shape)
        # Padding bias only used for seq2seq models.
        padding_bias = utils.embedding_to_padding(source)
        # Mask random positions
        mask_shape = utils.shape_list(source)
        if self.hparams.input_dropout:
            drop_mask = (
                tf.random.uniform(mask_shape) < self.hparams.input_dropout)
            source = tf.where(drop_mask, tf.zeros_like(source), source)
        if self.hparams.add_timing_signal:
            source += utils.get_timing_signal_1d(self.hparams.max_length,
                                                 self.hidden_size)

        has_cached_encoder = cache is not None and -1 in cache
        if has_cached_encoder:
            encoder_output = cache[-1]
        else:
            encoder_output = utils.transformer_encoder_layers(
                inputs=source,
                num_layers=self.num_encoder_layers,
                hparams=self.hparams,
                losses=aux_losses,
                name="encoder",
                token_bias=features.get("token_bias_inputs"),
                padding_bias=padding_bias)
            # Remember the fresh encoder output for later calls.
            if cache is not None:
                cache[-1] = encoder_output

    # remove the last two dimensions that are always 1.
    targets = tf.to_int32(features["targets"])
    targets = tf.reshape(targets, utils.shape_list(targets)[:2])
    # Clamp targets to max_target_length
    targets = targets[:, :self.hparams.max_target_length]
    if self.is_decode:
        targets = self.process_partial_targets_decoding(targets)

    decoder_input = self.prepare_decoder(targets)
    decoder_output = utils.transformer_decoder_layers(
        inputs=decoder_input,
        num_layers=self.num_decoder_layers,
        hparams=self.hparams,
        encoder_output=encoder_output,
        decode_step=decode_step,
        losses=aux_losses,
        cache=cache,
        name="decoder",
        decoding_stats=decoding_stats,
        token_bias_inputs=features.get("token_bias_inputs"),
        token_bias_targets=features.get("token_bias_targets"),
        padding_bias=padding_bias)
    logits = self.produce_output(decoder_output)

    # Return logits as-is in decoding mode
    if self.is_decode:
        return logits

    # Add cross entropy loss; positions whose target id is 0 get weight 0.
    one_hot_targets = tf.one_hot(
        tf.cast(targets, dtype=tf.int32), self.vocab_size)
    x_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=one_hot_targets, logits=logits)
    weights = tf.to_float(tf.not_equal(targets, 0))
    weighted_total = tf.reduce_sum(x_entropy * weights)
    loss = weighted_total / tf.reduce_sum(weights)
    if add_summary:
        tf.summary.scalar("losses/weight", tf.reduce_sum(weights))
        tf.summary.scalar("losses/x_entropy",
                          tf.reduce_sum(x_entropy * weights))

    loss_dict = {"training": loss}
    if extra_losses_present(aux_losses) if False else aux_losses:
        loss_dict["extra_loss"] = tf.add_n(aux_losses)

    # hack for T2T metrics
    expanded_shape = (utils.shape_list(logits)[:2] + [1, 1] +
                      utils.shape_list(logits)[-1:])
    logits = tf.reshape(logits, expanded_shape)
    return logits, loss_dict
def _forward(self, x, y, model_params, init_states, is_training=False):
    """Runs the recurrent model on one batch and returns its losses.

    Args:
        x: [batch_size, num_steps], input batch.
        y: [batch_size, num_steps], output batch.
        model_params: a `dict` holding the weights 'w_emb', 'w_prev',
            'w_skip' and 'w_soft'.
        init_states: a `dict` whose entry 's' is the carried-over state; it
            is assigned the new final state.
        is_training: if `True`, will apply dropout and regularizations.

    Returns:
        reg_loss: scalar; the cross-entropy loss, plus the L2 weight and
            activation penalties when `is_training`.
        loss: scalar, cross-entropy loss.
    """
    hparams = self.params
    batch_size = hparams.batch_size
    hidden_size = hparams.hidden_size

    w_emb = model_params['w_emb']
    w_prev = model_params['w_prev']
    w_skip = model_params['w_skip']
    w_soft = model_params['w_soft']
    state_var = init_states['s']

    embedded = tf.nn.embedding_lookup(w_emb, x)

    # Dropout and the input/layer masks only exist while training.
    input_mask = None
    layer_mask = None
    if is_training:
        embedded = tf.layers.dropout(
            embedded,
            rate=hparams.drop_i,
            noise_shape=[hparams.batch_size, 1, hidden_size],
            training=True)
        input_mask = _gen_mask([batch_size, hidden_size], hparams.drop_x)
        layer_mask = _gen_mask([batch_size, hidden_size], hparams.drop_l)

    final_state, all_states = _rnn_fn(embedded, state_var, w_prev, w_skip,
                                      input_mask, layer_mask, hparams)
    outputs = all_states
    if is_training:
        outputs = tf.layers.dropout(
            outputs,
            rate=hparams.drop_o,
            noise_shape=[batch_size, 1, hidden_size],
            training=True)

    # Store the final state so that the next batch starts from it.
    state_update = [tf.assign(state_var, final_state)]

    logits = tf.einsum('bnh,vh->bnv', outputs, w_soft)
    token_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    loss = tf.reduce_mean(token_losses)

    reg_loss = loss  # loss + regularization_terms, for training only
    if is_training:
        # L2 weight reg
        weight_penalty = tf.add_n(
            [tf.reduce_sum(w**2) for w in tf.trainable_variables()])
        reg_loss += hparams.weight_decay * weight_penalty

        # activation L2 reg
        reg_loss += hparams.alpha * tf.reduce_mean(all_states**2)

    # Evaluating the returned losses also runs the state assignment.
    with tf.control_dependencies(state_update):
        loss = tf.identity(loss)
        if is_training:
            reg_loss = tf.identity(reg_loss)

    return reg_loss, loss
def position_sensitive_crop_regions(image, boxes, crop_size,
                                    num_spatial_bins, global_pool):
    """Position-sensitive crop and pool rectangular regions from a feature grid.

    The output crops are split into `spatial_bins_y` vertical bins
    and `spatial_bins_x` horizontal bins. For each intersection of a vertical
    and a horizontal bin the output values are gathered by performing
    `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
    channels of the image. This reduces `depth` by a factor of
    `(spatial_bins_y * spatial_bins_x)`.

    When global_pool is True, this function implements a differentiable
    version of position-sensitive RoI pooling used in
    [R-FCN detection system](https://arxiv.org/abs/1605.06409).

    When global_pool is False, this function implements a differentiable
    version of position-sensitive assembling operation used in
    [instance FCN](https://arxiv.org/abs/1603.08678).

    Args:
        image: A `Tensor`. Must be one of the following types: `uint8`,
            `int8`, `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
            A 3-D tensor of shape `[image_height, image_width, depth]`.
            Both `image_height` and `image_width` need to be positive.
        boxes: A `Tensor` of type `float32`.
            A 2-D tensor of shape `[num_boxes, 4]`. Each box is specified in
            normalized coordinates `[y1, x1, y2, x2]`. A normalized
            coordinate value of `y` is mapped to the image coordinate at
            `y * (image_height - 1)`, so as the `[0, 1]` interval of
            normalized image height is mapped to `[0, image_height - 1]` in
            image height coordinates. We do allow y1 > y2, in which case the
            sampled crop is an up-down flipped version of the original image.
            The width dimension is treated similarly.
        crop_size: A list (or tuple) of two integers
            `[crop_height, crop_width]`. All cropped image patches are
            resized to this size. The aspect ratio of the image content is
            not preserved. Both `crop_height` and `crop_width` need to be
            positive.
        num_spatial_bins: A list (or tuple) of two integers
            `[spatial_bins_y, spatial_bins_x]`. Represents the number of
            position-sensitive bins in y and x directions. Both values should
            be >= 1. `crop_height` should be divisible by `spatial_bins_y`,
            and similarly for width. The number of image channels should be
            divisible by (spatial_bins_y * spatial_bins_x).
            Suggested value from R-FCN paper: [3, 3].
        global_pool: A boolean variable.
            If True, we perform average global pooling on the features
            assembled from the position-sensitive score maps.
            If False, we keep the position-pooled features without global
            pooling over the spatial coordinates.
            Note that using global_pool=True is equivalent to but more
            efficient than running the function with global_pool=False and
            then performing global average pooling.

    Returns:
        position_sensitive_features: A 4-D tensor of shape
            `[num_boxes, K, K, crop_channels]`,
            where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
            where K = 1 when global_pool is True (Average-pooled cropped
            regions), and K = crop_size when global_pool is False.

    Raises:
        ValueError: Raised in four situations:
            `num_spatial_bins` is not >= 1;
            `num_spatial_bins` does not divide `crop_size`;
            `(spatial_bins_y*spatial_bins_x)` does not divide `depth` (not
            checked by this function itself; left to `tf.split`);
            `bin_crop_size` is not square when global_pool=False due to the
            constraint in function space_to_depth.
    """
    total_bins = 1
    bin_crop_size = []

    for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
        if num_bins < 1:
            raise ValueError('num_spatial_bins should be >= 1')

        if crop_dim % num_bins != 0:
            raise ValueError(
                'crop_size should be divisible by num_spatial_bins')

        total_bins *= num_bins
        bin_crop_size.append(crop_dim // num_bins)

    if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
        raise ValueError('Only support square bin crop size for now.')

    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
    spatial_bins_y, spatial_bins_x = num_spatial_bins

    # Split each box into spatial_bins_y * spatial_bins_x bins. The bin
    # extents do not depend on the bin index, so compute them once.
    step_y = (ymax - ymin) / spatial_bins_y
    step_x = (xmax - xmin) / spatial_bins_x
    position_sensitive_boxes = []
    for bin_y in range(spatial_bins_y):
        for bin_x in range(spatial_bins_x):
            box_coordinates = [
                ymin + bin_y * step_y,
                xmin + bin_x * step_x,
                ymin + (bin_y + 1) * step_y,
                xmin + (bin_x + 1) * step_x,
            ]
            position_sensitive_boxes.append(
                tf.stack(box_coordinates, axis=1))

    # One channel group per bin.
    image_splits = tf.split(
        value=image, num_or_size_splits=total_bins, axis=2)

    image_crops = []
    for (split, box) in zip(image_splits, position_sensitive_boxes):
        if split.shape.is_fully_defined() and box.shape.is_fully_defined():
            # Static shapes: use the matmul-based implementation.
            crop = tf.squeeze(
                matmul_crop_and_resize(
                    tf.expand_dims(split, axis=0),
                    tf.expand_dims(box, axis=0),
                    bin_crop_size),
                axis=0)
        else:
            crop = tf.image.crop_and_resize(
                tf.expand_dims(split, 0), box,
                tf.zeros(tf.shape(boxes)[0], dtype=tf.int32), bin_crop_size)
        image_crops.append(crop)

    if global_pool:
        # Average over all bins.
        position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
        # Then average over spatial positions within the bins.
        position_sensitive_features = tf.reduce_mean(
            position_sensitive_features, [1, 2], keepdims=True)
    else:
        # Reorder height/width to depth channel.
        block_size = bin_crop_size[0]
        if block_size >= 2:
            image_crops = [
                tf.space_to_depth(crop, block_size=block_size)
                for crop in image_crops
            ]

        # Pack image_crops so that first dimension is for position-sensitive
        # boxes.
        position_sensitive_features = tf.stack(image_crops, axis=0)

        # Unroll the position-sensitive boxes to spatial positions.
        # list(...) lets callers pass num_spatial_bins as a tuple as well.
        position_sensitive_features = tf.squeeze(
            tf.batch_to_space_nd(
                position_sensitive_features,
                block_shape=[1] + list(num_spatial_bins),
                crops=tf.zeros((3, 2), dtype=tf.int32)),
            axis=[0])

        # Reorder back the depth channel.
        if block_size >= 2:
            position_sensitive_features = tf.depth_to_space(
                position_sensitive_features, block_size=block_size)

    return position_sensitive_features
def _calculate(self):
    """Build the multi-scale training losses and store them in `self._losses`.

    Reads the per-frame tensors in `self._endpoints`, concatenates each list
    along the batch axis, builds `self._params.num_scales`-level pyramids
    and, for every scale `s` (weighted by `1 / 2**s`), adds the depth,
    smoothing, motion and consistency terms to the entries of `self._losses`
    (presumably initialised elsewhere in the class). Finally every loss
    listed in `self._weights` is multiplied by twice its weight and, if the
    graph has any, the summed regularization losses are stored as well.

    Also writes 'disparity', 'depth_proximity_weight', 'trans' and
    'inv_trans' into `self._output_endpoints`.
    """
    # On tpu we strive to stack tensors together and perform ops once on the
    # entire stack, to save time HBM memory. We thus stack the batch-of-first-
    # frames and the batch-of-second frames, for both depth and RGB. The batch
    # dimension of rgb_stack and gt_depth_stack are thus twice the original
    # batch size.

    # Create stacks for features that need to be scaled into pyramids for
    # multi-scale training.
    rgb_stack_ = tf.concat(self._endpoints['rgb'], axis=0)
    flipped_rgb_stack_ = tf.concat(self._endpoints['rgb'][::-1], axis=0)
    predicted_depth_stack_ = tf.concat(
        self._endpoints['predicted_depth'], axis=0)
    flipped_predicted_depth_stack_ = tf.concat(
        self._endpoints['predicted_depth'][::-1], axis=0)
    residual_translation_ = tf.concat(
        self._endpoints['residual_translation'], axis=0)
    flipped_residual_translation_ = tf.concat(
        self._endpoints['residual_translation'][::-1], axis=0)
    intrinsics_mat_ = tf.concat(self._endpoints['intrinsics_mat'], axis=0)

    # Create pyramids from each stack to support multi-scale training.
    num_scales = self._params.num_scales
    rgb_pyramid = _get_pyramid(rgb_stack_, num_scales=num_scales)
    flipped_rgb_pyramid = _get_pyramid(
        flipped_rgb_stack_, num_scales=num_scales)
    predicted_depth_pyramid = _get_pyramid(
        predicted_depth_stack_, num_scales=num_scales)
    flipped_predicted_depth_pyramid = _get_pyramid(
        flipped_predicted_depth_stack_, num_scales=num_scales)
    residual_translation_pyramid = _get_pyramid(
        residual_translation_, num_scales=num_scales)
    flipped_residual_translation_pyramid = _get_pyramid(
        flipped_residual_translation_, num_scales=num_scales)
    intrinsics_mat_pyramid = _get_intrinsics_mat_pyramid(
        intrinsics_mat_, num_scales=num_scales)
    validity_mask_ = self._endpoints.get('validity_mask')
    if validity_mask_ is not None:
        validity_mask_ = tf.concat(validity_mask_, axis=0)
        validity_mask_pyramid = _get_pyramid(
            validity_mask_, num_scales, _min_pool2d)
    else:
        validity_mask_pyramid = [None] * num_scales

    if 'groundtruth_depth' in self._endpoints:
        gt_depth_stack_ = tf.concat(
            self._endpoints['groundtruth_depth'], axis=0)
        gt_depth_pyramid = _get_pyramid(gt_depth_stack_,
                                        num_scales=num_scales)
        if 'groundtruth_depth_weight' in self._endpoints:
            gt_depth_weight_stack_ = tf.concat(
                self._endpoints['groundtruth_depth_weight'], axis=0)
        else:
            # No weight map given: weight 1 where the ground-truth depth
            # exceeds 0.2 and 0 elsewhere.
            gt_depth_weight_stack_ = tf.cast(
                tf.greater(gt_depth_stack_, 0.2), tf.float32)
        gt_depth_weight_pyramid = _get_pyramid(
            gt_depth_weight_stack_, num_scales=num_scales)

        if 'groundtruth_depth_filter' in self._endpoints:
            depth_filter_ = tf.concat(
                self._endpoints['groundtruth_depth_filter'], axis=0)
            depth_filter_ = tf.cast(depth_filter_, tf.float32)
            # The pyramid must be built from the filter itself, not from the
            # ground-truth depth stack.
            depth_filter_pyramid = _get_pyramid(
                depth_filter_, num_scales=num_scales)

    # Calculate losses at each scale. Iterate in reverse so that the final
    # output values are set at scale 0.
    for s in reversed(range(self._params.num_scales)):
        # Weight applied to all losses at this scale.
        scale_w = 1.0 / 2**s

        rgb_stack = rgb_pyramid[s]
        predicted_depth_stack = predicted_depth_pyramid[s]
        flipped_predicted_depth_stack = flipped_predicted_depth_pyramid[s]

        if 'groundtruth_depth' in self._endpoints:
            gt_depth_stack = gt_depth_pyramid[s]
            depth_error = tf.abs(gt_depth_stack - predicted_depth_stack)

            # Weigh the spatial loss if a weight map is provided. Otherwise,
            # revert to original behavior.
            gt_depth_weight_stack = gt_depth_weight_pyramid[s]
            depth_error = depth_error * gt_depth_weight_stack

            # Optionally filter the depth map if a boolean depth filter is
            # provided. We use a TPU-friendly equivalent of tf.boolean_mask.
            depth_filter = tf.ones_like(depth_error, tf.float32)
            if 'groundtruth_depth_filter' in self._endpoints:
                depth_filter = depth_filter_pyramid[s]

            self._losses['depth_supervision'] += scale_w * tf.reduce_mean(
                depth_error * depth_filter) / tf.reduce_mean(depth_filter)

        # In theory, the training losses should be agnostic to the global
        # scale of the predicted depth. However in reality second order
        # effects can lead to
        # (https://en.wikipedia.org/wiki/Von_Neumann_stability_analysis)
        # diverging modes. For some reason this happens when training on TPU.
        # Since the scale is immaterial anyway, we normalize it out, and the
        # training stabilizes.
        #
        # Note that the depth supervision term, which is sensitive to the
        # scale, was applied before this normalization. Therefore the scale
        # of the depth is learned.
        mean_depth = tf.reduce_mean(predicted_depth_stack)

        # When training starts, the depth sometimes tends to collapse to a
        # constant value, which seems to be a fixed point where the training
        # can get stuck. To discourage this collapse, we penalize the
        # reciprocal of the variance with a tiny weight. Note that the mean
        # of predicted_depth is one, hence we subtract 1.0.
        depth_var = tf.reduce_mean(
            tf.square(predicted_depth_stack / mean_depth - 1.0))
        # NOTE(review): assigned, not accumulated - with the reversed loop
        # only the scale-0 value survives. Confirm this is intended.
        self._losses['depth_variance'] = scale_w * 1.0 / depth_var

        if self._params.scale_normalization:
            predicted_depth_stack /= mean_depth
            flipped_predicted_depth_stack /= mean_depth

        disp = 1.0 / predicted_depth_stack

        mean_disp = tf.reduce_mean(disp, axis=[1, 2, 3], keep_dims=True)
        self._losses['depth_smoothing'] += (
            scale_w * regularizers.joint_bilateral_smoothing(
                disp / mean_disp, rgb_stack))
        self._output_endpoints['disparity'] = disp

        flipped_rgb_stack = flipped_rgb_pyramid[s]

        background_translation = tf.concat(
            self._endpoints['background_translation'], axis=0)
        flipped_background_translation = tf.concat(
            self._endpoints['background_translation'][::-1], axis=0)
        residual_translation = residual_translation_pyramid[s]
        flipped_residual_translation = (
            flipped_residual_translation_pyramid[s])
        if self._params.scale_normalization:
            background_translation /= mean_depth
            flipped_background_translation /= mean_depth
            residual_translation /= mean_depth
            flipped_residual_translation /= mean_depth
        translation = residual_translation + background_translation
        flipped_translation = (flipped_residual_translation +
                               flipped_background_translation)

        rotation = tf.concat(self._endpoints['rotation'], axis=0)
        flipped_rotation = tf.concat(
            self._endpoints['rotation'][::-1], axis=0)
        intrinsics_mat = intrinsics_mat_pyramid[s]
        intrinsics_mat_inv = intrinsics_utils.invert_intrinsics_matrix(
            intrinsics_mat)
        validity_mask = validity_mask_pyramid[s]

        transformed_depth = transform_depth_map.using_motion_vector(
            tf.squeeze(predicted_depth_stack, axis=-1), translation,
            rotation, intrinsics_mat, intrinsics_mat_inv)
        flipped_predicted_depth_stack = tf.squeeze(
            flipped_predicted_depth_stack, axis=-1)
        if self._params.target_depth_stop_gradient:
            flipped_predicted_depth_stack = tf.stop_gradient(
                flipped_predicted_depth_stack)
        # The first and second halves of the batch now contain Frame1's and
        # Frame2's depths transformed onto Frame2 and Frame1 respectively. To
        # demand consistency, we need to `flip` `predicted_depth` as well.
        loss_endpoints = (
            consistency_losses.rgbd_and_motion_consistency_loss(
                transformed_depth,
                rgb_stack,
                flipped_predicted_depth_stack,
                flipped_rgb_stack,
                rotation,
                translation,
                flipped_rotation,
                flipped_translation,
                validity_mask=validity_mask))

        normalized_trans = regularizers.normalize_motion_map(
            residual_translation, translation)
        self._losses[
            'motion_smoothing'] += scale_w * regularizers.l1smoothness(
                normalized_trans, self._weights.motion_drift == 0)
        self._losses[
            'motion_drift'] += scale_w * regularizers.sqrt_sparsity(
                normalized_trans)
        self._losses['depth_consistency'] += (
            scale_w * loss_endpoints['depth_error'])
        self._losses[
            'rgb_consistency'] += scale_w * loss_endpoints['rgb_error']
        self._losses[
            'ssim'] += scale_w * 0.5 * loss_endpoints['ssim_error']

        self._losses['rotation_cycle_consistency'] += (
            scale_w * loss_endpoints['rotation_error'])
        self._losses['translation_cycle_consistency'] += (
            scale_w * loss_endpoints['translation_error'])

        self._output_endpoints['depth_proximity_weight'] = loss_endpoints[
            'depth_proximity_weight']
        self._output_endpoints['trans'] = translation
        self._output_endpoints['inv_trans'] = flipped_translation

    for k, w in self._weights.as_dict().items():
        # multiply by 2 to match the scale of the old code.
        self._losses[k] *= w * 2

    if tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES):
        self._losses[tf.GraphKeys.REGULARIZATION_LOSSES] = tf.add_n(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
def benchmark_model(self, warmup_runs, bm_runs, num_threads,
                    trace_filename=None):
    """Benchmark model.

    Builds the inference graph, freezes it into constants, then times
    `bm_runs` session runs on a random input after `warmup_runs` warm-up
    runs and prints the mean per-batch time and the resulting FPS.

    Args:
        warmup_runs: number of untimed (but individually printed) warm-up
            runs.
        bm_runs: number of timed runs the per-batch time is averaged over.
        num_threads: if > 0, the intra-op thread count for the session
            (inter-op threads are then set to 1); otherwise the default
            session config is used.
        trace_filename: if set, one extra fully-traced run is executed and
            its Chrome trace is written to this path.
    """
    if self.tensorrt:
        print('Using tensorrt ', self.tensorrt)
        graphdef = self.freeze_model()

    if num_threads > 0:
        print('num_threads for benchmarking: {}'.format(num_threads))
        sess_config = tf.ConfigProto(
            intra_op_parallelism_threads=num_threads,
            inter_op_parallelism_threads=1)
    else:
        sess_config = tf.ConfigProto()

    # rewriter_config_pb2.RewriterConfig.OFF
    sess_config.graph_options.rewrite_options.dependency_optimization = 2
    if self.use_xla:
        sess_config.graph_options.optimizer_options.global_jit_level = (
            tf.OptimizerOptions.ON_2)

    # First graph: build the model and freeze its variables into constants.
    with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
        inputs = tf.placeholder(tf.float32,
                                name='input',
                                shape=self.inputs_shape)
        output = self.build_model(inputs, is_training=False)

        img = np.random.uniform(size=self.inputs_shape)

        sess.run(tf.global_variables_initializer())
        if self.tensorrt:
            fetches = [inputs.name] + [i.name for i in output]
            goutput = self.convert_tr(graphdef, fetches)
            inputs, output = goutput[0], goutput[1:]

        if not self.use_xla:
            output = tf.group(*output)
        else:
            # Don't use tf.group because XLA removes the whole graph for
            # tf.group.
            output = tf.add_n([tf.reduce_sum(x) for x in output])

        output_name = [output.name]
        input_name = inputs.name
        graphdef = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_name)

    # Second graph: benchmark the frozen graph.
    with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
        tf.import_graph_def(graphdef, name='')

        for i in range(warmup_runs):
            start_time = time.time()
            sess.run(output_name, feed_dict={input_name: img})
            print('Warm up: {} {:.4f}s'.format(i, time.time() - start_time))

        print('Start benchmark runs total={}'.format(bm_runs))
        start = time.perf_counter()
        for _ in range(bm_runs):
            sess.run(output_name, feed_dict={input_name: img})
        end = time.perf_counter()
        # Average over the runs actually executed, not a fixed count.
        inference_time = (end - start) / bm_runs
        print('Per batch inference time: ', inference_time)
        print('FPS: ', self.batch_size / inference_time)

        if trace_filename:
            run_options = tf.RunOptions()
            run_options.trace_level = tf.RunOptions.FULL_TRACE
            run_metadata = tf.RunMetadata()
            sess.run(output_name,
                     feed_dict={input_name: img},
                     options=run_options,
                     run_metadata=run_metadata)
            logging.info('Dumping trace to %s', trace_filename)
            trace_dir = os.path.dirname(trace_filename)
            # A bare file name has no directory part; nothing to create.
            if trace_dir and not tf.io.gfile.exists(trace_dir):
                tf.io.gfile.makedirs(trace_dir)
            with tf.io.gfile.GFile(trace_filename, 'w') as trace_file:
                from tensorflow.python.client import timeline  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top
                trace = timeline.Timeline(
                    step_stats=run_metadata.step_stats)
                trace_file.write(
                    trace.generate_chrome_trace_format(show_memory=True))
def detection_loss(cls_outputs, box_outputs, labels, params):
    """Compute the total detection loss summed over all feature levels.

    Args:
      cls_outputs: an ordered dict keyed by level whose values are class
        logits in [batch_size, height, width, num_anchors].
      box_outputs: an ordered dict keyed by level whose values are box
        regression outputs in [batch_size, height, width, num_anchors * 4].
      labels: dict returned by the dataloader; this function reads
        'mean_num_positives', 'cls_targets_<level>' and 'box_targets_<level>'.
      params: dict of training parameters (loss weights, focal-loss settings,
        data format, anchor configuration, ...).

    Returns:
      A tuple `(total_loss, cls_loss, box_loss, box_iou_loss)`:
        total_loss: class loss plus the weighted box and box-IoU losses.
        cls_loss: sum of the per-level class losses.
        box_loss: sum of the per-level box regression losses, or the Python
          int 0 when `params['box_loss_weight']` is falsy.
        box_iou_loss: the box IoU loss, or the Python int 0 when
          `params['iou_loss_type']` is falsy.
    """
    # Adding 1.0 keeps the normalizer non-zero, which would otherwise lead to
    # an inf loss during training.
    num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
    positives_momentum = params.get('positives_momentum', None) or 0
    if positives_momentum > 0:
        # Smooth the normalizer with a moving average for training stability.
        moving_normalizer_var = tf.Variable(
            0.0,
            name='moving_normalizer',
            dtype=tf.float32,
            synchronization=tf.VariableSynchronization.ON_READ,
            trainable=False,
            aggregation=tf.VariableAggregation.MEAN)
        num_positives_sum = tf.keras.backend.moving_average_update(
            moving_normalizer_var,
            num_positives_sum,
            momentum=params['positives_momentum'])
    elif positives_momentum < 0:
        num_positives_sum = utils.cross_replica_mean(num_positives_sum)

    levels = cls_outputs.keys()
    cls_losses = []
    box_losses = []
    for level in levels:
        cls_key = 'cls_targets_%d' % level
        channels_first = params['data_format'] == 'channels_first'

        # One-hot encode the classification labels, then fold the class axis
        # into the anchor axis so the targets line up with the logits.
        cls_targets_at_level = tf.one_hot(labels[cls_key],
                                          params['num_classes'])
        target_dims = cls_targets_at_level.get_shape().as_list()
        if channels_first:
            bs, _, width, height, _ = target_dims
            folded_shape = [bs, -1, width, height]
        else:
            bs, width, height, _, _ = target_dims
            folded_shape = [bs, width, height, -1]
        cls_targets_at_level = tf.reshape(cls_targets_at_level, folded_shape)
        box_targets_at_level = labels['box_targets_%d' % level]

        cls_loss = focal_loss(cls_outputs[level],
                              cls_targets_at_level,
                              params['alpha'],
                              params['gamma'],
                              normalizer=num_positives_sum,
                              label_smoothing=params['label_smoothing'])

        # Unfold the class axis again so the per-anchor mask can broadcast.
        if channels_first:
            unfolded_shape = [bs, -1, width, height, params['num_classes']]
        else:
            unfolded_shape = [bs, width, height, -1, params['num_classes']]
        cls_loss = tf.reshape(cls_loss, unfolded_shape)

        # Zero the loss wherever the class target equals -2 (presumably
        # "ignore" anchors -- confirm against the dataloader).
        keep_mask = tf.cast(
            tf.expand_dims(tf.not_equal(labels[cls_key], -2), -1),
            tf.float32)
        cls_loss = cls_loss * keep_mask
        cls_losses.append(
            tf.clip_by_value(tf.reduce_sum(cls_loss), 0.0, 2.0))

        if params['box_loss_weight']:
            box_losses.append(
                _box_loss(box_outputs[level],
                          box_targets_at_level,
                          num_positives_sum,
                          delta=params['delta']))

    if not params['iou_loss_type']:
        box_iou_loss = 0
    else:
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        # Flatten every level to rows of 4 box coordinates and concatenate.
        flat_outputs = tf.concat(
            [tf.reshape(box_outputs[lvl], [-1, 4]) for lvl in levels],
            axis=0)
        flat_targets = tf.concat(
            [tf.reshape(labels['box_targets_%d' % lvl], [-1, 4])
             for lvl in levels],
            axis=0)
        anchor_boxes = tf.tile(input_anchors.boxes,
                               [params['batch_size'], 1])
        decoded_outputs = anchors.decode_box_outputs(flat_outputs,
                                                     anchor_boxes)
        decoded_targets = anchors.decode_box_outputs(flat_targets,
                                                     anchor_boxes)
        box_iou_loss = _box_iou_loss(decoded_outputs, decoded_targets,
                                     num_positives_sum,
                                     params['iou_loss_type'])

    # Sum per level losses to total loss.
    cls_loss = tf.add_n(cls_losses)
    box_loss = tf.add_n(box_losses) if box_losses else 0
    total_loss = (cls_loss + params['box_loss_weight'] * box_loss +
                  params['iou_loss_weight'] * box_iou_loss)
    return total_loss, cls_loss, box_loss, box_iou_loss
def train():
    """Build the training graph and run the train/eval loop.

    Creates placeholders via `MODEL.placeholder_inputs`, builds the model and
    its losses, sets up the optimizer selected by the module-level `OPTIMIZER`
    setting, then runs `train_one_epoch` and `eval_one_epoch` for `MAX_EPOCH`
    epochs, saving a checkpoint under `log.LOG_DIR` every 10th epoch.

    Raises:
      ValueError: if `OPTIMIZER` is neither 'momentum' nor 'adam'.
    """
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pointclouds_pl, labels_pl = MODEL.placeholder_inputs(
                BATCH_SIZE, NUM_POINT, POINT_DIM)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch'
            # parameter for you every time it trains.
            batch = tf.get_variable('batch', [],
                                    initializer=tf.constant_initializer(0),
                                    trainable=False)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss. The individual loss terms are read back
            # from the 'losses' collection (presumably populated by
            # MODEL.get_loss -- confirm in the model module).
            pred, end_points = MODEL.get_model(pointclouds_pl,
                                               is_training_pl,
                                               bn_decay=bn_decay)
            MODEL.get_loss(pred, labels_pl, end_points)
            losses = tf.get_collection('losses')
            total_loss = tf.add_n(losses, name='total_loss')
            tf.summary.scalar('total_loss', total_loss)
            for l in losses + [total_loss]:
                tf.summary.scalar(l.op.name, l)

            correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct,
                                             tf.float32)) / float(BATCH_SIZE)
            tf.summary.scalar('accuracy', accuracy)

            print("--- Get training operator")
            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                # Fail with a clear message instead of a NameError on the
                # unbound `optimizer` below.
                raise ValueError(
                    "Unsupported OPTIMIZER %r; expected 'momentum' or 'adam'"
                    % (OPTIMIZER,))
            train_op = optimizer.minimize(total_loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        # Add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(
            os.path.join(log.LOG_DIR, 'train'), sess.graph)
        test_writer = tf.summary.FileWriter(
            os.path.join(log.LOG_DIR, 'test'), sess.graph)

        try:
            # Init variables
            init = tf.global_variables_initializer()
            sess.run(init)

            ops = {
                'pointclouds_pl': pointclouds_pl,
                'labels_pl': labels_pl,
                'is_training_pl': is_training_pl,
                'pred': pred,
                'loss': total_loss,
                'train_op': train_op,
                'merged': merged,
                'step': batch,
                'end_points': end_points
            }

            for epoch in range(MAX_EPOCH):
                log.out('**** EPOCH %03d ****' % (epoch))
                sys.stdout.flush()

                train_one_epoch(sess, ops, train_writer)
                eval_one_epoch(sess, ops, test_writer)

                # Save the variables to disk.
                if epoch % 10 == 0:
                    save_path = saver.save(
                        sess, os.path.join(log.LOG_DIR, "model.ckpt"))
                    log.out("Model saved in file: %s" % save_path)
        finally:
            # Flush pending summaries and release session resources even if
            # training is interrupted by an exception.
            train_writer.close()
            test_writer.close()
            sess.close()
output_shape=tf.stack([ input_data_num, img_size_1, img_size_1, channel_num_1 ]), strides=[1, 2, 2, 1], padding='SAME') decode_layer4 = tf.add(decode_layer4, b8) decode_layer4 = tf.nn.sigmoid(decode_layer4) tf.add_to_collection('reg_losses', tf.nn.l2_loss(W8)) tf.add_to_collection('reg_losses', tf.nn.l2_loss(b8)) # Loss layer with tf.name_scope('Loss_Layer') as scope: error = decode_layer4 - input_label error_square = tf.reduce_sum(tf.square(error)) reg_loss = tf.add_n(tf.get_collection('reg_losses')) total_loss = error_square + reg_loss * regulation # Summarize Scalar value tf.summary.scalar('Total_loss', total_loss) tf.summary.scalar('Reg_losses', reg_loss) # Set operation train_op = tf.train.AdamOptimizer(learning_rate).minimize(total_loss) summary_op = tf.summary.merge_all() '''세션 구성''' with tf.Session() as sess: sess.run(tf.global_variables_initializer()) batch_count = int(x_train.shape[0] / batch_size) writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
import tensorflow.compat.v1 as tf

# Turn off TensorFlow 2.0 behavior so the graph-mode (v1) API is used.
tf.disable_v2_behavior()

# tf.device() returns a context manager: it only affects ops created inside
# the `with` block. Calling it as a bare statement has no effect.
with tf.device('/gpu:0'):
    input1 = tf.constant([1.0, 2.0, 3.0], name="input1")
    input2 = tf.Variable(tf.random_uniform([3]), name="input2")
    output = tf.add_n([input1, input2], name='add')

# Write the computation graph to the event log.
writer = tf.summary.FileWriter(r"D:\PythonWorkspace\TFTryout\eventlog",
                               tf.get_default_graph())
writer.close()
def _build(self):
    """Build both graph-convolution layers and the per-edge-type decoders.

    Populates:
      self.hidden1: first-layer activations, keyed by the first index of
        each edge type (ReLU of the summed per-edge-type convolutions).
      self.embeddings_reltyp: second-layer outputs per edge type, grouped by
        the first index of the edge type.
      self.embeddings: list of length `self.num_obj_types` holding the
        summed second-layer outputs.
      self.edge_type2decoder: decoder instance for every edge type.
      self.latent_inters / self.latent_varies: one matrix per
        (edge type, relation index) pair, appended in iteration order.

    Raises:
      ValueError: if an edge type is mapped to an unknown decoder name.
    """
    # First layer: one sparse graph convolution per edge type (i, j), applied
    # to the inputs of j and collected under i.
    self.hidden1 = defaultdict(list)
    for i, j in self.edge_types:
        first_layer = GraphConvolutionSparseMulti(
            input_dim=self.input_dim,
            output_dim=FLAGS.hidden1,
            edge_type=(i, j),
            num_types=self.edge_types[i, j],
            adj_mats=self.adj_mats,
            nonzero_feat=self.nonzero_feat,
            dropout=self.dropout,
            logging=self.logging)
        self.hidden1[i].append(first_layer(self.inputs[j]))

    # Sum the contributions collected under each index and apply ReLU.
    for i, hid1 in self.hidden1.items():
        self.hidden1[i] = tf.nn.relu(tf.add_n(hid1))

    # Second layer: a dense graph convolution per edge type on hidden1[j].
    self.embeddings_reltyp = defaultdict(list)
    for i, j in self.edge_types:
        second_layer = GraphConvolutionMulti(
            input_dim=FLAGS.hidden1,
            output_dim=FLAGS.hidden2,
            edge_type=(i, j),
            num_types=self.edge_types[i, j],
            adj_mats=self.adj_mats,
            dropout=self.dropout,
            logging=self.logging)
        self.embeddings_reltyp[i].append(second_layer(self.hidden1[j]))

    # Final embeddings: the plain sum of the second-layer outputs. Unlike the
    # first layer, no ReLU is applied here.
    self.embeddings = [None] * self.num_obj_types
    for i, embeds in self.embeddings_reltyp.items():
        self.embeddings[i] = tf.add_n(embeds)

    # One decoder per edge type; all decoders use the identity activation.
    self.edge_type2decoder = {}
    for i, j in self.edge_types:
        decoder_name = self.decoders[i, j]
        if decoder_name == 'innerproduct':
            decoder_cls = InnerProductDecoder
        elif decoder_name == 'distmult':
            decoder_cls = DistMultDecoder
        elif decoder_name == 'bilinear':
            decoder_cls = BilinearDecoder
        elif decoder_name == 'dedicom':
            decoder_cls = DEDICOMDecoder
        else:
            raise ValueError('Unknown decoder type')
        self.edge_type2decoder[i, j] = decoder_cls(
            input_dim=FLAGS.hidden2,
            logging=self.logging,
            edge_type=(i, j),
            num_types=self.edge_types[i, j],
            act=lambda x: x,
            dropout=self.dropout)

    # Per-relation matrices derived from the decoder variables; an identity
    # matrix is used wherever a decoder has no corresponding variable.
    self.latent_inters = []
    self.latent_varies = []
    for edge_type in self.edge_types:
        decoder_name = self.decoders[edge_type]
        for k in range(self.edge_types[edge_type]):
            if decoder_name == 'innerproduct':
                glb = tf.eye(FLAGS.hidden2, FLAGS.hidden2)
                loc = tf.eye(FLAGS.hidden2, FLAGS.hidden2)
            elif decoder_name == 'distmult':
                relation = self.edge_type2decoder[edge_type].vars[
                    'relation_%d' % k]
                glb = tf.diag(relation)
                loc = tf.eye(FLAGS.hidden2, FLAGS.hidden2)
            elif decoder_name == 'bilinear':
                glb = self.edge_type2decoder[edge_type].vars[
                    'relation_%d' % k]
                loc = tf.eye(FLAGS.hidden2, FLAGS.hidden2)
            elif decoder_name == 'dedicom':
                glb = self.edge_type2decoder[edge_type].vars[
                    'global_interaction']
                variation = self.edge_type2decoder[edge_type].vars[
                    'local_variation_%d' % k]
                loc = tf.diag(variation)
            else:
                raise ValueError('Unknown decoder type')

            self.latent_inters.append(glb)
            self.latent_varies.append(loc)
def __call__(self, inputs, is_training, end_points_collection=None):
    """Run the network on `inputs` and return the final dense-layer output.

    Args:
      inputs: input tensor fed to the initial convolution.
      is_training: passed to the block layers and normalization; also tested
        as a Python truth value to decide whether the weight-decay loss is
        added.
      end_points_collection: optional mutable mapping. When not None
        (including when it is empty), it receives 'inputs' and the output of
        every block layer under the keys 'h1', 'h2', ...

    Returns:
      The output of the final dense layer with `self.num_classes` units.
    """
    with tf.variable_scope(self._scope, reuse=tf.AUTO_REUSE) as scope:
        self._scope = scope.name
        # Test against None rather than truthiness: an empty dict is falsy
        # but is exactly what a caller passes to collect the end points.
        collect_end_points = end_points_collection is not None
        if collect_end_points:
            end_points_collection['inputs'] = inputs

        inputs = conv2d_fixed_padding(inputs=inputs,
                                      filters=self.num_filters,
                                      kernel_size=self.kernel_size,
                                      strides=self.conv_stride,
                                      data_format=self.data_format)
        inputs = tf.identity(inputs, 'initial_conv')

        if self.first_pool_size:
            inputs = tf.compat.v1.layers.max_pooling2d(
                inputs=inputs,
                pool_size=self.first_pool_size,
                strides=self.first_pool_stride,
                padding='SAME',
                data_format=self.data_format)
            inputs = tf.identity(inputs, 'initial_max_pool')

        for i, num_blocks in enumerate(self.block_sizes):
            # The filter count doubles with every block layer.
            num_filters = self.num_filters * (2**i)
            inputs = block_layer(inputs=inputs,
                                 filters=num_filters,
                                 bottleneck=self.bottleneck,
                                 block_fn=_building_block_v2,
                                 blocks=num_blocks,
                                 strides=self.block_strides[i],
                                 training=is_training,
                                 name='block_layer{}'.format(i + 1),
                                 data_format=self.data_format,
                                 norm_type=self.norm_type)
            if collect_end_points:
                end_points_collection['h{}'.format(i + 1)] = inputs

        # Only apply the BN and ReLU for model that does pre_activation in
        # each building/bottleneck block, eg resnet V2.
        if self.pre_activation:
            inputs = normalize(inputs, is_training, self.data_format,
                               self.norm_type)
            inputs = tf.nn.relu(inputs)

        # The current top layer has shape
        # `batch_size x pool_size x pool_size x final_size`.
        # ResNet does an Average Pooling layer over pool_size,
        # but that is the same as doing a reduce_mean. We do a reduce_mean
        # here because it performs better than AveragePooling2D.
        axes = [2, 3] if self.data_format == 'channels_first' else [1, 2]
        inputs = tf.reduce_mean(input_tensor=inputs, axis=axes, keepdims=True)
        inputs = tf.identity(inputs, 'final_reduce_mean')

        inputs = tf.squeeze(inputs, axes)
        inputs = tf.compat.v1.layers.dense(inputs=inputs,
                                           units=self.num_classes)
        inputs = tf.identity(inputs, 'final_dense')

        # Add weight decay to the loss.
        if is_training:
            var_list = [
                v for v in tf.trainable_variables(self._scope)
                if self._loss_filter_fn(v.name)
            ]
            reg = contrib_layers.l2_regularizer(scale=self.weight_decay)
            l2_loss_list = [reg(v) for v in var_list]
            # tf.add_n rejects an empty list; skip the regularizer when the
            # filter leaves no variables.
            if l2_loss_list:
                l2_loss = tf.add_n(l2_loss_list)
                tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                     l2_loss)

        self._was_called = True
        return inputs
def detection_loss(cls_outputs, box_outputs, labels, params):
    """Computes total detection loss.

    Computes total detection loss including box and class loss from all
    levels. The `labels` dict is only read; it is not modified.

    Args:
      cls_outputs: an OrderDict with keys representing levels and values
        representing logits in [batch_size, height, width, num_anchors].
      box_outputs: an OrderDict with keys representing levels and values
        representing box regression targets in
        [batch_size, height, width, num_anchors * 4].
      labels: the dictionary returned from the dataloader that includes
        groundtruth targets ('mean_num_positives', 'cls_targets_<level>' and
        'box_targets_<level>').
      params: the dictionary including training parameters.

    Returns:
      total_loss: a scalar tensor with the class loss plus the box loss
        scaled by `params['box_loss_weight']`.
      cls_loss: a scalar tensor representing total class loss.
      box_loss: a tensor representing total box regression loss.
    """
    # Sum all positives in a batch for normalization and avoid zero
    # num_positives_sum, which would lead to inf loss during training
    num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
    levels = cls_outputs.keys()
    channels_first = params['data_format'] == 'channels_first'

    cls_losses = []
    box_losses = []
    for level in levels:
        # Work on local references so the caller's `labels` dict is never
        # overwritten; writing the transposed tensors back would transpose
        # them again on any later call with the same dict.
        cls_targets = labels['cls_targets_%d' % level]
        box_targets_at_level = labels['box_targets_%d' % level]
        if channels_first:
            cls_targets = tf.transpose(cls_targets, [0, 3, 1, 2])
            box_targets_at_level = tf.transpose(box_targets_at_level,
                                                [0, 3, 1, 2])

        # Onehot encoding for classification labels.
        cls_targets_at_level = tf.one_hot(cls_targets, params['num_classes'])
        if channels_first:
            bs, _, width, height, _ = (
                cls_targets_at_level.get_shape().as_list())
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, -1, width, height])
        else:
            bs, width, height, _, _ = (
                cls_targets_at_level.get_shape().as_list())
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, width, height, -1])

        cls_loss = _classification_loss(cls_outputs[level],
                                        cls_targets_at_level,
                                        num_positives_sum,
                                        alpha=params['alpha'],
                                        gamma=params['gamma'])
        if channels_first:
            cls_loss = tf.reshape(
                cls_loss, [bs, -1, width, height, params['num_classes']])
        else:
            cls_loss = tf.reshape(
                cls_loss, [bs, width, height, -1, params['num_classes']])
        # Zero the loss wherever the class target equals -2 (presumably
        # "ignore" anchors -- confirm against the dataloader).
        cls_loss *= tf.cast(
            tf.expand_dims(tf.not_equal(cls_targets, -2), -1), tf.float32)
        cls_losses.append(tf.reduce_sum(cls_loss))
        box_losses.append(
            _box_loss(box_outputs[level],
                      box_targets_at_level,
                      num_positives_sum,
                      delta=params['delta']))

    # Sum per level losses to total loss.
    cls_loss = tf.add_n(cls_losses)
    box_loss = tf.add_n(box_losses)
    total_loss = cls_loss + params['box_loss_weight'] * box_loss
    return total_loss, cls_loss, box_loss
def build_bifpn_layer(
        feats, fpn_name, fpn_config, is_training, input_size, cell_ind,
        fpn_num_filters, min_level, max_level, separable_conv,
        apply_bn_for_resampling, conv_after_downsample,
        use_native_resize_op, conv_bn_relu_pattern, pooling_type):
    """Builds a feature pyramid given previous feature pyramid and config.

    For every node in the config, the referenced input features are
    resampled to the node's width, fused according to `config.weight_method`
    ('attn', 'fastattn' or 'sum'), and passed through a 3x3 convolution and
    batch norm. Each new node is appended to `feats`, so later nodes can
    reference earlier ones by offset. Note that `feats` is extended in place.

    Args:
      feats: list of input feature tensors; new nodes are appended to it.
      fpn_name: name used to look up the config when `fpn_config` is falsy.
      fpn_config: explicit config object, or a falsy value to use
        `get_fpn_config(fpn_name)`.
      is_training: forwarded to resampling and batch norm.
      input_size: multiplied by each node's 'width_ratio' to get its width.
      cell_ind: index of this cell; used for logging only.
      fpn_num_filters: number of filters for resampling and the fused conv.
      min_level: lowest output level (inclusive).
      max_level: highest output level (inclusive).
      separable_conv: use a separable 3x3 conv instead of a regular one.
      apply_bn_for_resampling: forwarded to `resample_feature_map`.
      conv_after_downsample: forwarded to `resample_feature_map`.
      use_native_resize_op: forwarded to `resample_feature_map`.
      conv_bn_relu_pattern: if truthy, use conv -> bn -> relu without a conv
        bias; otherwise apply the activation first, then conv (with bias)
        and bn.
      pooling_type: forwarded to `resample_feature_map`.

    Returns:
      A dict mapping each level in [min_level, max_level] that has a node
      with a matching width ratio to the feature of the last such node.

    Raises:
      ValueError: if `config.weight_method` is not recognized.
    """
    config = fpn_config or get_fpn_config(fpn_name)
    tf.logging.info('building cell {} using config: {}'.format(
        cell_ind, config))

    for i, fnode in enumerate(config.nodes):
        with tf.variable_scope('fnode{}'.format(i)):
            tf.logging.info('fnode {} : {}'.format(i, fnode))
            new_node_width = int(fnode['width_ratio'] * input_size)
            nodes = []
            for idx, input_offset in enumerate(fnode['inputs_offsets']):
                input_node = resample_feature_map(
                    feats[input_offset],
                    '{}_{}_{}'.format(idx, input_offset, len(feats)),
                    new_node_width, fpn_num_filters,
                    apply_bn_for_resampling, is_training,
                    conv_after_downsample,
                    use_native_resize_op,
                    pooling_type)
                nodes.append(input_node)

            # Combine all nodes.
            dtype = nodes[0].dtype
            if config.weight_method == 'attn':
                # Softmax-normalized learnable weight per input.
                edge_weights = [
                    tf.cast(tf.Variable(1.0, name='WSM'), dtype=dtype)
                    for _ in range(len(fnode['inputs_offsets']))
                ]
                normalized_weights = tf.nn.softmax(tf.stack(edge_weights))
                nodes = tf.stack(nodes, axis=-1)
                new_node = tf.reduce_sum(
                    tf.multiply(nodes, normalized_weights), -1)
            elif config.weight_method == 'fastattn':
                # ReLU keeps the weights non-negative; the small constant
                # avoids division by zero when all of them are zero.
                edge_weights = [
                    tf.nn.relu(
                        tf.cast(tf.Variable(1.0, name='WSM'), dtype=dtype))
                    for _ in range(len(fnode['inputs_offsets']))
                ]
                weights_sum = tf.add_n(edge_weights)
                nodes = [
                    node * weight / (weights_sum + 0.0001)
                    for node, weight in zip(nodes, edge_weights)
                ]
                new_node = tf.add_n(nodes)
            elif config.weight_method == 'sum':
                new_node = tf.add_n(nodes)
            else:
                raise ValueError(
                    'unknown weight_method {}'.format(config.weight_method))

            with tf.variable_scope('op_after_combine{}'.format(len(feats))):
                if not conv_bn_relu_pattern:
                    new_node = utils.relu_fn(new_node)

                if separable_conv:
                    conv_op = functools.partial(
                        tf.layers.separable_conv2d, depth_multiplier=1)
                else:
                    conv_op = tf.layers.conv2d

                new_node = conv_op(
                    new_node,
                    filters=fpn_num_filters,
                    kernel_size=(3, 3),
                    padding='same',
                    use_bias=not conv_bn_relu_pattern,
                    name='conv')

                new_node = utils.batch_norm_relu(
                    new_node,
                    is_training_bn=is_training,
                    relu=bool(conv_bn_relu_pattern),
                    data_format='channels_last',
                    name='bn')

            feats.append(new_node)

    output_feats = {}
    for level in range(min_level, max_level + 1):
        # Walk the nodes from last to first and stop at the first match, so
        # each level maps to its most recently built node. Without the break
        # an earlier (intermediate) node of the same width would overwrite it.
        for i, fnode in enumerate(reversed(config.nodes)):
            if fnode['width_ratio'] == F(level):
                output_feats[level] = feats[-1 - i]
                break
    tf.logging.info('Output feature pyramid: {}'.format(output_feats))
    return output_feats