def random_horizontal_flip(image,
                           boxes=None,
                           masks=None,
                           keypoints=None,
                           keypoint_flip_permutation=None,
                           seed=None):
    """Randomly flips the image and detections horizontally.

    The probability of flipping the image is 50%. A single random draw decides
    the flip, so the image and every provided annotation are flipped together
    or not at all.

    Args:
      image: rank 3 float32 tensor with shape [height, width, channels].
      boxes: (optional) rank 2 float32 tensor with shape [N, 4] containing the
        bounding boxes. Boxes are in normalized form meaning their coordinates
        vary between [0, 1]. Each row is in the form of
        [ymin, xmin, ymax, xmax].
      masks: (optional) rank 3 float32 tensor with shape
        [num_instances, height, width] containing instance masks. The masks
        are of the same height, width as the input `image`.
      keypoints: (optional) rank 3 float32 tensor with shape
        [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
        coordinates.
      keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint
        flip permutation. Required whenever `keypoints` is given.
      seed: random seed.

    Returns:
      A tuple whose first element is the (possibly flipped) image, with the
      same shape as the input image. For each of `boxes`, `masks` and
      `keypoints` that is not None, the corresponding (possibly flipped)
      tensor is appended to the tuple, in that order:

      boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
        Boxes are in normalized form meaning their coordinates vary
        between [0, 1].
      masks: rank 3 float32 tensor with shape [num_instances, height, width]
        containing instance masks.
      keypoints: rank 3 float32 tensor with shape
        [num_instances, num_keypoints, 2].

    Raises:
      ValueError: if keypoints are provided but keypoint_flip_permutation is
        not.
    """

    def _flip_image(image):
        # Flip the image left-to-right.
        image_flipped = tf.image.flip_left_right(image)
        return image_flipped

    # Validate before any graph op is created.
    if keypoints is not None and keypoint_flip_permutation is None:
        raise ValueError(
            'keypoints are provided but keypoint_flip_permutation is not '
            'provided')

    with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
        result = []
        # Random variable defining whether to do the flip or not. It is shared
        # by every tf.cond below so that all tensors stay consistent.
        do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)

        # Flip image.
        image = tf.cond(do_a_flip_random,
                        lambda: _flip_image(image),
                        lambda: image)
        result.append(image)

        # Flip boxes.
        if boxes is not None:
            boxes = tf.cond(do_a_flip_random,
                            lambda: _flip_boxes_left_right(boxes),
                            lambda: boxes)
            result.append(boxes)

        # Flip masks.
        if masks is not None:
            masks = tf.cond(do_a_flip_random,
                            lambda: _flip_masks_left_right(masks),
                            lambda: masks)
            result.append(masks)

        # Flip keypoints. The guard above guarantees that the permutation is
        # present whenever keypoints are.
        if keypoints is not None:
            permutation = keypoint_flip_permutation
            keypoints = tf.cond(
                do_a_flip_random,
                lambda: keypoint_flip_horizontal(keypoints, 0.5, permutation),
                lambda: keypoints)
            result.append(keypoints)

        return tuple(result)
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
    """Model definition entry.

    Builds the forward pass, and depending on `mode` the prediction dict, the
    training op (optionally with gradient clipping and EMA) or the evaluation
    metrics, plus an optional scaffold function that restores a checkpoint.

    Args:
      features: the input image tensor with shape [batch_size, height, width,
        3]. The height and width are fixed and equal.
      labels: the input labels in a dictionary. The labels include class
        targets and box targets which are dense label maps. The labels are
        generated from get_input_fn function in data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the model outputs class logits and box regression outputs.
      variable_filter_fn: the filter function that takes trainable_variables
        and returns the variable list after applying the filter rule.

    Returns:
      tpu_spec: the TPUEstimatorSpec to run training, evaluation, or
        prediction. In PREDICT mode a plain `tf.estimator.EstimatorSpec`
        carrying only the predictions is returned instead.

    Raises:
      RuntimeError: if both ckpt and backbone_ckpt are set.
    """
    # Convert params (dict) to Config for easier access.
    def _model_outputs():
        return model(features, config=hparams_config.Config(params))

    if params['use_bfloat16']:
        with tf.tpu.bfloat16_scope():
            cls_outputs, box_outputs = _model_outputs()
            levels = cls_outputs.keys()
            # Cast every per-level output back to float32 for the losses.
            for level in levels:
                cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
                box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
    else:
        cls_outputs, box_outputs = _model_outputs()
        levels = cls_outputs.keys()

    # First check if it is in PREDICT mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'image': features,
        }
        # One pair of prediction entries per feature level.
        for level in levels:
            predictions['cls_outputs_%d' % level] = cls_outputs[level]
            predictions['box_outputs_%d' % level] = box_outputs[level]
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Set up training loss and learning rate.
    update_learning_rate_schedule_parameters(params)
    global_step = tf.train.get_or_create_global_step()
    learning_rate = learning_rate_schedule(params, global_step)

    # cls_loss and box_loss are for logging. only total_loss is optimized.
    det_loss, cls_loss, box_loss = detection_loss(cls_outputs, box_outputs,
                                                  labels, params)
    l2loss = reg_l2_loss(params['weight_decay'])
    total_loss = det_loss + l2loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        utils.scalar('lrn_rate', learning_rate)
        utils.scalar('trainloss/cls_loss', cls_loss)
        utils.scalar('trainloss/box_loss', box_loss)
        utils.scalar('trainloss/det_loss', det_loss)
        utils.scalar('trainloss/l2_loss', l2loss)
        utils.scalar('trainloss/loss', total_loss)

    # `ema` and `ema_vars` only exist when a non-zero decay is configured;
    # every later use is guarded by the same `moving_average_decay` check.
    moving_average_decay = params['moving_average_decay']
    if moving_average_decay:
        ema = tf.train.ExponentialMovingAverage(decay=moving_average_decay,
                                                num_updates=global_step)
        ema_vars = utils.get_ema_vars()

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum=params['momentum'])
        if params['use_tpu']:
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        var_list = tf.trainable_variables()
        if variable_filter_fn:
            var_list = variable_filter_fn(var_list, params['resnet_depth'])

        if params.get('clip_gradients_norm', 0) > 0:
            logging.info('clip gradients norm by %f',
                         params['clip_gradients_norm'])
            grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
            with tf.name_scope('clip'):
                grads = [gv[0] for gv in grads_and_vars]
                tvars = [gv[1] for gv in grads_and_vars]
                clipped_grads, gnorm = tf.clip_by_global_norm(
                    grads, params['clip_gradients_norm'])
                utils.scalar('gnorm', gnorm)
                grads_and_vars = list(zip(clipped_grads, tvars))

            with tf.control_dependencies(update_ops):
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step)
        else:
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(total_loss,
                                              global_step,
                                              var_list=var_list)

        if moving_average_decay:
            # Update the moving averages only after the optimizer step ran.
            with tf.control_dependencies([train_op]):
                train_op = ema.apply(ema_vars)
    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(**kwargs):
            """Returns a dictionary that has the evaluation metrics."""
            batch_size = params['batch_size']
            eval_anchors = anchors.Anchors(params['min_level'],
                                           params['max_level'],
                                           params['num_scales'],
                                           params['aspect_ratios'],
                                           params['anchor_scale'],
                                           params['image_size'])
            anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                                   params['num_classes'])
            cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
            coco_metrics = coco_metric_fn(batch_size, anchor_labeler,
                                          params['val_json_file'], **kwargs)

            # Add metrics to output.
            output_metrics = {
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        # The scalar losses are tiled and reshaped to [batch_size, 1] before
        # being handed to metric_fn alongside the other per-example inputs.
        cls_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(cls_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        box_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(box_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        metric_fn_inputs = {
            'cls_loss_repeat': cls_loss_repeat,
            'box_loss_repeat': box_loss_repeat,
            'source_ids': labels['source_ids'],
            'groundtruth_data': labels['groundtruth_data'],
            'image_scales': labels['image_scales'],
        }
        add_metric_fn_inputs(params, cls_outputs, box_outputs,
                             metric_fn_inputs)
        eval_metrics = (metric_fn, metric_fn_inputs)

    # At most one of the two checkpoint params may be set (checked below).
    checkpoint = params.get('ckpt') or params.get('backbone_ckpt')

    if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
        # Initialize the model from an EfficientDet or backbone checkpoint.
        if params.get('ckpt') and params.get('backbone_ckpt'):
            raise RuntimeError(
                '--backbone_ckpt and --checkpoint are mutually exclusive')
        elif params.get('backbone_ckpt'):
            var_scope = params['backbone_name'] + '/'
            if params['ckpt_var_scope'] is None:
                # Use backbone name as default checkpoint scope.
                ckpt_scope = params['backbone_name'] + '/'
            else:
                ckpt_scope = params['ckpt_var_scope'] + '/'
        else:
            # Load every var in the given checkpoint
            var_scope = ckpt_scope = '/'

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            logging.info('restore variables from %s', checkpoint)
            var_map = utils.get_ckt_var_map(ckpt_path=checkpoint,
                                            ckpt_scope=ckpt_scope,
                                            var_scope=var_scope)
            tf.train.init_from_checkpoint(checkpoint, var_map)
            return tf.train.Scaffold()
    elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:

        def scaffold_fn():
            """Load moving average variables for eval."""
            logging.info('Load EMA vars with ema_decay=%f',
                         moving_average_decay)
            restore_vars_dict = ema.variables_to_restore(ema_vars)
            saver = tf.train.Saver(restore_vars_dict)
            return tf.train.Scaffold(saver=saver)
    else:
        scaffold_fn = None

    return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                             loss=total_loss,
                                             train_op=train_op,
                                             eval_metrics=eval_metrics,
                                             host_call=utils.get_tpu_host_call(
                                                 global_step, params),
                                             scaffold_fn=scaffold_fn)
def _model_fn(features, labels, mode, params, variable_filter_fn=None):
    """Model definition for the Mask-RCNN model based on ResNet.

    Builds the model graph, fills the prediction dict for PREDICT/EVAL, and
    for the remaining modes computes the RPN, Fast-RCNN, mask and L2 losses.
    In TRAIN mode it also creates the optimizer, the optional checkpoint
    scaffold, the (optionally clipped) train op and the optional host call.

    Args:
      features: the input image tensor and auxiliary information, such as
        `image_info` and `source_ids`. The image tensor has a shape of
        [batch_size, height, width, 3]. The height and width are fixed and
        equal.
      labels: the input labels in a dictionary. The labels include score
        targets and box targets which are dense label maps. The labels are
        generated from get_input_fn function in data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      variable_filter_fn: the filter function that takes trainable_variables
        and returns the variable list after applying the filter rule.

    Returns:
      tpu_spec: the TPUEstimatorSpec to run training, evaluation, or
        prediction. When `params['use_tpu']` is false a plain
        `tf.estimator.EstimatorSpec` is returned instead.

    Raises:
      ValueError: if `params['learning_rate_type']` is neither 'step' nor
        'cosine'.
    """
    if (mode == tf.estimator.ModeKeys.PREDICT or
            mode == tf.estimator.ModeKeys.EVAL):
        if ((params['include_groundtruth_in_features'] or
             mode == tf.estimator.ModeKeys.EVAL) and ('labels' in features)):
            # Include groundtruth for eval.
            labels = features['labels']

        if 'features' in features:
            features = features['features']
            # Otherwise, it is in export mode, the features are passed in
            # directly.

    if params['precision'] == 'bfloat16':
        with tf.tpu.bfloat16_scope():
            model_outputs = build_model_graph(
                features, labels, mode == tf.estimator.ModeKeys.TRAIN, params)
            model_outputs.update({
                'source_id': features['source_ids'],
                'image_info': features['image_info'],
            })

            def cast_outputs_to_float(d):
                # Recursively cast every leaf of the (possibly nested) output
                # dict to float32, in place.
                for k, v in sorted(six.iteritems(d)):
                    if isinstance(v, dict):
                        cast_outputs_to_float(v)
                    else:
                        d[k] = tf.cast(v, tf.float32)

            cast_outputs_to_float(model_outputs)
    else:
        model_outputs = build_model_graph(features, labels,
                                          mode == tf.estimator.ModeKeys.TRAIN,
                                          params)
        model_outputs.update({
            'source_id': features['source_ids'],
            'image_info': features['image_info'],
        })

    # First check if it is in PREDICT or EVAL mode to fill out predictions.
    # Predictions are used during the eval step to generate metrics.
    predictions = {}
    if (mode == tf.estimator.ModeKeys.PREDICT or
            mode == tf.estimator.ModeKeys.EVAL):
        if 'orig_images' in features:
            model_outputs['orig_images'] = features['orig_images']
        if labels and params['include_groundtruth_in_features']:
            # Labels can only be embedded in predictions. The prediction
            # cannot output a dictionary as a value.
            predictions.update(labels)
        model_outputs.pop('fpn_features', None)
        predictions.update(model_outputs)
        # If we are doing PREDICT, we can return here.
        if mode == tf.estimator.ModeKeys.PREDICT:
            if params['use_tpu']:
                return tf.estimator.tpu.TPUEstimatorSpec(
                    mode=mode, predictions=predictions)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

    # Set up training loss and learning rate.
    global_step = tf.train.get_or_create_global_step()
    if params['learning_rate_type'] == 'step':
        learning_rate = learning_rates.step_learning_rate_with_linear_warmup(
            global_step, params['init_learning_rate'],
            params['warmup_learning_rate'], params['warmup_steps'],
            params['learning_rate_levels'], params['learning_rate_steps'])
    elif params['learning_rate_type'] == 'cosine':
        learning_rate = learning_rates.cosine_learning_rate_with_linear_warmup(
            global_step, params['init_learning_rate'],
            params['warmup_learning_rate'], params['warmup_steps'],
            params['total_steps'])
    else:
        raise ValueError('Unsupported learning rate type: `{}`!'.format(
            params['learning_rate_type']))
    # score_loss and box_loss are for logging. only total_loss is optimized.
    total_rpn_loss, rpn_score_loss, rpn_box_loss = losses.rpn_loss(
        model_outputs['rpn_score_outputs'], model_outputs['rpn_box_outputs'],
        labels, params)

    (total_fast_rcnn_loss, fast_rcnn_class_loss,
     fast_rcnn_box_loss) = losses.fast_rcnn_loss(
         model_outputs['class_outputs'], model_outputs['box_outputs'],
         model_outputs['class_targets'], model_outputs['box_targets'], params)
    # Only training has the mask loss. Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/model_builder.py  # pylint: disable=line-too-long
    if mode == tf.estimator.ModeKeys.TRAIN and params['include_mask']:
        mask_loss = losses.mask_rcnn_loss(
            model_outputs['mask_outputs'], model_outputs['mask_targets'],
            model_outputs['selected_class_targets'], params)
    else:
        mask_loss = 0.
    if variable_filter_fn and ('resnet' in params['backbone']):
        var_list = variable_filter_fn(tf.trainable_variables(),
                                      params['backbone'] + '/')
    else:
        var_list = tf.trainable_variables()
    # Variables whose name contains 'batch_normalization' or 'bias' are left
    # out of the L2 penalty.
    l2_regularization_loss = params['l2_weight_decay'] * tf.add_n([
        tf.nn.l2_loss(v)
        for v in var_list
        if 'batch_normalization' not in v.name and 'bias' not in v.name
    ])
    total_loss = (total_rpn_loss + total_fast_rcnn_loss + mask_loss +
                  l2_regularization_loss)

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = create_optimizer(learning_rate, params)
        if params['use_tpu']:
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)

        # `warm_start_path` takes precedence over `checkpoint`.
        scaffold_fn = None
        if params['warm_start_path']:

            def warm_start_scaffold_fn():
                logging.info('model_fn warm start from: %s,',
                             params['warm_start_path'])
                assignment_map = _build_assigment_map(
                    optimizer,
                    prefix=None,
                    skip_variables_regex=params['skip_checkpoint_variables'])
                tf.train.init_from_checkpoint(params['warm_start_path'],
                                              assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = warm_start_scaffold_fn

        elif params['checkpoint']:

            def backbone_scaffold_fn():
                """Loads pretrained model through scaffold function."""
                # Exclude all variable of optimizer.
                vars_to_load = _build_assigment_map(
                    optimizer,
                    prefix=params['backbone'] + '/',
                    skip_variables_regex=params['skip_checkpoint_variables'])
                tf.train.init_from_checkpoint(params['checkpoint'],
                                              vars_to_load)
                if not vars_to_load:
                    raise ValueError('Variables to load is empty.')
                return tf.train.Scaffold()

            scaffold_fn = backbone_scaffold_fn

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
        if params['global_gradient_clip_ratio'] > 0:
            # Clips the gradients for training stability.
            # Refer: https://arxiv.org/abs/1211.5063
            with tf.name_scope('clipping'):
                old_grads, variables = zip(*grads_and_vars)
                # The clip norm scales with the square root of the number of
                # gradient elements.
                num_weights = sum(
                    g.shape.num_elements() for g in old_grads if g is not None)
                clip_norm = params['global_gradient_clip_ratio'] * math.sqrt(
                    num_weights)
                logging.info(
                    'Global clip norm set to %g for %d variables with %d elements.',
                    clip_norm, sum(1 for g in old_grads if g is not None),
                    num_weights)
                gradients, _ = tf.clip_by_global_norm(old_grads, clip_norm)
        else:
            gradients, variables = zip(*grads_and_vars)
        grads_and_vars = []
        # Special treatment for biases (beta is named as bias in reference model)
        # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/optimizer.py#L113  # pylint: disable=line-too-long
        for grad, var in zip(gradients, variables):
            if grad is not None and ('beta' in var.name or 'bias' in var.name):
                grad = 2.0 * grad
            grads_and_vars.append((grad, var))

        with tf.control_dependencies(update_ops):
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

        if params['use_host_call']:

            def host_call_fn(global_step, total_loss, total_rpn_loss,
                             rpn_score_loss, rpn_box_loss,
                             total_fast_rcnn_loss, fast_rcnn_class_loss,
                             fast_rcnn_box_loss, mask_loss,
                             l2_regularization_loss, learning_rate):
                """Training host call. Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `metric_fn`, provide as part of
                the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                  global_step: `Tensor` with shape `[batch, ]` for the
                    global_step.
                  total_loss: `Tensor` with shape `[batch, ]` for the training
                    loss.
                  total_rpn_loss: `Tensor` with shape `[batch, ]` for the
                    training RPN loss.
                  rpn_score_loss: `Tensor` with shape `[batch, ]` for the
                    training RPN score loss.
                  rpn_box_loss: `Tensor` with shape `[batch, ]` for the
                    training RPN box loss.
                  total_fast_rcnn_loss: `Tensor` with shape `[batch, ]` for the
                    training Mask-RCNN loss.
                  fast_rcnn_class_loss: `Tensor` with shape `[batch, ]` for the
                    training Mask-RCNN class loss.
                  fast_rcnn_box_loss: `Tensor` with shape `[batch, ]` for the
                    training Mask-RCNN box loss.
                  mask_loss: `Tensor` with shape `[batch, ]` for the training
                    Mask-RCNN mask loss.
                  l2_regularization_loss: `Tensor` with shape `[batch, ]` for
                    the regularization loss.
                  learning_rate: `Tensor` with shape `[batch, ]` for the
                    learning_rate.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                # Outfeed supports int32 but global_step is expected to be int64.
                global_step = tf.reduce_mean(global_step)
                # Host call fns are executed FLAGS.iterations_per_loop times after one
                # TPU loop is finished, setting max_queue value to the same as number of
                # iterations will make the summary writer only flush the data to storage
                # once per loop.
                with (tf2.summary.create_file_writer(
                        params['model_dir'],
                        max_queue=params['iterations_per_loop']).as_default()):
                    with tf2.summary.record_if(True):
                        tf2.summary.scalar('total_loss',
                                           tf.reduce_mean(total_loss),
                                           step=global_step)
                        tf2.summary.scalar('total_rpn_loss',
                                           tf.reduce_mean(total_rpn_loss),
                                           step=global_step)
                        tf2.summary.scalar('rpn_score_loss',
                                           tf.reduce_mean(rpn_score_loss),
                                           step=global_step)
                        tf2.summary.scalar('rpn_box_loss',
                                           tf.reduce_mean(rpn_box_loss),
                                           step=global_step)
                        tf2.summary.scalar(
                            'total_fast_rcnn_loss',
                            tf.reduce_mean(total_fast_rcnn_loss),
                            step=global_step)
                        tf2.summary.scalar(
                            'fast_rcnn_class_loss',
                            tf.reduce_mean(fast_rcnn_class_loss),
                            step=global_step)
                        tf2.summary.scalar('fast_rcnn_box_loss',
                                           tf.reduce_mean(fast_rcnn_box_loss),
                                           step=global_step)
                        # The mask loss is only reported when masks are
                        # enabled.
                        if params['include_mask']:
                            tf2.summary.scalar('mask_loss',
                                               tf.reduce_mean(mask_loss),
                                               step=global_step)
                        tf2.summary.scalar(
                            'l2_regularization_loss',
                            tf.reduce_mean(l2_regularization_loss),
                            step=global_step)
                        tf2.summary.scalar('learning_rate',
                                           tf.reduce_mean(learning_rate),
                                           step=global_step)

                        return tf.summary.all_v2_summary_ops()

            # To log the loss, current learning rate, and epoch for Tensorboard, the
            # summary op needs to be run on the host CPU via host_call. host_call
            # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
            # dimension. These Tensors are implicitly concatenated to
            # [params['batch_size']].
            global_step_t = tf.reshape(global_step, [1])
            total_loss_t = tf.reshape(total_loss, [1])
            total_rpn_loss_t = tf.reshape(total_rpn_loss, [1])
            rpn_score_loss_t = tf.reshape(rpn_score_loss, [1])
            rpn_box_loss_t = tf.reshape(rpn_box_loss, [1])
            total_fast_rcnn_loss_t = tf.reshape(total_fast_rcnn_loss, [1])
            fast_rcnn_class_loss_t = tf.reshape(fast_rcnn_class_loss, [1])
            fast_rcnn_box_loss_t = tf.reshape(fast_rcnn_box_loss, [1])
            mask_loss_t = tf.reshape(mask_loss, [1])
            # NOTE: unlike its siblings this rebinds the original name rather
            # than using a `_t` suffix; total_loss was already built above.
            l2_regularization_loss = tf.reshape(l2_regularization_loss, [1])
            learning_rate_t = tf.reshape(learning_rate, [1])
            host_call = (host_call_fn, [
                global_step_t, total_loss_t, total_rpn_loss_t,
                rpn_score_loss_t, rpn_box_loss_t, total_fast_rcnn_loss_t,
                fast_rcnn_class_loss_t, fast_rcnn_box_loss_t, mask_loss_t,
                l2_regularization_loss, learning_rate_t
            ])
    else:
        train_op = None
        scaffold_fn = None

    if params['use_tpu']:
        return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                                 loss=total_loss,
                                                 train_op=train_op,
                                                 host_call=host_call,
                                                 scaffold_fn=scaffold_fn)
    # NOTE(review): the non-TPU spec is built without scaffold_fn or
    # host_call, so those are not passed on this path — confirm intended.
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=total_loss,
                                      train_op=train_op)
inputs, targets, lr, keep_prob = model_input() #setting sequence length sequence_length = tf.placeholder_with_default(25, None, "Sequence_length") # Getting the shape of the inputs tensor input_shape = tf.shape(inputs) # Getting the training and test predictions training_predictions, test_predictions = seq2seq_model( tf.reverse(inputs, [-1]), targets, keep_prob, batch_size, sequence_length, len(answerword2int), len(questionword2int), encoding_embedding_size, decoding_embedding_size, rnn_size, num_layers, questionword2int) # Setting up the Loss Error, the Optimizer and Gradient Clipping with tf.name_scope("optimization"): loss_error = tf.contrib.seq2seq.sequence_loss( training_predictions, targets, tf.ones([input_shape[0], sequence_length])) optimizer = tf.train.AdamOptimizer(learning_rate) gradients = optimizer.compute_gradients(loss_error) clipped_gradients = [(tf.clip_by_value(grad_tensor, -5., 5.), grad_variable) for grad_tensor, grad_variable in gradients if grad_tensor is not None] optimizer_gradient_clipping = optimizer.apply_gradients(clipped_gradients) # Padding the sequences with the token def apply_padding(batch_of_sequences, word2int): max_sequence_length = max(
def nelder_mead_one_step(current_simplex,
                         current_objective_values,
                         objective_function=None,
                         dim=None,
                         func_tolerance=None,
                         position_tolerance=None,
                         batch_evaluate_objective=False,
                         reflection=None,
                         expansion=None,
                         contraction=None,
                         shrinkage=None,
                         name=None):
    """Performs one Nelder Mead iteration on the given simplex.

    The vertices are ranked by objective value, the worst vertex is reflected
    through the centroid of the opposite face, and one of the branches
    (already converged, accept reflection, expansion, outside contraction or,
    by default, inside contraction) is selected with `prefer_static.case`.

    Returns:
      A tuple `(converged, next_simplex, next_objective_at_simplex,
      num_evaluations)` where `num_evaluations` is one (for the reflected
      point) plus the evaluations reported by the selected branch.
    """
    with tf1.name_scope(name, 'nelder_mead_one_step'):
        simplex_dtype = current_simplex.dtype.base_dtype

        # Rank the vertices from best (lowest objective) to worst.
        ranking = tf.argsort(current_objective_values,
                             direction='ASCENDING',
                             stable=True)
        idx_best = ranking[0]
        idx_worst = ranking[-1]
        idx_second_worst = ranking[-2]

        vertex_worst = current_simplex[idx_worst]
        f_best = current_objective_values[idx_best]
        f_worst = current_objective_values[idx_worst]
        f_second_worst = current_objective_values[idx_second_worst]

        # Compute the centroid of the face opposite the worst vertex.
        centroid = (tf.reduce_sum(input_tensor=current_simplex, axis=0) -
                    vertex_worst)
        centroid = centroid / tf.cast(dim, simplex_dtype)

        # Reflect the worst vertex through the opposite face.
        reflected = centroid + reflection * (centroid - vertex_worst)
        f_reflected = objective_function(reflected)

        converged_now = _check_convergence(current_simplex,
                                           current_simplex[idx_best],
                                           f_best,
                                           f_worst,
                                           func_tolerance,
                                           position_tolerance)

        def _already_converged():
            # Nothing changes and no extra evaluation is spent.
            return (True, current_simplex, current_objective_values, 0)

        # Reflection lands between the best and the second worst vertex.
        reflection_ok = ((f_reflected < f_second_worst) &
                         (f_reflected >= f_best))
        on_reflection = _accept_reflected_fn(current_simplex,
                                             current_objective_values,
                                             idx_worst,
                                             reflected,
                                             f_reflected)

        # Reflection beats the current best: try to expand further.
        should_expand = f_reflected < f_best
        on_expansion = _expansion_fn(objective_function,
                                     current_simplex,
                                     current_objective_values,
                                     idx_worst,
                                     reflected,
                                     f_reflected,
                                     centroid,
                                     expansion)

        # Reflection lands between the second worst and the worst vertex.
        should_contract_outside = ((f_reflected < f_worst) &
                                   (f_reflected >= f_second_worst))
        on_outside_contraction = _outside_contraction_fn(
            objective_function,
            current_simplex,
            current_objective_values,
            centroid,
            idx_best,
            idx_worst,
            reflected,
            f_reflected,
            contraction,
            shrinkage,
            batch_evaluate_objective)

        # Fallback branch when none of the predicates above holds.
        on_inside_contraction = _inside_contraction_fn(
            objective_function,
            current_simplex,
            current_objective_values,
            centroid,
            idx_best,
            idx_worst,
            f_worst,
            contraction,
            shrinkage,
            batch_evaluate_objective)

        branches = [
            (converged_now, _already_converged),
            (reflection_ok, on_reflection),
            (should_expand, on_expansion),
            (should_contract_outside, on_outside_contraction),
        ]
        outcome = prefer_static.case(branches,
                                     default=on_inside_contraction,
                                     exclusive=False)
        converged, next_simplex, next_objective_at_simplex, case_evals = outcome

        # Restore the static shapes on the branch outputs.
        next_simplex.set_shape(current_simplex.shape)
        next_objective_at_simplex.set_shape(current_objective_values.shape)

        # One evaluation was already spent on the reflected point.
        return (converged,
                next_simplex,
                next_objective_at_simplex,
                1 + case_evals)
def _maybe_update_block_mask(self, weights, threshold, gradients=None):
    """Performs block-granular masking of the weights.

    Block pruning occurs only if the block_height or block_width is > 1 and
    if the weight tensor, when squeezed, has ndims = 2. Otherwise, elementwise
    pruning occurs.

    Args:
      weights: The weight tensor that needs to be masked.
      threshold: The current threshold value. The function will compute a new
        threshold and return the exponential moving average using the current
        value of threshold
      gradients: The gradient tensor that used for salience calculation.
        Assumed to have the same shape as `weights` -- TODO confirm against
        callers.

    Returns:
      new_threshold: The new value of the threshold based on weights, and
        sparsity at the current global_step
      new_mask: A tensor of the same shape as weights containing 0 or 1 to
        indicate which of the values in weights falls below the threshold

    Raises:
      ValueError: if block pooling function is not AVG or MAX, or if a
        gradient based prune option is selected but `gradients` is None.
    """
    block_dims = self._get_block_dims(weights.op.name)
    squeezed_weights = tf.squeeze(weights)
    # Fall back to element-wise pruning when block pruning does not apply.
    if squeezed_weights.get_shape().ndims != 2 or block_dims == [1, 1]:
        return self._update_mask(weights, threshold, gradients)

    if (self._spec.prune_option in ('first_order_gradient',
                                    'second_order_gradient') and
            gradients is None):
        raise ValueError(
            'Gradient based pruning implementation for block sparsity is not supported.'
        )

    # Work on a copy so that resolving -1 below does not write into the list
    # returned by `_get_block_dims`.
    block_dims = list(block_dims)
    for i in range(2):
        # -1 means "span the whole dimension".
        if block_dims[i] == -1:
            block_dims[i] = squeezed_weights.get_shape()[i]

    if self._block_pooling_function not in ['AVG', 'MAX']:
        raise ValueError('Unknown pooling function for block sparsity: %s' %
                         self._block_pooling_function)

    with tf.name_scope(weights.op.name + '_pruning_ops'):
        abs_weights = tf.abs(squeezed_weights)
        if gradients is not None:
            abs_gradients = tf.abs(tf.squeeze(gradients))

        pool_window = block_dims
        pool_fn = pruning_utils.factorized_pool
        squeeze_axis = None
        if not self._spec.use_tpu:
            pool_fn = tf.nn.pool
            # tf.nn.pool is fed a rank 4 tensor of shape [1, .., .., 1].
            abs_weights = tf.reshape(abs_weights, [
                1,
                abs_weights.get_shape()[0],
                abs_weights.get_shape()[1], 1
            ])
            if gradients is not None:
                # Reshape gradients to be a rank 4 tensor of shape
                # [1, .., .., 1]. The shape is read from the *squeezed*
                # gradients, mirroring the weights above; the un-squeezed
                # tensor may carry extra size-1 axes (e.g. [1, 1, H, W]),
                # whose leading dims would give a wrong target shape.
                abs_gradients = tf.reshape(abs_gradients, [
                    1,
                    abs_gradients.get_shape()[0],
                    abs_gradients.get_shape()[1], 1
                ])
            # Axes added by the reshape above, removed again after pooling.
            squeeze_axis = [0, 3]

        pooled_weights = pool_fn(abs_weights,
                                 window_shape=pool_window,
                                 pooling_type=self._block_pooling_function,
                                 strides=pool_window,
                                 padding='SAME',
                                 name=weights.op.name + '_pooled')
        if gradients is not None:
            pooled_gradients = pool_fn(
                abs_gradients,
                window_shape=pool_window,
                pooling_type=self._block_pooling_function,
                strides=pool_window,
                padding='SAME',
                name=gradients.op.name + '_pooled')
        else:
            pooled_gradients = None

        if pooled_weights.get_shape().ndims != 2:
            pooled_weights = tf.squeeze(pooled_weights, axis=squeeze_axis)

        if gradients is not None and pooled_gradients.get_shape().ndims != 2:
            pooled_gradients = tf.squeeze(pooled_gradients, axis=squeeze_axis)

        # The mask is computed on the pooled (one value per block) tensors.
        smoothed_threshold, new_mask = self._update_mask(
            pooled_weights, threshold, pooled_gradients)

        # Expand the per-block mask back to element granularity, then crop
        # what 'SAME' padding added beyond the original 2-D extent.
        updated_mask = pruning_utils.expand_tensor(new_mask, block_dims)
        sliced_mask = tf.slice(updated_mask, [0, 0], [
            squeezed_weights.get_shape()[0],
            squeezed_weights.get_shape()[1]
        ])
        return smoothed_threshold, tf.reshape(sliced_mask, tf.shape(weights))
def build_graph(self,
                wordvec_size=100,
                hidden_size=100,
                time_size=5,
                optimizer=None):
    """Build the TensorFlow graph.

    Creates the input placeholders, an embedding lookup, `time_size` unrolled
    steps of `self.lstm`, an affine projection to the vocabulary, the
    cross-entropy loss with its summaries, and a training op that applies
    norm-clipped gradients.

    Args:
        wordvec_size (int): Dimension of distributed representations of the
            words.
        hidden_size (int): Dimension of hidden layer.
        time_size (int): Number of steps to unroll for truncated BPTT.
        optimizer: Optimizer instance. Defaults to `tf.train.AdamOptimizer`
            driven by the `self.learning_rate` placeholder.
    """
    self.wordvec_size = wordvec_size
    self.hidden_size = hidden_size
    self.time_size = time_size
    self.learning_rate = tf.placeholder(tf.float32)

    incomes = tf.placeholder(
        tf.int32,
        shape=(None, time_size),
        name='incomes',
    )
    labels = tf.placeholder(
        tf.int32,
        shape=(None, time_size),
        name='labels',
    )
    # Recurrent state fed in from the previous truncated-BPTT window.
    prev_h = tf.placeholder(tf.float32,
                            shape=(None, hidden_size),
                            name='prev_h')
    self.prev_h = prev_h
    prev_c = tf.placeholder(tf.float32,
                            shape=(None, hidden_size),
                            name='prev_c')
    self.prev_c = prev_c

    with tf.name_scope('TimeEmbedding'):
        embed_W = tf.Variable(
            np.random.randn(self.vocab_size, wordvec_size) / 100,
            dtype=tf.float32,
            name='embed_W',
        )
        xs = tf.gather(embed_W, incomes)

    with tf.name_scope('TimeLSTM'):
        self.lstm_Wx = tf.Variable(
            randn(4, wordvec_size, hidden_size) / sqrt(wordvec_size),
            dtype=tf.float32,
            name='lstm_Wx',
        )
        self.lstm_Wh = tf.Variable(
            randn(4, hidden_size, hidden_size) / sqrt(hidden_size),
            dtype=tf.float32,
            name='lstm_Wh',
        )
        self.lstm_bias = tf.Variable(
            np.zeros([4, hidden_size]),
            dtype=tf.float32,
            name='lstm_bias',
        )

        h = prev_h
        c = prev_c
        time_h = []
        time_c = []
        # Unroll the LSTM, threading the state from one step into the next.
        for i in range(time_size):
            next_h, next_c = self.lstm(xs[:, i, :], c, h)
            time_h.append(next_h)
            time_c.append(next_c)
            h = next_h
            c = next_c

        hs = tf.stack(time_h, 1)
        # State after the last unrolled step, exposed for the next window.
        self.next_h = hs[:, time_size - 1, :]
        self.next_c = time_c[time_size - 1]

    with tf.name_scope('TimeAffine'):
        affine_W = tf.Variable(
            randn(hidden_size, self.vocab_size) / sqrt(hidden_size),
            dtype=tf.float32,
            name='affine_W',
        )
        affine_bias = tf.Variable(
            np.zeros(self.vocab_size),
            dtype=tf.float32,
            name='affine_bias',
        )
        logits = tf.math.add(
            tf.matmul(hs, affine_W),
            affine_bias,
            name='logits',
        )

    # Mean cross entropy over every (example, time step) pair.
    cee = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.reshape(labels, [-1]),
            logits=tf.reshape(logits, [-1, self.vocab_size]),
        ),
        name='CEE',
    )
    self.los_summaries = [
        tf.summary.scalar('Perplexity', tf.math.exp(cee), family='Loss'),
        tf.summary.scalar('CrossEntorpyError', cee, family='Loss'),
    ]

    self.incomes = incomes
    self.labels = labels

    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)

    grads_vars = optimizer.compute_gradients(cee, tf.trainable_variables())
    with tf.name_scope('GradientsClip'):
        # compute_gradients yields a None gradient for any trainable variable
        # that does not feed into the loss; tf.clip_by_norm cannot take None,
        # so those pairs are skipped.
        clipped_grads_vars = [(tf.clip_by_norm(grad, 0.25), var)
                              for grad, var in grads_vars
                              if grad is not None]
    self.training_op = optimizer.apply_gradients(clipped_grads_vars)
def expanded_conv(input_tensor,
                  num_outputs,
                  expansion_size=expand_input_by_factor(6),
                  stride=1,
                  rate=1,
                  kernel_size=(3, 3),
                  residual=True,
                  normalizer_fn=None,
                  split_projection=1,
                  split_expansion=1,
                  expansion_transform=None,
                  depthwise_location='expansion',
                  depthwise_channel_multiplier=1,
                  endpoints=None,
                  use_explicit_padding=False,
                  padding='SAME',
                  scope=None):
    """Depthwise convolution block with expansion.

    Builds the composite

        expansion (1x1) -> depthwise (kernel_size) -> projection (1x1)

    where the depthwise convolution can alternatively be placed on the
    input, on the output, or left out.

    Args:
        input_tensor: input tensor; its depth is read from dimension 3.
        num_outputs: number of outputs of the final (projection) layer.
        expansion_size: size of the expansion; a constant, or a callable
            that is invoked as ``expansion_size(num_inputs=prev_depth)``.
        stride: depthwise stride.
        rate: depthwise rate.
        kernel_size: depthwise kernel.
        residual: whether to add a residual connection between input and
            output. A callable is invoked as
            ``residual(input_tensor=..., output_tensor=...)``.
        normalizer_fn: batchnorm or otherwise.
        split_projection: number of ways to split the projection op.
        split_expansion: number of ways to split the expansion op.
        expansion_transform: optional callable invoked as
            ``expansion_transform(expansion_tensor=..., input_tensor=...)``
            after the depthwise stage.
        depthwise_location: where to put the depthwise convolution; one of
            None, 'input', 'output', 'expansion'.
        depthwise_channel_multiplier: depth multiplier of the depthwise
            convolution.
        endpoints: optional dictionary that receives the intermediate
            tensors under 'expansion_output', 'depthwise_output' and
            'projection_output'.
        use_explicit_padding: use 'VALID' padding for the convolutions but
            pre-pad the inputs with ``_fixed_padding``.
        padding: padding type used when `use_explicit_padding` is not set.
        scope: optional scope.

    Returns:
        Tensor named 'output' produced by the block.

    Raises:
        TypeError: if `depthwise_location` is not a supported value, or if
            `use_explicit_padding` is combined with a non-'SAME' padding.
    """
    with tf.variable_scope(scope, default_name='expanded_conv') as s, \
            tf.name_scope(s.original_name_scope):
        prev_depth = input_tensor.get_shape().as_list()[3]

        supported_locations = [None, 'input', 'output', 'expansion']
        if depthwise_location not in supported_locations:
            raise TypeError('%r is unknown value for depthwise_location' %
                            depthwise_location)

        if use_explicit_padding:
            if padding != 'SAME':
                raise TypeError(
                    '`use_explicit_padding` should only be used with '
                    '"SAME" padding.')
            padding = 'VALID'

        depthwise_func = functools.partial(
            slim.separable_conv2d,
            num_outputs=None,
            kernel_size=kernel_size,
            depth_multiplier=depthwise_channel_multiplier,
            stride=stride,
            rate=rate,
            normalizer_fn=normalizer_fn,
            padding=padding,
            scope='depthwise')

        def _depthwise(tensor, **conv_kwargs):
            # Pre-pad by hand when explicit padding was requested, then
            # run the shared depthwise convolution.
            if use_explicit_padding:
                tensor = _fixed_padding(tensor, kernel_size, rate)
            return depthwise_func(tensor, **conv_kwargs)

        def _record(key, tensor):
            # Publish an intermediate tensor if the caller asked for them.
            if endpoints is not None:
                endpoints[key] = tensor

        # b1 -> b2 * r -> b2
        #   i -> (o * r) (bottleneck) -> o
        input_tensor = tf.identity(input_tensor, 'input')
        net = input_tensor

        if depthwise_location == 'input':
            net = _depthwise(net, activation_fn=None)

        # Resolve the expansion width (constant or computed from depth).
        inner_size = (expansion_size(num_inputs=prev_depth)
                      if callable(expansion_size) else expansion_size)

        # Expand only when that actually widens the tensor.
        if inner_size > net.shape[3]:
            net = split_conv(
                net,
                inner_size,
                num_ways=split_expansion,
                scope='expand',
                stride=1,
                normalizer_fn=normalizer_fn)
            net = tf.identity(net, 'expansion_output')
        _record('expansion_output', net)

        if depthwise_location == 'expansion':
            net = _depthwise(net)

        net = tf.identity(net, name='depthwise_output')
        _record('depthwise_output', net)

        if expansion_transform:
            net = expansion_transform(expansion_tensor=net,
                                      input_tensor=input_tensor)

        # Unlike the expansion, the projection is always applied so the
        # block yields the requested output size.
        net = split_conv(
            net,
            num_outputs,
            num_ways=split_projection,
            stride=1,
            scope='project',
            normalizer_fn=normalizer_fn,
            activation_fn=tf.identity)
        _record('projection_output', net)

        if depthwise_location == 'output':
            net = _depthwise(net, activation_fn=None)

        if callable(residual):
            # Custom residual supplied by the caller.
            net = residual(input_tensor=input_tensor, output_tensor=net)
        elif (residual
              # The stride check enforces that residuals are not added
              # when spatial dimensions are None.
              and stride == 1
              # Depth must match between input and output.
              and (net.get_shape().as_list()[3] ==
                   input_tensor.get_shape().as_list()[3])):
            net = net + input_tensor

        return tf.identity(net, name='output')
def style_prediction(style_input_,
                     activation_names,
                     activation_depths,
                     is_training=True,
                     trainable=True,
                     inception_end_point='Mixed_6e',
                     style_prediction_bottleneck=100,
                     reuse=None):
    """Maps style images to the style embeddings (beta and gamma parameters).

    Args:
        style_input_: Tensor. Batch of style input images.
        activation_names: sequence of strings. Scope names of the
            activations of the transformer network which are used to apply
            style normalization.
        activation_depths: Shapes of the activations of the transformer
            network which are used to apply style normalization. Entry `i`
            belongs to `activation_names[i]`.
        is_training: bool. Is it training phase or not?
        trainable: bool. Should the parameters be marked as trainable?
        inception_end_point: string. Specifies the endpoint to construct
            the inception_v3 network up to. This network is part of the
            style prediction network.
        style_prediction_bottleneck: int. Specifies the bottleneck size in
            the number of parameters of the style embedding. No bottleneck
            convolution is added unless this is greater than 0.
        reuse: bool. Whether to reuse model parameters. Defaults to None.

    Returns:
        A tuple `(style_params, bottleneck_feat)`: `style_params` is a
        dict mapping '<activation_name>/beta' and '<activation_name>/gamma'
        to the predicted parameter tensors, and `bottleneck_feat` is the
        bottleneck tensor those predictions are computed from.
    """
    # The original joined the two context managers with `and`, which
    # evaluates to the second operand only, so the name scope was never
    # entered. Entering both prefixes the op names with
    # 'style_prediction/'; variables are created through the variable
    # scope and keep their names.
    with tf.name_scope('style_prediction'), tf.variable_scope(
            tf.get_variable_scope(), reuse=reuse):
        with slim.arg_scope(_inception_v3_arg_scope(is_training=is_training)):
            with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected, slim.batch_norm],
                    trainable=trainable):
                with slim.arg_scope([slim.batch_norm, slim.dropout],
                                    is_training=is_training):
                    _, end_points = inception_v3.inception_v3_base(
                        style_input_,
                        scope='InceptionV3',
                        final_endpoint=inception_end_point)

        # Shape of feat_convlayer is (batch_size, ?, ?, depth).
        # For Mixed_6e end point, depth is 768, for input image size of
        # 256x265 width and height are 14x14.
        feat_convlayer = end_points[inception_end_point]
        with tf.name_scope('bottleneck'):
            # (batch_size, 1, 1, depth).
            bottleneck_feat = tf.reduce_mean(feat_convlayer,
                                             axis=[1, 2],
                                             keep_dims=True)

        if style_prediction_bottleneck > 0:
            with slim.arg_scope([slim.conv2d],
                                activation_fn=None,
                                normalizer_fn=None,
                                trainable=trainable):
                # (batch_size, 1, 1, style_prediction_bottleneck).
                bottleneck_feat = slim.conv2d(bottleneck_feat,
                                              style_prediction_bottleneck,
                                              [1, 1])

        style_params = {}
        with tf.variable_scope('style_params'):
            for i, depth in enumerate(activation_depths):
                activation_name = activation_names[i]
                with tf.variable_scope(activation_name, reuse=reuse):
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        trainable=trainable):
                        # Computing beta parameter of the style
                        # normalization for the activation_names[i] layer
                        # of the style transformer network.
                        # (batch_size, 1, 1, activation_depths[i])
                        beta = slim.conv2d(bottleneck_feat, depth, [1, 1])
                        # (batch_size, activation_depths[i])
                        beta = tf.squeeze(beta, [1, 2],
                                          name='SpatialSqueeze')
                        style_params['{}/beta'.format(
                            activation_name)] = beta

                        # Computing gamma parameter of the style
                        # normalization for the activation_names[i] layer
                        # of the style transformer network. Created after
                        # beta so the two convolutions keep their order
                        # inside the shared variable scope.
                        # (batch_size, 1, 1, activation_depths[i])
                        gamma = slim.conv2d(bottleneck_feat, depth, [1, 1])
                        # (batch_size, activation_depths[i])
                        gamma = tf.squeeze(gamma, [1, 2],
                                           name='SpatialSqueeze')
                        style_params['{}/gamma'.format(
                            activation_name)] = gamma

    return style_params, bottleneck_feat
def inception_v3(inputs,
                 dropout_keep_prob=0.8,
                 num_classes=1000,
                 is_training=True,
                 restore_logits=True,
                 scope=''):
    """Latest Inception from http://arxiv.org/abs/1512.00567.

      "Rethinking the Inception Architecture for Computer Vision"

      Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
      Zbigniew Wojna

    The network is a convolutional stem, a sequence of "mixed" Inception
    blocks (each concatenating its branches along axis 3), an auxiliary
    classifier head fed from 'mixed_17x17x768e', and a final pooled
    classifier. The size comments below are the ones from the original
    source and assume the 299 x 299 x 3 input they start from.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      dropout_keep_prob: dropout keep_prob.
      num_classes: number of predicted classes.
      is_training: whether is training or not.
      restore_logits: whether or not the logits layers should be restored.
        Useful for fine-tuning a model with different num_classes.
      scope: Optional scope for name_scope.

    Returns:
      A tuple `(logits, end_points)`. `logits` is the output of the final
      fully connected layer. `end_points` is a dict of the intermediate
      activations collected below; it also holds 'aux_logits', 'logits'
      and 'predictions' (softmax of the logits).
    """
    # end_points will collect relevant activations for external use, for
    # example summaries or losses.
    end_points = {}
    with tf.name_scope(scope, 'inception_v3', [inputs]):
        with scopes.arg_scope(
                [ops.conv2d, ops.fc, ops.batch_norm, ops.dropout],
                is_training=is_training):
            # Stem: defaults are stride 1 and 'VALID' padding.
            with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                                  stride=1, padding='VALID'):
                # 299 x 299 x 3
                end_points['conv0'] = ops.conv2d(inputs, 32, [3, 3],
                                                 stride=2, scope='conv0')
                # 149 x 149 x 32
                end_points['conv1'] = ops.conv2d(end_points['conv0'], 32,
                                                 [3, 3], scope='conv1')
                # 147 x 147 x 32
                end_points['conv2'] = ops.conv2d(end_points['conv1'], 64,
                                                 [3, 3], padding='SAME',
                                                 scope='conv2')
                # 147 x 147 x 64
                end_points['pool1'] = ops.max_pool(end_points['conv2'],
                                                   [3, 3], stride=2,
                                                   scope='pool1')
                # 73 x 73 x 64
                end_points['conv3'] = ops.conv2d(end_points['pool1'], 80,
                                                 [1, 1], scope='conv3')
                # 73 x 73 x 80.
                end_points['conv4'] = ops.conv2d(end_points['conv3'], 192,
                                                 [3, 3], scope='conv4')
                # 71 x 71 x 192.
                end_points['pool2'] = ops.max_pool(end_points['conv4'],
                                                   [3, 3], stride=2,
                                                   scope='pool2')
                # 35 x 35 x 192.
                net = end_points['pool2']
            # Inception blocks
            # Defaults switch to 'SAME' padding; the size-reducing layers
            # below override stride/padding explicitly.
            with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                                  stride=1, padding='SAME'):
                # mixed: 35 x 35 x 256.
                with tf.variable_scope('mixed_35x35x256a'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 64, [1, 1])
                    with tf.variable_scope('branch5x5'):
                        branch5x5 = ops.conv2d(net, 48, [1, 1])
                        branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 32, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_35x35x256a'] = net
                # mixed_1: 35 x 35 x 288.
                with tf.variable_scope('mixed_35x35x288a'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 64, [1, 1])
                    with tf.variable_scope('branch5x5'):
                        branch5x5 = ops.conv2d(net, 48, [1, 1])
                        branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_35x35x288a'] = net
                # mixed_2: 35 x 35 x 288.
                with tf.variable_scope('mixed_35x35x288b'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 64, [1, 1])
                    with tf.variable_scope('branch5x5'):
                        branch5x5 = ops.conv2d(net, 48, [1, 1])
                        branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_35x35x288b'] = net
                # mixed_3: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768a'):
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net, 384, [3, 3], stride=2,
                                               padding='VALID')
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3],
                                                  stride=2, padding='VALID')
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.max_pool(net, [3, 3], stride=2,
                                                   padding='VALID')
                    net = tf.concat([branch3x3, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_17x17x768a'] = net
                # mixed4: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768b'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 128, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 128, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 128, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768b'] = net
                # mixed_5: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768c'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 160, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 160, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768c'] = net
                # mixed_6: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768d'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 160, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 160, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768d'] = net
                # mixed_7: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768e'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 192, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 192, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 192, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768e'] = net
                # Auxiliary Head logits
                aux_logits = tf.identity(end_points['mixed_17x17x768e'])
                with tf.variable_scope('aux_logits'):
                    aux_logits = ops.avg_pool(aux_logits, [5, 5], stride=3,
                                              padding='VALID')
                    aux_logits = ops.conv2d(aux_logits, 128, [1, 1],
                                            scope='proj')
                    # Shape of feature map before the final layer.
                    shape = aux_logits.get_shape()
                    # Kernel spans the whole remaining feature map.
                    aux_logits = ops.conv2d(aux_logits, 768, shape[1:3],
                                            stddev=0.01, padding='VALID')
                    aux_logits = ops.flatten(aux_logits)
                    aux_logits = ops.fc(aux_logits, num_classes,
                                        activation=None, stddev=0.001,
                                        restore=restore_logits)
                    end_points['aux_logits'] = aux_logits
                # mixed_8: 8 x 8 x 1280.
                # Note that the scope below is not changed to not void
                # previous checkpoints.
                # (TODO) Fix the scope when appropriate.
                with tf.variable_scope('mixed_17x17x1280a'):
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net, 192, [1, 1])
                        branch3x3 = ops.conv2d(branch3x3, 320, [3, 3],
                                               stride=2, padding='VALID')
                    with tf.variable_scope('branch7x7x3'):
                        branch7x7x3 = ops.conv2d(net, 192, [1, 1])
                        branch7x7x3 = ops.conv2d(branch7x7x3, 192, [1, 7])
                        branch7x7x3 = ops.conv2d(branch7x7x3, 192, [7, 1])
                        branch7x7x3 = ops.conv2d(branch7x7x3, 192, [3, 3],
                                                 stride=2, padding='VALID')
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.max_pool(net, [3, 3], stride=2,
                                                   padding='VALID')
                    net = tf.concat([branch3x3, branch7x7x3, branch_pool], 3)
                    end_points['mixed_17x17x1280a'] = net
                # mixed_9: 8 x 8 x 2048.
                with tf.variable_scope('mixed_8x8x2048a'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 320, [1, 1])
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net, 384, [1, 1])
                        branch3x3 = tf.concat([
                            ops.conv2d(branch3x3, 384, [1, 3]),
                            ops.conv2d(branch3x3, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 448, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
                        branch3x3dbl = tf.concat([
                            ops.conv2d(branch3x3dbl, 384, [1, 3]),
                            ops.conv2d(branch3x3dbl, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch3x3, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_8x8x2048a'] = net
                # mixed_10: 8 x 8 x 2048.
                with tf.variable_scope('mixed_8x8x2048b'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 320, [1, 1])
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net, 384, [1, 1])
                        branch3x3 = tf.concat([
                            ops.conv2d(branch3x3, 384, [1, 3]),
                            ops.conv2d(branch3x3, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 448, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
                        branch3x3dbl = tf.concat([
                            ops.conv2d(branch3x3dbl, 384, [1, 3]),
                            ops.conv2d(branch3x3dbl, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch3x3, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_8x8x2048b'] = net
                # Final pooling and prediction
                with tf.variable_scope('logits'):
                    shape = net.get_shape()
                    # Pool over the full spatial extent of the feature map.
                    net = ops.avg_pool(net, shape[1:3], padding='VALID',
                                       scope='pool')
                    # 1 x 1 x 2048
                    net = ops.dropout(net, dropout_keep_prob,
                                      scope='dropout')
                    net = ops.flatten(net, scope='flatten')
                    # 2048
                    logits = ops.fc(net, num_classes, activation=None,
                                    scope='logits', restore=restore_logits)
                    # 1000
                    end_points['logits'] = logits
                    end_points['predictions'] = tf.nn.softmax(
                        logits, name='predictions')
            return logits, end_points
def tf_d_suplu(x, name=None):
    """Wrap ``np_d_suplu_32`` as a stateless ``tf.py_func`` op.

    Args:
        x: input passed to ``np_d_suplu_32`` as the single py_func input.
        name: optional name for the op scope; defaults to "d_suplu".

    Returns:
        The single float32 output of the py_func.
    """
    with tf.name_scope(name, "d_suplu", [x]) as op_scope:
        outputs = tf.py_func(
            np_d_suplu_32,
            [x],
            [tf.float32],
            name=op_scope,
            stateful=False,
        )
    # py_func was given a one-element output list; unwrap it.
    return outputs[0]
def initialize(self, name=None):
    """Return the initial `(finished, inputs)` pair for decoding.

    Args:
        name: optional name scope; defaults to "TrainingHelperInitialize".

    Returns:
        A tuple of the elementwise comparison `0 == self._sequence_length`
        and `self._zero_inputs`.
    """
    with tf.name_scope(name, "TrainingHelperInitialize"):
        # Entries whose sequence length is zero are finished from the start.
        is_finished = tf.equal(0, self._sequence_length)
        initial_inputs = self._zero_inputs
        return (is_finished, initial_inputs)
def sample(self, time, outputs, name=None, **unused_kwargs):
    """Pick sample ids as the argmax of `outputs` over the last axis.

    Args:
        time: current time step; only passed to the name scope.
        outputs: tensor to take the argmax of.
        name: optional name scope; defaults to "TrainingHelperSample".
        **unused_kwargs: accepted and ignored.

    Returns:
        int32 tensor of argmax indices.
    """
    with tf.name_scope(name, "TrainingHelperSample", [time, outputs]):
        best_index = tf.argmax(outputs, axis=-1)
        return tf.cast(best_index, tf.dtypes.int32)
def resize_to_range(image,
                    masks=None,
                    min_dimension=None,
                    max_dimension=None,
                    method=tf.image.ResizeMethod.BILINEAR,
                    align_corners=False,
                    pad_to_max_dimension=False):
    """Resizes an image so its dimensions are within the provided value.

    The output size can be described by two cases:
    1. If the image can be rescaled so its minimum dimension is equal to
       the provided value without the other dimension exceeding
       max_dimension, then do so.
    2. Otherwise, resize so the largest dimension is equal to
       max_dimension.

    Args:
        image: A 3D tensor of shape [height, width, channels]
        masks: (optional) rank 3 float32 tensor with shape
            [num_instances, height, width] containing instance masks.
        min_dimension: (optional) (scalar) desired size of the smaller
            image dimension.
        max_dimension: (optional) (scalar) maximum allowed size of the
            larger image dimension. Required when `pad_to_max_dimension`
            is set.
        method: (optional) interpolation method used in resizing the
            image. Defaults to BILINEAR. Masks are always resized with
            NEAREST_NEIGHBOR.
        align_corners: bool. If true, exactly align all 4 corners of the
            input and output. Defaults to False.
        pad_to_max_dimension: Whether to resize the image and pad it with
            zeros so the resulting image is of the spatial size
            [max_dimension, max_dimension]. If masks are included they are
            padded similarly.

    Returns:
        A list. Note that the position of the resized_image_shape changes
        based on whether masks are present.
        resized_image: A 3D tensor of shape [new_height, new_width,
            channels], where the image has been resized so that
            min(new_height, new_width) == min_dimension or
            max(new_height, new_width) == max_dimension.
        resized_masks: If masks is not None, also outputs masks. A 3D
            tensor of shape [num_instances, new_height, new_width].
        resized_image_shape: A 1D tensor of shape [3] containing shape of
            the resized image.

    Raises:
        ValueError: if the image is not a 3D tensor, or if
            `pad_to_max_dimension` is set without `max_dimension`.
    """
    if len(image.get_shape()) != 3:
        raise ValueError('Image should be 3D tensor')
    if pad_to_max_dimension and max_dimension is None:
        # Padding needs a concrete target size; fail with a clear message
        # instead of an arithmetic error on None inside the padding op.
        raise ValueError(
            'max_dimension must be set when pad_to_max_dimension is True')

    with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
        if image.get_shape().is_fully_defined():
            new_size = _compute_new_static_size(image, min_dimension,
                                                max_dimension)
        else:
            new_size = _compute_new_dynamic_size(image, min_dimension,
                                                 max_dimension)
        # new_size carries a trailing entry (the docstring describes a
        # [3]-shaped image shape); only the leading spatial part is used
        # for resizing.
        new_image = tf.image.resize_images(image,
                                           new_size[:-1],
                                           method=method,
                                           align_corners=align_corners)

        if pad_to_max_dimension:
            new_image = tf.image.pad_to_bounding_box(new_image, 0, 0,
                                                     max_dimension,
                                                     max_dimension)

        result = [new_image]
        if masks is not None:
            # Add a trailing channel axis so each mask is handled as a
            # single-channel image in a batch of num_instances images.
            new_masks = tf.expand_dims(masks, 3)
            new_masks = tf.image.resize_images(
                new_masks,
                new_size[:-1],
                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                align_corners=align_corners)
            if pad_to_max_dimension:
                # Pad while the masks are still 4-D. After the squeeze the
                # tensor is [num_instances, height, width], which
                # pad_to_bounding_box would read as
                # [height, width, channels] and pad the wrong axes.
                new_masks = tf.image.pad_to_bounding_box(
                    new_masks, 0, 0, max_dimension, max_dimension)
            new_masks = tf.squeeze(new_masks, 3)
            result.append(new_masks)

        result.append(new_size)
        return result
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
    """Model definition entry.

    Builds the model outputs, and depending on `mode` the prediction
    dictionary, the training op (optimizer, optional gradient clipping and
    moving average), the evaluation metrics, the checkpoint-restoring
    scaffold and the training hooks.

    Args:
      features: the input image tensor with shape [batch_size, height,
        width, 3]. The height and width are fixed and equal.
      labels: the input labels in a dictionary. The labels include class
        targets and box targets which are dense label maps. The labels are
        generated from get_input_fn function in data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the model outputs class logits and box regression outputs.
      variable_filter_fn: the filter function that takes
        trainable_variables and returns the variable list after applying
        the filter rule.

    Returns:
      tpu_spec: the TPUEstimatorSpec to run training or evaluation. In
        PREDICT mode a plain tf.estimator.EstimatorSpec carrying the
        predictions is returned instead.

    Raises:
      RuntimeError: if both ckpt and backbone_ckpt are set.
      ValueError: if params['optimizer'] is neither 'sgd' nor 'adam'
        (checked in TRAIN mode only).
    """
    utils.image('input_image', features)
    training_hooks = []
    if params['data_format'] == 'channels_first':
        # Move the channel axis in front of the spatial axes.
        features = tf.transpose(features, [0, 3, 1, 2])

    def _model_outputs(inputs):
        # Convert params (dict) to Config for easier access.
        return model(inputs, config=hparams_config.Config(params))

    precision = utils.get_precision(params['strategy'],
                                    params['mixed_precision'])
    cls_outputs, box_outputs = utils.build_model_with_precision(
        precision, _model_outputs, features, params['is_training_bn'])

    # Cast every per-level output to float32 for the code below.
    levels = cls_outputs.keys()
    for level in levels:
        cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
        box_outputs[level] = tf.cast(box_outputs[level], tf.float32)

    # First check if it is in PREDICT mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'image': features,
        }
        for level in levels:
            predictions['cls_outputs_%d' % level] = cls_outputs[level]
            predictions['box_outputs_%d' % level] = box_outputs[level]
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Set up training loss and learning rate.
    update_learning_rate_schedule_parameters(params)
    global_step = tf.train.get_or_create_global_step()
    learning_rate = learning_rate_schedule(params, global_step)

    # cls_loss and box_loss are for logging. only total_loss is optimized.
    det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
        cls_outputs, box_outputs, labels, params)
    reg_l2loss = reg_l2_loss(params['weight_decay'])
    total_loss = det_loss + reg_l2loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        utils.scalar('lrn_rate', learning_rate)
        utils.scalar('trainloss/cls_loss', cls_loss)
        utils.scalar('trainloss/box_loss', box_loss)
        utils.scalar('trainloss/det_loss', det_loss)
        utils.scalar('trainloss/reg_l2_loss', reg_l2loss)
        utils.scalar('trainloss/loss', total_loss)
        if params['iou_loss_type']:
            utils.scalar('trainloss/box_iou_loss', box_iou_loss)

    # `ema` and `ema_vars` only exist when a decay is configured; every
    # later use is guarded by the same `moving_average_decay` check.
    moving_average_decay = params['moving_average_decay']
    if moving_average_decay:
        ema = tf.train.ExponentialMovingAverage(decay=moving_average_decay,
                                                num_updates=global_step)
        ema_vars = utils.get_ema_vars()
    if params['strategy'] == 'horovod':
        # Imported lazily so horovod is only needed for this strategy.
        import horovod.tensorflow as hvd  # pylint: disable=g-import-not-at-top
        # Scale the learning rate by the number of horovod workers.
        learning_rate = learning_rate * hvd.size()
    if mode == tf.estimator.ModeKeys.TRAIN:
        if params['optimizer'].lower() == 'sgd':
            optimizer = tf.train.MomentumOptimizer(
                learning_rate, momentum=params['momentum'])
        elif params['optimizer'].lower() == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate)
        else:
            raise ValueError('optimizers should be adam or sgd')

        if params['strategy'] == 'tpu':
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)
        elif params['strategy'] == 'horovod':
            optimizer = hvd.DistributedOptimizer(optimizer)
            training_hooks = [hvd.BroadcastGlobalVariablesHook(0)]

        # Batch norm requires update_ops to be added as a train_op
        # dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        var_list = tf.trainable_variables()
        if variable_filter_fn:
            var_list = variable_filter_fn(var_list)

        if params.get('clip_gradients_norm', 0) > 0:
            logging.info('clip gradients norm by %f',
                         params['clip_gradients_norm'])
            grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
            with tf.name_scope('clip'):
                grads = [gv[0] for gv in grads_and_vars]
                tvars = [gv[1] for gv in grads_and_vars]
                clipped_grads, gnorm = tf.clip_by_global_norm(
                    grads, params['clip_gradients_norm'])
                utils.scalar('gnorm', gnorm)
                grads_and_vars = list(zip(clipped_grads, tvars))

            with tf.control_dependencies(update_ops):
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step)
        else:
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(total_loss,
                                              global_step,
                                              var_list=var_list)

        if moving_average_decay:
            # Update the moving averages only after the optimizer step.
            with tf.control_dependencies([train_op]):
                train_op = ema.apply(ema_vars)
    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(**kwargs):
            """Returns a dictionary that has the evaluation metrics."""
            batch_size = params['batch_size']
            if params['strategy'] == 'tpu':
                batch_size = params['batch_size'] * params['num_shards']
            eval_anchors = anchors.Anchors(params['min_level'],
                                           params['max_level'],
                                           params['num_scales'],
                                           params['aspect_ratios'],
                                           params['anchor_scale'],
                                           params['image_size'])
            anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                                   params['num_classes'])
            cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])

            if params.get('testdev_dir', None):
                logging.info('Eval testdev_dir %s', params['testdev_dir'])
                coco_metrics = coco_metric_fn(
                    batch_size,
                    anchor_labeler,
                    params['val_json_file'],
                    testdev_dir=params['testdev_dir'],
                    nms_configs=params['nms_configs'],
                    **kwargs)
            else:
                logging.info('Eval val with groudtruths %s.',
                             params['val_json_file'])
                coco_metrics = coco_metric_fn(
                    batch_size,
                    anchor_labeler,
                    params['val_json_file'],
                    nms_configs=params['nms_configs'],
                    **kwargs)

            # Add metrics to output.
            output_metrics = {
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        # The losses are repeated into [batch_size, 1] tensors before
        # being handed to metric_fn.
        cls_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(cls_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        box_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(box_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        metric_fn_inputs = {
            'cls_loss_repeat': cls_loss_repeat,
            'box_loss_repeat': box_loss_repeat,
            'source_ids': labels['source_ids'],
            'groundtruth_data': labels['groundtruth_data'],
            'image_scales': labels['image_scales'],
        }
        add_metric_fn_inputs(params, cls_outputs, box_outputs,
                             metric_fn_inputs)
        eval_metrics = (metric_fn, metric_fn_inputs)

    checkpoint = params.get('ckpt') or params.get('backbone_ckpt')

    if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
        # Initialize the model from an EfficientDet or backbone checkpoint.
        if params.get('ckpt') and params.get('backbone_ckpt'):
            raise RuntimeError(
                '--backbone_ckpt and --checkpoint are mutually exclusive')

        if params.get('backbone_ckpt'):
            var_scope = params['backbone_name'] + '/'
            if params['ckpt_var_scope'] is None:
                # Use backbone name as default checkpoint scope.
                ckpt_scope = params['backbone_name'] + '/'
            else:
                ckpt_scope = params['ckpt_var_scope'] + '/'
        else:
            # Load every var in the given checkpoint
            var_scope = ckpt_scope = '/'

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            logging.info('restore variables from %s', checkpoint)

            var_map = utils.get_ckpt_var_map(ckpt_path=checkpoint,
                                             ckpt_scope=ckpt_scope,
                                             var_scope=var_scope,
                                             var_exclude_expr=params.get(
                                                 'var_exclude_expr', None))
            tf.train.init_from_checkpoint(checkpoint, var_map)

            return tf.train.Scaffold()
    elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:

        def scaffold_fn():
            """Load moving average variables for eval."""
            logging.info('Load EMA vars with ema_decay=%f',
                         moving_average_decay)
            restore_vars_dict = ema.variables_to_restore(ema_vars)
            saver = tf.train.Saver(restore_vars_dict)
            return tf.train.Scaffold(saver=saver)
    else:
        scaffold_fn = None

    if params['strategy'] != 'tpu':
        # Profile every 1K steps.
        profile_hook = tf.train.ProfilerHook(save_steps=1000,
                                             output_dir=params['model_dir'])
        training_hooks.append(profile_hook)

        # Report memory allocation if OOM
        class OomReportingHook(tf.estimator.SessionRunHook):

            def before_run(self, run_context):
                return tf.estimator.SessionRunArgs(
                    fetches=[],
                    options=tf.RunOptions(
                        report_tensor_allocations_upon_oom=True))

        training_hooks.append(OomReportingHook())

    return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                             loss=total_loss,
                                             train_op=train_op,
                                             eval_metrics=eval_metrics,
                                             host_call=utils.get_tpu_host_call(
                                                 global_step, params),
                                             scaffold_fn=scaffold_fn,
                                             training_hooks=training_hooks)
def evaluate():
    """Builds the evaluation graph and runs the evaluation loop.

    A fresh graph is populated with the dataset, the model, the top-1 op and
    the cross-entropy / margin / L2 / total losses. Every tracked quantity is
    registered in `ops_dict` as a `(placeholder, tensor)` pair; either element
    may be `None`. The summaries are attached to the placeholders rather than
    to the tensors themselves. `_eval_once` is then invoked repeatedly, with a
    pause of `FLAGS.eval_interval_secs` between rounds, until
    `FLAGS.run_once` is set.

    Raises:
      ValueError: if `FLAGS.experiment_type` is not "mnist" (the only
        experiment type this function knows how to build).
    """
    g = tf.Graph()
    ops_dict = {}
    with g.as_default():
        # Data set.
        if FLAGS.experiment_type == "mnist":
            config = mnist_config.ConfigDict()
            dataset = mnist.MNIST(data_dir=FLAGS.data_dir,
                                  subset=FLAGS.subset,
                                  batch_size=FLAGS.batch_size,
                                  is_training=False)
            model = mnist_model.MNISTNetwork(config)
            layers_names = [
                "conv_layer%d" % i
                for i in range(len(config.filter_sizes_conv_layers))
            ]
        else:
            # Fail early with a clear message instead of a NameError on
            # `dataset` a few lines below.
            raise ValueError(
                "Unsupported experiment_type: %r" % (FLAGS.experiment_type,))

        images, labels, num_examples, num_classes = (dataset.images,
                                                     dataset.labels,
                                                     dataset.num_examples,
                                                     dataset.num_classes)

        logits, endpoints = model(images, is_training=False)
        # The margin is measured at the input and at every conv layer.
        layers_list = [images] + [endpoints[name] for name in layers_names]

        top1_op = tf.nn.in_top_k(logits, labels, 1)
        top1_op = tf.cast(top1_op, dtype=tf.float32)
        ops_dict["top1"] = (None, top1_op)
        accuracy_ph = tf.placeholder(tf.float32, None)
        ops_dict["top1_accuracy"] = (accuracy_ph, None)
        tf.summary.scalar("top1_accuracy", accuracy_ph)

        with tf.name_scope("optimizer"):
            global_step = tf.train.get_or_create_global_step()

        # Define losses.
        l2_loss_wt = config.l2_loss_wt
        xent_loss_wt = config.xent_loss_wt
        margin_loss_wt = config.margin_loss_wt
        gamma = config.gamma
        alpha = config.alpha
        top_k = config.top_k
        dist_norm = config.dist_norm
        with tf.name_scope("losses"):
            xent_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=labels))
            margin = margin_loss.large_margin(
                logits=logits,
                one_hot_labels=tf.one_hot(labels, num_classes),
                layers_list=layers_list,
                gamma=gamma,
                alpha_factor=alpha,
                top_k=top_k,
                dist_norm=dist_norm,
                epsilon=1e-6,
                # Per-layer weights (feature count of each layer) are only
                # supplied for the infinity norm.
                layers_weights=[
                    np.prod(layer.get_shape().as_list()[1:])
                    for layer in layers_list
                ] if np.isinf(dist_norm) else None)

            l2_loss = 0.
            for v in tf.trainable_variables():
                tf.logging.info(v)
                l2_loss += tf.nn.l2_loss(v)

            # Only terms with a strictly positive weight contribute.
            total_loss = 0
            if xent_loss_wt > 0:
                total_loss += xent_loss_wt * xent_loss
            if margin_loss_wt > 0:
                total_loss += margin_loss_wt * margin
            if l2_loss_wt > 0:
                total_loss += l2_loss_wt * l2_loss

            xent_loss_ph = tf.placeholder(tf.float32, None)
            margin_loss_ph = tf.placeholder(tf.float32, None)
            l2_loss_ph = tf.placeholder(tf.float32, None)
            total_loss_ph = tf.placeholder(tf.float32, None)
            tf.summary.scalar("xent_loss", xent_loss_ph)
            tf.summary.scalar("margin_loss", margin_loss_ph)
            tf.summary.scalar("l2_loss", l2_loss_ph)
            tf.summary.scalar("total_loss", total_loss_ph)

            ops_dict["losses/xent_loss"] = (xent_loss_ph, xent_loss)
            ops_dict["losses/margin_loss"] = (margin_loss_ph, margin)
            ops_dict["losses/l2_loss"] = (l2_loss_ph, l2_loss)
            ops_dict["losses/total_loss"] = (total_loss_ph, total_loss)

        # Prepare evaluation session.
        merged_summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir,
                                               tf.get_default_graph())
        vars_to_save = tf.global_variables()
        saver = tf.train.Saver(var_list=vars_to_save)
        scaffold = tf.train.Scaffold(saver=saver)
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=scaffold, checkpoint_dir=FLAGS.checkpoint_dir)
        while True:
            _eval_once(session_creator, ops_dict, summary_writer,
                       merged_summary, global_step, num_examples)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
def _update_mask(self, weights, threshold, gradients):  # pylint: disable=unused-argument
    """Builds a top-k pruning mask for a weight tensor.

    Every weight gets a score: its magnitude for the 'weight' option, or its
    magnitude multiplied by `gradients` for the gradient-based options. The
    `k` highest-scoring entries are kept, where `k` is the weight count times
    `(1 - sparsity)`, rounded. Scores are randomly permuted before sorting so
    that ties are broken uniformly at random, and the mask is un-permuted
    afterwards.

    Args:
      weights: The weight tensor that needs to be masked.
      threshold: Unused by this implementation.
      gradients: Tensor multiplied with `|weights|` for the
        'first_order_gradient' / 'second_order_gradient' options; must not be
        None for those options.

    Returns:
      new_threshold: Always a float32 constant 0.
      new_mask: A float32 tensor with the shape of `weights`, holding 1 for
        kept entries and 0 for pruned ones.

    Raises:
      ValueError: if sparsity is not defined, if a gradient option is
        selected without `gradients`, or if the prune option is unknown.
    """
    if self._sparsity is None:
        raise ValueError('Sparsity variable undefined')

    sparsity = self._get_sparsity(weights.op.name)
    with tf.name_scope(weights.op.name + '_pruning_ops'):
        prune_option = self._spec.prune_option
        tf.logging.info('Applying option %s pruning', prune_option)
        if prune_option == 'weight':
            scores = tf.abs(weights)
        elif prune_option in ('first_order_gradient',
                              'second_order_gradient'):
            if gradients is None:
                raise ValueError('gradient tensor cannot be None.')
            # gradient variable stores absolute value already
            scores = tf.multiply(tf.abs(weights), gradients)
        else:
            raise ValueError('undefined option')

        # Number of entries that stay non-zero.
        num_scores = tf.cast(tf.size(scores), tf.float32)
        keep_fraction = 1 - sparsity
        num_kept = tf.cast(tf.round(num_scores * keep_fraction), tf.int32)

        # Random permutation of the flat positions; it makes the tie-breaker
        # on equal scores uniformly random.
        permutation = tf.random_shuffle(tf.range(tf.size(scores)))
        permutation = tf.reshape(permutation, [-1, 1])

        # Flatten the scores and scatter them to their permuted positions.
        flat_scores = tf.reshape(scores, [-1])
        shuffled_scores = tf.scatter_nd(permutation, flat_scores,
                                        tf.shape(flat_scores))

        # Full descending sort of the permuted scores.
        _, sorted_indices = tf.nn.top_k(shuffled_scores,
                                        k=tf.size(shuffled_scores))

        # In sorted order the first `num_kept` slots are ones, the rest zeros.
        rank_positions = tf.range(tf.size(shuffled_scores))
        keep_by_rank = tf.cast(tf.less(rank_positions, num_kept), tf.float32)

        # Move each rank's keep flag back to its (permuted) position.
        sorted_indices = tf.reshape(sorted_indices, [-1, 1])
        shuffled_mask = tf.scatter_nd(sorted_indices, keep_by_rank,
                                      tf.shape(keep_by_rank))

        # Undo the permutation and restore the original weight shape.
        new_mask = tf.reshape(tf.gather_nd(shuffled_mask, permutation),
                              tf.shape(weights))
    return tf.constant(0, tf.float32), new_mask
# Disabled validation split, kept for reference:
# validation_images = vec1[:, :-2][:VALIDATION_SIZE]
# validation_labels = vec1[:, -2:][:VALIDATION_SIZE]

# Rows from `test_size` onwards are training data; the last two columns of
# each row are the labels and everything before them is the input.
train = vec1[test_size:, :]
train_images = train[:, :-2]
train_labels = train[:, -2:]

# Disabled test split that excluded the validation rows:
# test_images = vec1[:, :-2][VALIDATION_SIZE:test_size]
# test_labels = vec1[:, -2:][VALIDATION_SIZE:test_size]

# The first `test_size` rows are the test data.
test_images = vec1[:test_size, :-2]
test_labels = vec1[:test_size, -2:]

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

with tf.name_scope('inputs'):
    x = tf.placeholder(tf.float32, [None, 100], name='x_input')
    y_ = tf.placeholder(tf.float32, [None, 2], name='y_input')

# Reshape the flat input into image form.
# To apply the layer, we first reshape x to a 4d tensor, with the second and
# third dimensions corresponding to image width and height, and the final
# dimension corresponding to the number of color channels. The -1 stands for
# an arbitrary number of samples.
x_image = tf.reshape(x, [-1, 10, 10, 1])


# Define weights and biases.
# ---- Weight Initialization ----
# One should generally initialize weights with a small amount of noise for
# symmetry breaking, and to prevent 0 gradients.
def weight_variable(shape):
    """Returns a variable of `shape` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def focal_loss(logits, targets, alpha, gamma, normalizer):
    """Computes the focal loss between `logits` and the golden `targets`.

    Focal loss = -(1 - pt)^gamma * log(pt), where pt is the probability of
    being classified to the true class.

    Args:
      logits: A float32 tensor of size
        [batch, height_in, width_in, num_predictions].
      targets: A float32 tensor of size
        [batch, height_in, width_in, num_predictions].
      alpha: A float32 scalar; positive examples are weighted by alpha and
        negative examples by (1 - alpha).
      gamma: A float32 scalar modulating loss from hard and easy examples.
      normalizer: A float32 scalar the total loss is divided by.

    Returns:
      loss: A float32 Tensor of size
        [batch, height_in, width_in, num_predictions] holding the normalized
        loss on the prediction map.
    """
    with tf.name_scope('focal_loss'):
        is_positive = tf.equal(targets, 1.0)
        sigmoid_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=targets, logits=logits)

        # Derivation of the modulator.
        # For brevity, let x = logits, z = targets, r = gamma, and
        # p_t = sigmoid(x) for positive samples and 1 - sigmoid(x) for
        # negative examples.
        #
        # The modulator, defined as (1 - p_t)^r, is a critical part of the
        # focal loss. For r > 0 it puts more weight on hard examples and less
        # on easy ones. Computed directly as (1 - p_t)^r, however, its
        # back-propagation is not stable when r < 1. The form below avoids
        # that.
        #
        # For positive samples (labels being 1),
        #    (1 - p_t)^r
        #  = (1 - sigmoid(x))^r
        #  = (1 - (1 / (1 + exp(-x))))^r
        #  = (exp(-x) / (1 + exp(-x)))^r
        #  = exp(log((exp(-x) / (1 + exp(-x)))^r))
        #  = exp(r * log(exp(-x)) - r * log(1 + exp(-x)))
        #  = exp(- r * x - r * log(1 + exp(-x)))
        #
        # For negative samples (labels being 0),
        #    (1 - p_t)^r
        #  = (sigmoid(x))^r
        #  = (1 / (1 + exp(-x)))^r
        #  = exp(log((1 / (1 + exp(-x)))^r))
        #  = exp(-r * log(1 + exp(-x)))
        #
        # Therefore one unified form for positive (z = 1) and negative
        # (z = 0) samples is:
        #    (1 - p_t)^r = exp(-r * z * x - r * log(1 + exp(-x))).
        negated_logits = -1.0 * logits
        label_term = gamma * targets * negated_logits
        softplus_term = gamma * tf.math.softplus(negated_logits)
        modulator = tf.exp(label_term - softplus_term)

        modulated = modulator * sigmoid_xent
        positive_loss = alpha * modulated
        negative_loss = (1.0 - alpha) * modulated
        weighted = tf.where(is_positive, positive_loss, negative_loss)
        # The tiny epsilon guards against a zero normalizer.
        normalized = weighted / (normalizer + 1e-20)
    return normalized
def mask_head(roi_features,
              class_indices,
              num_classes=91,
              mrcnn_resolution=28,
              is_gpu_inference=False):
    """Mask branch for the Mask-RCNN model.

    Runs four 3x3 convolutions, one stride-2 transposed convolution and a 1x1
    convolution producing per-class mask logits, then picks for every ROI the
    mask belonging to the class given in `class_indices`.

    Args:
      roi_features: A ROI feature tensor of shape
        [batch_size, num_rois, height_l, width_l, num_filters].
      class_indices: a Tensor of shape [batch_size, num_rois], indicating
        which class the ROI is.
      num_classes: an integer for the number of classes.
      mrcnn_resolution: an integer that is the resolution of masks.
      is_gpu_inference: whether to build the model for GPU inference. When
        set, index arithmetic is done in float32 and cast to int32 right
        before the gather.

    Returns:
      mask_outputs: a tensor with a shape of
        [batch_size, num_masks, mask_height, mask_width],
        representing the mask predictions.
    """

    def _msra_fill_stddev(kernel_size, fan_out):
        """Returns the stddev of random normal initialization as MSRAFill."""
        # Reference: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/filler_op.h#L445-L463  # pylint: disable=line-too-long
        # For example, kernel size is (3, 3) and fan out is 256, stddev is
        # 0.029: stddev = (2/(3*3*256))^0.5 = 0.029
        return (2 / (kernel_size[0] * kernel_size[1] * fan_out))**0.5

    with tf.variable_scope('mask_head'):
        static_shape = roi_features.get_shape().as_list()
        batch_size, num_rois, height, width, filters = static_shape
        # Fold the ROI axis into the batch axis for the 2-D convolutions.
        net = tf.reshape(roi_features, [-1, height, width, filters])

        # Four identical 3x3 conv layers.
        conv_kernel = (3, 3)
        conv_filters = 256
        conv_stddev = _msra_fill_stddev(conv_kernel, conv_filters)
        for layer_idx in range(4):
            net = tf.layers.conv2d(
                net,
                conv_filters,
                kernel_size=conv_kernel,
                strides=(1, 1),
                padding='same',
                dilation_rate=(1, 1),
                activation=tf.nn.relu,
                kernel_initializer=tf.random_normal_initializer(
                    stddev=conv_stddev),
                bias_initializer=tf.zeros_initializer(),
                name='mask-conv-l%d' % layer_idx)

        # Stride-2 transposed convolution.
        deconv_kernel = (2, 2)
        deconv_filters = 256
        deconv_stddev = _msra_fill_stddev(deconv_kernel, deconv_filters)
        net = tf.layers.conv2d_transpose(
            net,
            deconv_filters,
            kernel_size=deconv_kernel,
            strides=(2, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=tf.random_normal_initializer(
                stddev=deconv_stddev),
            bias_initializer=tf.zeros_initializer(),
            name='conv5-mask')

        # 1x1 conv yielding one mask logit map per class.
        logits_kernel = (1, 1)
        logits_stddev = _msra_fill_stddev(logits_kernel, num_classes)
        mask_outputs = tf.layers.conv2d(
            net,
            num_classes,
            kernel_size=logits_kernel,
            strides=(1, 1),
            padding='valid',
            kernel_initializer=tf.random_normal_initializer(
                stddev=logits_stddev),
            bias_initializer=tf.zeros_initializer(),
            name='mask_fcn_logits')
        mask_outputs = tf.reshape(
            mask_outputs,
            [-1, num_rois, mrcnn_resolution, mrcnn_resolution, num_classes])

        if is_gpu_inference:
            indices_dtype = tf.float32
        else:
            indices_dtype = tf.int32

        with tf.name_scope('masks_post_processing'):
            # -> [batch, num_rois, num_classes, resolution, resolution]
            mask_outputs = tf.transpose(mask_outputs, [0, 1, 4, 2, 3])

            if batch_size != 1:
                # General case: gather with (batch, roi, class) triples.
                batch_indices = (
                    tf.expand_dims(
                        tf.range(batch_size, dtype=indices_dtype), axis=1) *
                    tf.ones([1, num_rois], dtype=indices_dtype))
                mask_indices = (
                    tf.expand_dims(
                        tf.range(num_rois, dtype=indices_dtype), axis=0) *
                    tf.ones([batch_size, 1], dtype=indices_dtype))
                gather_indices = tf.stack(
                    [batch_indices, mask_indices, class_indices], axis=2)
                if is_gpu_inference:
                    gather_indices = tf.cast(gather_indices, dtype=tf.int32)
                mask_outputs = tf.gather_nd(mask_outputs, gather_indices)
            else:
                # Single image: flatten (roi, class) into one axis and gather
                # at roi * num_classes + class.
                roi_offsets = tf.reshape(
                    tf.range(num_rois, dtype=indices_dtype),
                    [batch_size, num_rois, 1]) * num_classes
                class_column = tf.expand_dims(class_indices, axis=-1)
                indices = tf.reshape(roi_offsets + class_column,
                                     [batch_size, -1])

                # If using GPU for inference, delay the cast until when
                # Gather ops show up since GPU inference supports float point
                # better.
                # TODO(laigd): revisit this when newer versions of GPU
                # libraries is released.
                if is_gpu_inference:
                    indices = tf.cast(indices, dtype=tf.int32)

                flat_masks = tf.reshape(
                    mask_outputs,
                    [batch_size, -1, mrcnn_resolution, mrcnn_resolution])
                mask_outputs = tf.gather(flat_masks, indices, axis=1)
                mask_outputs = tf.squeeze(mask_outputs, axis=1)
                mask_outputs = tf.reshape(
                    mask_outputs,
                    [batch_size, num_rois, mrcnn_resolution,
                     mrcnn_resolution])
    return mask_outputs
Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases) if activation_function is None: outputs = Wx_plus_b else: outputs = activation_function(Wx_plus_b, ) tf.summary.histogram(layer_name + '/outputs', outputs) return outputs # Make up some real data x_data = np.linspace(-1, 1, 300)[:, np.newaxis] noise = np.random.normal(0, 0.05, x_data.shape) y_data = np.square(x_data) - 0.5 + noise # define placeholder for inputs to network with tf.name_scope('inputs'): xs = tf.placeholder(tf.float32, [None, 1], name='x_input') ys = tf.placeholder(tf.float32, [None, 1], name='y_input') # add hidden layer l1 = add_layer(xs, 1, 10, n_layer=1, activation_function=tf.nn.relu) # add output layer prediction = add_layer(l1, 10, 1, n_layer=2, activation_function=None) # the error between prediciton and real data with tf.name_scope('loss'): loss = tf.reduce_mean( tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1])) tf.summary.scalar('loss', loss) with tf.name_scope('train'):
def inception_model_fn(features, labels, mode, params):
    """Inception v3 model using Estimator API.

    Builds the Inception v3 network and, depending on `mode`, the prediction
    outputs, the label-smoothed softmax losses plus explicit L2 weight decay,
    the training op with its learning-rate schedule and host-side summaries,
    or the evaluation metrics.

    Args:
      features: The input batch, or a dict holding it under the key
        'feature'.
      labels: Class labels; one-hot encoded here with `FLAGS.num_classes`
        classes. Not used in PREDICT mode.
      mode: A `tf.estimator.ModeKeys` value.
      params: Parameter dict; `params['input_perm']` is forwarded to
        `tensor_transform_fn`.

    Returns:
      A `tf.estimator.EstimatorSpec` in PREDICT mode, otherwise a
      `contrib_tpu.TPUEstimatorSpec`.

    Raises:
      ValueError: if `FLAGS.precision` is neither 'bfloat16' nor 'float32'.
    """
    num_classes = FLAGS.num_classes
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_eval = (mode == tf.estimator.ModeKeys.EVAL)

    if isinstance(features, dict):
        features = features['feature']

    features = tensor_transform_fn(features, params['input_perm'])

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        if FLAGS.precision == 'bfloat16':
            with contrib_tpu.bfloat16_scope():
                logits, end_points = inception.inception_v3(
                    features, num_classes, is_training=is_training)
            # Losses and metrics are computed in float32.
            logits = tf.cast(logits, tf.float32)
        elif FLAGS.precision == 'float32':
            logits, end_points = inception.inception_v3(
                features, num_classes, is_training=is_training)
        else:
            # Previously this fell through to an UnboundLocalError on return.
            raise ValueError(
                'Unsupported precision: %r' % (FLAGS.precision,))
        return logits, end_points

    if FLAGS.clear_update_collections:
        # updates_collections must be set to None in order to use fused
        # batchnorm
        with arg_scope(inception.inception_v3_arg_scope(
                weight_decay=0.0,
                batch_norm_decay=BATCH_NORM_DECAY,
                batch_norm_epsilon=BATCH_NORM_EPSILON,
                updates_collections=None)):
            logits, end_points = build_network()
    else:
        with arg_scope(inception.inception_v3_arg_scope(
                batch_norm_decay=BATCH_NORM_DECAY,
                batch_norm_epsilon=BATCH_NORM_EPSILON)):
            logits, end_points = build_network()

    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
            not FLAGS.use_tpu):
        # Print predictions and labels side by side; the control dependency
        # forces the prediction print to run whenever labels are consumed.
        with tf.control_dependencies([
                tf.Print(
                    predictions['classes'], [predictions['classes']],
                    summarize=FLAGS.eval_batch_size,
                    message='prediction: ')
        ]):
            labels = tf.Print(
                labels, [labels],
                summarize=FLAGS.eval_batch_size,
                message='label: ')

    one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

    if 'AuxLogits' in end_points:
        tf.losses.softmax_cross_entropy(
            onehot_labels=one_hot_labels,
            logits=tf.cast(end_points['AuxLogits'], tf.float32),
            weights=0.4,
            label_smoothing=0.1,
            scope='aux_loss')

    tf.losses.softmax_cross_entropy(
        onehot_labels=one_hot_labels,
        logits=logits,
        weights=1.0,
        label_smoothing=0.1)

    losses = tf.add_n(tf.losses.get_losses())
    # Weight decay is applied to 'weights' variables outside BatchNorm only.
    l2_loss = []
    for v in tf.trainable_variables():
        if 'BatchNorm' not in v.name and 'weights' in v.name:
            l2_loss.append(tf.nn.l2_loss(v))
    loss = losses + WEIGHT_DECAY * tf.add_n(l2_loss)

    initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
    if FLAGS.use_learning_rate_warmup:
        # Adjust initial learning rate to match final warmup rate
        warmup_decay = FLAGS.learning_rate_decay**(
            (FLAGS.warmup_epochs + FLAGS.cold_epochs) /
            FLAGS.learning_rate_decay_epochs)
        adj_initial_learning_rate = initial_learning_rate * warmup_decay

    final_learning_rate = 0.0001 * initial_learning_rate

    host_call = None
    train_op = None
    if is_training:
        batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        global_step = tf.train.get_or_create_global_step()
        current_epoch = tf.cast(
            (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

        learning_rate = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=int(FLAGS.learning_rate_decay_epochs *
                            batches_per_epoch),
            decay_rate=FLAGS.learning_rate_decay,
            staircase=True)

        if FLAGS.use_learning_rate_warmup:
            # Constant `wlr` during the cold epochs, then a linear ramp, then
            # the exponential decay schedule built above.
            wlr = 0.1 * adj_initial_learning_rate
            wlr_height = tf.cast(
                0.9 * adj_initial_learning_rate /
                (FLAGS.warmup_epochs + FLAGS.learning_rate_decay_epochs - 1),
                tf.float32)
            epoch_offset = tf.cast(FLAGS.cold_epochs - 1, tf.int32)
            exp_decay_start = (FLAGS.warmup_epochs + FLAGS.cold_epochs +
                               FLAGS.learning_rate_decay_epochs)
            lin_inc_lr = tf.add(
                wlr,
                tf.multiply(
                    tf.cast(tf.subtract(current_epoch, epoch_offset),
                            tf.float32),
                    wlr_height))
            learning_rate = tf.where(
                tf.greater_equal(current_epoch, FLAGS.cold_epochs),
                (tf.where(tf.greater_equal(current_epoch, exp_decay_start),
                          learning_rate, lin_inc_lr)),
                wlr)

        # Set a minimum boundary for the learning rate.
        learning_rate = tf.maximum(
            learning_rate, final_learning_rate, name='learning_rate')

        if FLAGS.optimizer == 'sgd':
            tf.logging.info('Using SGD optimizer')
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif FLAGS.optimizer == 'momentum':
            tf.logging.info('Using Momentum optimizer')
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=learning_rate, momentum=0.9)
        elif FLAGS.optimizer == 'RMS':
            tf.logging.info('Using RMS optimizer')
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate,
                RMSPROP_DECAY,
                momentum=RMSPROP_MOMENTUM,
                epsilon=RMSPROP_EPSILON)
        else:
            # The message needs a %s placeholder: logging formats lazily with
            # `msg % args`, and an argument without a placeholder makes that
            # formatting fail.
            tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)

        if FLAGS.use_tpu:
            optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

        # Run the collected update ops together with every training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)
        if FLAGS.moving_average:
            ema = tf.train.ExponentialMovingAverage(
                decay=MOVING_AVERAGE_DECAY, num_updates=global_step)
            variables_to_average = (
                tf.trainable_variables() + tf.moving_average_variables())
            # The averages are updated only after the optimizer step has run.
            with tf.control_dependencies([train_op]), tf.name_scope(
                    'moving_average'):
                train_op = ema.apply(variables_to_average)

        # To log the loss, current learning rate, and epoch for Tensorboard,
        # the summary op needs to be run on the host CPU via host_call.
        # host_call expects [batch_size, ...] Tensors, thus reshape to
        # introduce a batch dimension. These Tensors are implicitly
        # concatenated to [params['batch_size']].
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        if not FLAGS.skip_host_call:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call. Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `metric_fn`, provide them as
                part of the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                  gs: `Tensor with shape `[batch]` for the global_step
                  loss: `Tensor` with shape `[batch]` for the training loss.
                  lr: `Tensor` with shape `[batch]` for the learning_rate.
                  ce: `Tensor` with shape `[batch]` for the current_epoch.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                gs = gs[0]
                with summary.create_file_writer(
                        FLAGS.model_dir).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                        summary.scalar(
                            'learning_rate', tf.reduce_mean(lr), step=gs)
                        summary.scalar(
                            'current_epoch', tf.reduce_mean(ce), step=gs)
                        return summary.all_summary_ops()

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    eval_metrics = None
    if is_eval:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
              labels: `Tensor` with shape `[batch, ]`.
              logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
              A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'accuracy': top_1_accuracy,
                'accuracy@5': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        host_call=host_call,
        eval_metrics=eval_metrics)
name='inputs_raw') targets_raw = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='targets_raw') path_LR = tf.placeholder(tf.string, shape=[], name='path_LR') path_HR = tf.placeholder(tf.string, shape=[], name='path_HR') with tf.variable_scope('generator'): if FLAGS.task == 'SRGAN' or FLAGS.task == 'SRResnet': gen_output = generator(inputs_raw, 3, reuse=False, FLAGS=FLAGS) else: raise NotImplementedError('Unknown task!!') print('Finish building the network') with tf.name_scope('convert_image'): # Deprocess the images outputed from the model inputs = deprocessLR(inputs_raw) targets = deprocess(targets_raw) outputs = deprocess(gen_output) # Convert back to uint8 converted_inputs = tf.image.convert_image_dtype(inputs, dtype=tf.uint8, saturate=True) converted_targets = tf.image.convert_image_dtype(targets, dtype=tf.uint8, saturate=True) converted_outputs = tf.image.convert_image_dtype(outputs, dtype=tf.uint8, saturate=True)
def transformer_model(input_tensor,
                      attention_mask=None,
                      hidden_size=768,
                      num_hidden_layers=12,
                      num_hidden_groups=12,
                      num_attention_heads=12,
                      intermediate_size=3072,
                      inner_group_num=1,
                      intermediate_act_fn="gelu",
                      hidden_dropout_prob=0.1,
                      attention_probs_dropout_prob=0.1,
                      initializer_range=0.02,
                      do_return_all_layers=False,
                      use_einsum=True):
    """Multi-headed, multi-layer Transformer from "Attention is All You Need".

    This is almost an exact implementation of the original Transformer
    encoder, with layers assigned to parameter-sharing groups: layers mapped
    to the same group reuse one variable scope.

    See the original paper:
    https://arxiv.org/abs/1706.03762

    Also see:
    https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py

    Args:
      input_tensor: float Tensor of shape [batch_size, seq_length,
        hidden_size]. If its last dimension differs from `hidden_size` it is
        first projected to `hidden_size` by a dense layer.
      attention_mask: (optional) int32 Tensor of shape [batch_size,
        seq_length], with 1 for positions that can be attended to and 0 in
        positions that should not be.
      hidden_size: int. Hidden size of the Transformer.
      num_hidden_layers: int. Number of layers (blocks) in the Transformer.
      num_hidden_groups: int. Number of group for the hidden layers,
        parameters in the same group are shared.
      num_attention_heads: int. Number of attention heads in the Transformer.
      intermediate_size: int. The size of the "intermediate" (a.k.a., feed
        forward) layer.
      inner_group_num: int, number of inner repetition of attention and ffn.
      intermediate_act_fn: The non-linear activation applied to the output of
        the intermediate/feed-forward layer; forwarded unchanged to
        `attention_ffn_block`.
      hidden_dropout_prob: float. Dropout probability for the hidden layers.
      attention_probs_dropout_prob: float. Dropout probability of the
        attention probabilities.
      initializer_range: float. Range of the initializer (stddev of truncated
        normal).
      do_return_all_layers: Whether to also return all layers or just the
        final layer.
      use_einsum: bool. Whether to use einsum or reshape+matmul for dense
        layers

    Returns:
      The output of the final block, or, when `do_return_all_layers` is set,
      the list of the outputs of every block in order.

    Raises:
      ValueError: A Tensor shape or parameter is invalid.
    """
    if hidden_size % num_attention_heads != 0:
        raise ValueError(
            "The hidden size (%d) is not a multiple of the number of attention "
            "heads (%d)" % (hidden_size, num_attention_heads))

    head_size = hidden_size // num_attention_heads
    input_width = get_shape_list(input_tensor, expected_rank=3)[2]

    layer_outputs = []
    if input_width == hidden_size:
        current = input_tensor
    else:
        # Project the input up/down to the Transformer's hidden size.
        current = dense_layer_2d(input_tensor,
                                 hidden_size,
                                 create_initializer(initializer_range),
                                 None,
                                 use_einsum=use_einsum,
                                 name="embedding_hidden_mapping_in")

    with tf.variable_scope("transformer", reuse=tf.AUTO_REUSE):
        for layer_idx in range(num_hidden_layers):
            # Consecutive layers are mapped onto groups; AUTO_REUSE makes
            # layers of one group share their variables.
            group_idx = int(layer_idx / num_hidden_layers * num_hidden_groups)
            group_scope = "group_%d" % group_idx
            layer_scope = "layer_%d" % layer_idx
            with tf.variable_scope(group_scope):
                with tf.name_scope(layer_scope):
                    block_output = current
                    for inner_idx in range(inner_group_num):
                        with tf.variable_scope("inner_group_%d" % inner_idx):
                            block_output = attention_ffn_block(
                                layer_input=block_output,
                                hidden_size=hidden_size,
                                attention_mask=attention_mask,
                                num_attention_heads=num_attention_heads,
                                attention_head_size=head_size,
                                attention_probs_dropout_prob=(
                                    attention_probs_dropout_prob),
                                intermediate_size=intermediate_size,
                                intermediate_act_fn=intermediate_act_fn,
                                initializer_range=initializer_range,
                                hidden_dropout_prob=hidden_dropout_prob,
                                use_einsum=use_einsum)
                            current = block_output
                            layer_outputs.append(block_output)

    if do_return_all_layers:
        return layer_outputs
    return layer_outputs[-1]
def minimize(objective_function,
             initial_simplex=None,
             initial_vertex=None,
             step_sizes=None,
             objective_at_initial_simplex=None,
             objective_at_initial_vertex=None,
             batch_evaluate_objective=False,
             func_tolerance=1e-8,
             position_tolerance=1e-8,
             parallel_iterations=1,
             max_iterations=None,
             reflection=None,
             expansion=None,
             contraction=None,
             shrinkage=None,
             name=None):
    """Minimize an objective function with the Nelder Mead simplex algorithm.

    Performs an unconstrained minimization of a (possibly non-smooth) function
    using the Nelder Mead simplex method. The method does not support
    univariate functions, so the dimension of the domain must be 2 or greater.
    For details of the algorithm, see
    [Press, Teukolsky, Vetterling and Flannery(2007)][1].

    Points in the domain of the objective function may be represented as a
    `Tensor` of general shape but with rank at least 1. The algorithm proceeds
    by modifying a full rank simplex in the domain. The initial simplex is
    either given directly by the caller or built from a single supplied
    vertex. In the latter case, if `v0` is the supplied vertex, the simplex is
    the convex hull of the set:

    ```None
    S = {v0} + {v0 + step_i * e_i}
    ```

    Here `e_i` is a vector which is `1` along the `i`-th axis and zero
    elsewhere, and `step_i` is a characteristic length scale along the `i`-th
    axis. If no step size is supplied, a unit step is used along every axis.
    Alternatively a single step size may be given and is then used for every
    axis. The most flexible option is a separate step size per axis.

    ### Usage:

    The following example demonstrates the Nelder Mead minimization of a two
    dimensional problem whose minimum lies at a non-differentiable point.

    ```python
      # The objective function
      def sqrt_quadratic(x):
        return tf.sqrt(tf.reduce_sum(x ** 2, axis=-1))

      start = tf.constant([6.0, -21.0])  # Starting point for the search.
      optim_results = tfp.optimizer.nelder_mead_minimize(
          sqrt_quadratic, initial_vertex=start, func_tolerance=1e-8,
          batch_evaluate_objective=True)

      # Check that the search converged
      assert(optim_results.converged)
      # Check that the argmin is close to the actual value.
      np.testing.assert_allclose(optim_results.position, np.array([0.0, 0.0]),
                                 atol=1e-7)
      # Print out the total number of function evaluations it took.
      print("Function evaluations: %d" %
            optim_results.num_objective_evaluations)
    ```

    ### References:
    [1]: William Press, Saul Teukolsky, William Vetterling and Brian Flannery.
      Numerical Recipes in C++, third edition. pp. 502-507. (2007).
      http://numerical.recipes/cpppages/chap0sel.pdf
    [2]: Jeffrey Lagarias, James Reeds, Margaret Wright and Paul Wright.
      Convergence properties of the Nelder-Mead simplex method in low
      dimensions, Siam J. Optim., Vol 9, No. 1, pp. 112-147. (1998).
      http://www.math.kent.edu/~reichel/courses/Opt/reading.material.2/nelder.mead.pdf
    [3]: Fuchang Gao and Lixing Han. Implementing the Nelder-Mead simplex
      algorithm with adaptive parameters. Computational Optimization and
      Applications, Vol 51, Issue 1, pp 259-277. (2012).
      https://pdfs.semanticscholar.org/15b4/c4aa7437df4d032c6ee6ce98d6030dd627be.pdf

    Args:
      objective_function: A Python callable that accepts a point as a real
        `Tensor` and returns a `Tensor` of real dtype containing the value of
        the function at that point. The function to be minimized. If
        `batch_evaluate_objective` is `True`, the callable may be evaluated on
        a `Tensor` of shape `[n+1] + s` where `n` is the dimension of the
        problem and `s` is the shape of a single point in the domain (so `n`
        is the size of a `Tensor` representing a single point). In that case
        the expected return value is a `Tensor` of shape `[n+1]`. Univariate
        functions are not supported, so `n` must be strictly greater than 1.
      initial_simplex: (Optional) `Tensor` of real dtype. The initial simplex
        to start the search. If supplied, should be a `Tensor` of shape
        `[n+1] + s` where `n` is the dimension of the problem and `s` is the
        shape of a single point in the domain. Each row (i.e. the `Tensor`
        with a given value of the first index) is interpreted as a vertex of
        a simplex, hence the rows must be affinely independent. If not
        supplied, an axes aligned simplex is constructed from
        `initial_vertex` and `step_sizes`. Exactly one of `initial_simplex`
        and `initial_vertex` must be supplied.
      initial_vertex: (Optional) `Tensor` of real dtype and any shape that can
        be consumed by the `objective_function`. A single point in the domain
        used to construct an axes aligned initial simplex.
      step_sizes: (Optional) `Tensor` of real dtype and shape broadcasting
        compatible with `initial_vertex`. Supplies the simplex scale along
        each axis. Only used if `initial_simplex` is not supplied. See the
        description above for how step sizes and the initial vertex define
        the initial simplex.
      objective_at_initial_simplex: (Optional) Rank `1` `Tensor` of real
        dtype. The value of the objective function at the initial simplex.
        May be supplied only if `initial_simplex` is supplied. If not
        supplied, it will be computed.
      objective_at_initial_vertex: (Optional) Scalar `Tensor` of real dtype.
        The value of the objective function at the initial vertex. May be
        supplied only if `initial_vertex` is also supplied.
      batch_evaluate_objective: (Optional) Python `bool`. If True, the
        objective function is evaluated on all the vertices of the simplex
        packed into a single tensor. If False, the objective is mapped across
        each vertex separately. Batch evaluation allows vectorization and
        should be preferred if the objective function supports it.
      func_tolerance: (Optional) Scalar `Tensor` of real dtype. The algorithm
        stops if the absolute difference between the largest and the smallest
        function value on the vertices of the simplex is below this number.
      position_tolerance: (Optional) Scalar `Tensor` of real dtype. The
        algorithm stops if the largest absolute difference between the
        coordinates of the vertices is below this threshold.
      parallel_iterations: (Optional) Positive integer. The number of
        iterations allowed to run in parallel.
      max_iterations: (Optional) Scalar positive `Tensor` of dtype `int32`.
        The maximum number of iterations allowed. If `None` then no limit is
        applied.
      reflection: (Optional) Positive Scalar `Tensor` of same dtype as
        `initial_vertex`. Controls the scaling of the reflected vertex. See
        [Press et al(2007)][1] for details. If not specified, uses the
        dimension dependent prescription of [Gao and Han(2012)][3].
      expansion: (Optional) Positive Scalar `Tensor` of same dtype as
        `initial_vertex`. Should be greater than `1` and `reflection`.
        Controls the expanded scaling of a reflected vertex. See
        [Press et al(2007)][1] for details. If not specified, uses the
        dimension dependent prescription of [Gao and Han(2012)][3].
      contraction: (Optional) Positive scalar `Tensor` of same dtype as
        `initial_vertex`. Must be between `0` and `1`. Controls the
        contraction of the reflected vertex when the objective function at
        the reflected point fails to show sufficient decrease. See
        [Press et al(2007)][1] for more details. If not specified, uses the
        dimension dependent prescription of [Gao and Han(2012)][3].
      shrinkage: (Optional) Positive scalar `Tensor` of same dtype as
        `initial_vertex`. Must be between `0` and `1`. The scale by which the
        simplex is shrunk around the best point when the other steps fail to
        produce improvements. See [Press et al(2007)][1] for more details. If
        not specified, uses the dimension dependent prescription of
        [Gao and Han(2012)][3].
      name: (Optional) Python str. The name prefixed to the ops created by
        this function. If not supplied, the default name 'minimize' is used.

    Returns:
      optimizer_results: A namedtuple containing the following items:
        converged: Scalar boolean tensor indicating whether the minimum was
          found within tolerance.
        num_objective_evaluations: The total number of objective evaluations
          performed.
        position: A `Tensor` containing the last argument value found during
          the search. If the search converged, then this value is the argmin
          of the objective function.
        objective_value: A tensor containing the value of the objective
          function at the `position`. If the search converged, then this is
          the (local) minimum of the objective function.
        final_simplex: The last simplex constructed before stopping.
        final_objective_values: The objective function evaluated at the
          vertices of the final simplex.
        initial_simplex: The starting simplex.
        initial_objective_values: The objective function evaluated at the
          vertices of the initial simplex.
        num_iterations: The number of iterations of the main algorithm body.

    Raises:
      ValueError: If any of the following conditions hold
        1. If none or more than one of `initial_simplex` and `initial_vertex`
          are supplied.
        2. If `initial_simplex` and `step_sizes` are both specified.
    """
    scope_values = [
        initial_simplex, initial_vertex, step_sizes,
        objective_at_initial_simplex, objective_at_initial_vertex,
        func_tolerance, position_tolerance
    ]
    with tf1.name_scope(name, 'minimize', scope_values):
        prepared = _prepare_args(objective_function,
                                 initial_simplex,
                                 initial_vertex,
                                 step_sizes,
                                 objective_at_initial_simplex,
                                 objective_at_initial_vertex,
                                 batch_evaluate_objective)
        dim, _, simplex, objective_at_simplex, num_evaluations = prepared

        # Fill in any unspecified algorithm coefficients; the simplex dtype
        # is passed along as the domain dtype.
        reflection, expansion, contraction, shrinkage = _resolve_parameters(
            dim, reflection, expansion, contraction, shrinkage, simplex.dtype)

        def _loop_body(_, iteration, current_simplex, current_objective,
                       evaluations_so_far):
            """Runs one Nelder Mead step and advances the loop counters."""
            step_result = nelder_mead_one_step(
                current_simplex,
                current_objective,
                objective_function=objective_function,
                dim=dim,
                func_tolerance=func_tolerance,
                position_tolerance=position_tolerance,
                batch_evaluate_objective=batch_evaluate_objective,
                reflection=reflection,
                expansion=expansion,
                contraction=contraction,
                shrinkage=shrinkage)
            (step_converged, new_simplex, new_objective,
             step_evaluations) = step_result
            return (step_converged,
                    iteration + 1,
                    new_simplex,
                    new_objective,
                    evaluations_so_far + step_evaluations)

        def _should_continue(converged, num_iterations, *unused_loop_vars):  # pylint:disable=unused-argument
            """Loop condition: keep going until converged or out of budget."""
            # The negation must go through `tf.logical_not` so the result is
            # a tensor. On a plain Python bool the overloaded `~` is a bitwise
            # complement (~True = -2, ~False = -1), both truthy, and the loop
            # would never terminate.
            still_searching = tf.logical_not(converged)
            if max_iterations is None:
                return still_searching
            return still_searching & (num_iterations < max_iterations)

        loop_result = tf.while_loop(
            cond=_should_continue,
            body=_loop_body,
            loop_vars=(False, 0, simplex, objective_at_simplex,
                       num_evaluations),
            parallel_iterations=parallel_iterations)
        (converged, num_iterations, final_simplex, final_objective_values,
         final_evaluations) = loop_result

        # The vertex with the smallest objective value is reported as the
        # result position.
        best_index = tf.argsort(
            final_objective_values, direction='ASCENDING', stable=True)[0]

        # `converged` and `num_iterations` are converted to tensors explicitly
        # so the result never mixes Python values with Tensors: in eager mode
        # `converged` may remain a Python bool, and evaluating the whole tuple
        # in a single call (e.g. session.run) would then raise.
        return NelderMeadOptimizerResults(
            converged=tf.convert_to_tensor(value=converged),
            num_objective_evaluations=final_evaluations,
            position=final_simplex[best_index],
            objective_value=final_objective_values[best_index],
            final_simplex=final_simplex,
            final_objective_values=final_objective_values,
            num_iterations=tf.convert_to_tensor(value=num_iterations),
            initial_simplex=simplex,
            initial_objective_values=objective_at_simplex)
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
    """Model definition entry.

    Builds the detection network, then, depending on `mode`, the prediction
    dictionary, the training op (optimizer, optional gradient clipping and
    optional EMA update) or the evaluation metric function, and wraps the
    result into an estimator spec.

    Note that `params` is mutated: `params['is_training_bn']` is always set,
    and in EVAL mode `params['nms_configs']['max_nms_inputs']` is overwritten.

    Args:
      features: the input image tensor with shape [batch_size, height, width,
        3]. The height and width are fixed and equal.
      labels: the input labels in a dictionary. The labels include class
        targets and box targets which are dense label maps. The labels are
        generated from get_input_fn function in data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the model outputs class logits and box regression outputs. Only
        used when `params['use_keras_model']` is falsy.
      variable_filter_fn: the filter function that takes trainable_variables
        and returns the variable list after applying the filter rule.

    Returns:
      tpu_spec: the TPUEstimatorSpec to run training, evaluation, or
        prediction. A plain `tf.estimator.EstimatorSpec` is returned instead
        in PREDICT mode and whenever `params['strategy']` is not 'tpu'.

    Raises:
      RuntimeError: if both ckpt and backbone_ckpt are set.
      ValueError: if `params['optimizer']` is neither 'sgd' nor 'adam'
        (TRAIN mode only).
    """
    # presumably records an image summary of the inputs; helper is defined
    # outside this block -- TODO confirm.
    utils.image('input_image', features)
    training_hooks = []
    # Stored in `params` so the model builders below can read it.
    params['is_training_bn'] = (mode == tf.estimator.ModeKeys.TRAIN)

    if params['use_keras_model']:

        def model_fn(inputs):
            # This local `model` shadows the `model` argument of `_model_fn`,
            # which is therefore unused on the Keras path.
            model = efficientdet_keras.EfficientDetNet(
                config=hparams_config.Config(params))
            cls_out_list, box_out_list = model(inputs, params['is_training_bn'])
            # Convert the per-level lists into dicts keyed by feature level.
            cls_outputs, box_outputs = {}, {}
            for i in range(params['min_level'], params['max_level'] + 1):
                cls_outputs[i] = cls_out_list[i - params['min_level']]
                box_outputs[i] = box_out_list[i - params['min_level']]
            return cls_outputs, box_outputs
    else:
        model_fn = functools.partial(model, config=hparams_config.Config(params))

    precision = utils.get_precision(params['strategy'], params['mixed_precision'])
    cls_outputs, box_outputs = utils.build_model_with_precision(
        precision, model_fn, features, params['is_training_bn'])

    # Whatever precision the model ran in, everything downstream uses float32.
    levels = cls_outputs.keys()
    for level in levels:
        cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
        box_outputs[level] = tf.cast(box_outputs[level], tf.float32)

    # First check if it is in PREDICT mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'image': features,
        }
        for level in levels:
            predictions['cls_outputs_%d' % level] = cls_outputs[level]
            predictions['box_outputs_%d' % level] = box_outputs[level]
        # A plain EstimatorSpec is returned here regardless of the strategy.
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Set up training loss and learning rate.
    update_learning_rate_schedule_parameters(params)
    global_step = tf.train.get_or_create_global_step()
    learning_rate = learning_rate_schedule(params, global_step)

    # cls_loss and box_loss are for logging. only total_loss is optimized.
    det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
        cls_outputs, box_outputs, labels, params)
    reg_l2loss = reg_l2_loss(params['weight_decay'])
    total_loss = det_loss + reg_l2loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        utils.scalar('lrn_rate', learning_rate)
        utils.scalar('trainloss/cls_loss', cls_loss)
        utils.scalar('trainloss/box_loss', box_loss)
        utils.scalar('trainloss/det_loss', det_loss)
        utils.scalar('trainloss/reg_l2_loss', reg_l2loss)
        utils.scalar('trainloss/loss', total_loss)
        if params['iou_loss_type']:
            utils.scalar('trainloss/box_iou_loss', box_iou_loss)
        train_epochs = tf.cast(global_step, tf.float32) / params['steps_per_epoch']
        utils.scalar('train_epochs', train_epochs)

    # `ema` and `ema_vars` exist only when a decay is configured; every later
    # use is guarded by the same `moving_average_decay` check.
    moving_average_decay = params['moving_average_decay']
    if moving_average_decay:
        ema = tf.train.ExponentialMovingAverage(
            decay=moving_average_decay, num_updates=global_step)
        ema_vars = utils.get_ema_vars()

    if mode == tf.estimator.ModeKeys.TRAIN:
        if params['optimizer'].lower() == 'sgd':
            optimizer = tf.train.MomentumOptimizer(
                learning_rate, momentum=params['momentum'])
        elif params['optimizer'].lower() == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate)
        else:
            raise ValueError('optimizers should be adam or sgd')

        if params['strategy'] == 'tpu':
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        var_list = tf.trainable_variables()
        if variable_filter_fn:
            var_list = variable_filter_fn(var_list)

        if params.get('clip_gradients_norm', None):
            logging.info('clip gradients norm by %f', params['clip_gradients_norm'])
            grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
            with tf.name_scope('clip'):
                grads = [gv[0] for gv in grads_and_vars]
                tvars = [gv[1] for gv in grads_and_vars]
                # First clip each variable's norm, then clip global norm.
                clip_norm = abs(params['clip_gradients_norm'])
                clipped_grads = [tf.clip_by_norm(g, clip_norm) for g in grads]
                clipped_grads, _ = tf.clip_by_global_norm(clipped_grads, clip_norm)
                # The logged value is the global norm *after* clipping.
                utils.scalar('gradient_norm', tf.linalg.global_norm(clipped_grads))
                grads_and_vars = list(zip(clipped_grads, tvars))

            with tf.control_dependencies(update_ops):
                train_op = optimizer.apply_gradients(grads_and_vars, global_step)
        else:
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(
                    total_loss, global_step, var_list=var_list)

        if moving_average_decay:
            # The EMA update becomes the train op and runs after the
            # optimizer step it depends on.
            with tf.control_dependencies([train_op]):
                train_op = ema.apply(ema_vars)

    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(**kwargs):
            """Returns a dictionary that has the evaluation metrics."""
            if params['nms_configs'].get('pyfunc', True):
                # Default path: per-image NMS through a numpy function.
                detections_bs = []
                for index in range(kwargs['boxes'].shape[0]):
                    nms_configs = params['nms_configs']
                    detections = tf.numpy_function(
                        functools.partial(nms_np.per_class_nms, nms_configs=nms_configs),
                        [
                            kwargs['boxes'][index],
                            kwargs['scores'][index],
                            kwargs['classes'][index],
                            tf.slice(kwargs['image_ids'], [index], [1]),
                            tf.slice(kwargs['image_scales'], [index], [1]),
                            params['num_classes'],
                            nms_configs['max_output_size'],
                        ], tf.float32)
                    detections_bs.append(detections)
                detections_bs = postprocess.transform_detections(
                    tf.stack(detections_bs))
            else:
                # These two branches should be equivalent, but currently they are not.
                # TODO(tanmingxing): enable the non_pyfun path after bug fix.
                nms_boxes, nms_scores, nms_classes, _ = postprocess.per_class_nms(
                    params, kwargs['boxes'], kwargs['scores'], kwargs['classes'],
                    kwargs['image_scales'])
                img_ids = tf.cast(
                    tf.expand_dims(kwargs['image_ids'], -1), nms_scores.dtype)
                # Columns: image id, box[1], box[0], box[3]-box[1],
                # box[2]-box[0], score, class. Presumably [id, x, y, width,
                # height, score, class] with boxes stored as
                # [ymin, xmin, ymax, xmax] -- TODO confirm.
                detections_bs = [
                    img_ids * tf.ones_like(nms_scores),
                    nms_boxes[:, :, 1],
                    nms_boxes[:, :, 0],
                    nms_boxes[:, :, 3] - nms_boxes[:, :, 1],
                    nms_boxes[:, :, 2] - nms_boxes[:, :, 0],
                    nms_scores,
                    nms_classes,
                ]
                detections_bs = tf.stack(detections_bs, axis=-1, name='detnections')

            if params.get('testdev_dir', None):
                logging.info('Eval testdev_dir %s', params['testdev_dir'])
                eval_metric = coco_metric.EvaluationMetric(
                    testdev_dir=params['testdev_dir'])
                # No groundtruth is passed on this path; a dummy zeros tensor
                # takes its place.
                coco_metrics = eval_metric.estimator_metric_fn(detections_bs,
                                                               tf.zeros([1]))
            else:
                logging.info('Eval val with groudtruths %s.', params['val_json_file'])
                eval_metric = coco_metric.EvaluationMetric(
                    filename=params['val_json_file'])
                coco_metrics = eval_metric.estimator_metric_fn(
                    detections_bs, kwargs['groundtruth_data'], params['label_map'])

            # Add metrics to output.
            # These locals shadow the outer `cls_loss` / `box_loss` tensors
            # inside `metric_fn` only.
            cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
            output_metrics = {
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        # Tile the losses to shape [batch_size, 1] before passing them to
        # `metric_fn`; presumably because metric inputs must carry a batch
        # dimension -- TODO confirm.
        cls_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(cls_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        box_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(box_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])

        cls_outputs = postprocess.to_list(cls_outputs)
        box_outputs = postprocess.to_list(box_outputs)
        # Mutates the caller-supplied `params` in place.
        params['nms_configs']['max_nms_inputs'] = anchors.MAX_DETECTION_POINTS
        boxes, scores, classes = postprocess.pre_nms(params, cls_outputs,
                                                     box_outputs)
        metric_fn_inputs = {
            'cls_loss_repeat': cls_loss_repeat,
            'box_loss_repeat': box_loss_repeat,
            'image_ids': labels['source_ids'],
            'groundtruth_data': labels['groundtruth_data'],
            'image_scales': labels['image_scales'],
            'boxes': boxes,
            'scores': scores,
            'classes': classes,
        }
        eval_metrics = (metric_fn, metric_fn_inputs)

    # `ckpt` wins over `backbone_ckpt` here; having both set is rejected
    # below, but only in TRAIN mode.
    checkpoint = params.get('ckpt') or params.get('backbone_ckpt')

    if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
        # Initialize the model from an EfficientDet or backbone checkpoint.
        if params.get('ckpt') and params.get('backbone_ckpt'):
            raise RuntimeError(
                '--backbone_ckpt and --checkpoint are mutually exclusive')

        if params.get('backbone_ckpt'):
            var_scope = params['backbone_name'] + '/'
            if params['ckpt_var_scope'] is None:
                # Use backbone name as default checkpoint scope.
                ckpt_scope = params['backbone_name'] + '/'
            else:
                ckpt_scope = params['ckpt_var_scope'] + '/'
        else:
            # Load every var in the given checkpoint
            var_scope = ckpt_scope = '/'

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            logging.info('restore variables from %s', checkpoint)

            var_map = utils.get_ckpt_var_map(
                ckpt_path=checkpoint,
                ckpt_scope=ckpt_scope,
                var_scope=var_scope,
                skip_mismatch=params['skip_mismatch'])

            tf.train.init_from_checkpoint(checkpoint, var_map)
            return tf.train.Scaffold()
    elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:

        def scaffold_fn():
            """Load moving average variables for eval."""
            logging.info('Load EMA vars with ema_decay=%f', moving_average_decay)
            restore_vars_dict = ema.variables_to_restore(ema_vars)
            saver = tf.train.Saver(restore_vars_dict)
            return tf.train.Scaffold(saver=saver)
    else:
        scaffold_fn = None

    if params['strategy'] != 'tpu':
        # Profile every 1K steps.
        if params.get('profile', False):
            profile_hook = tf.estimator.ProfilerHook(
                save_steps=1000, output_dir=params['model_dir'], show_memory=True)
            training_hooks.append(profile_hook)

            # Report memory allocation if OOM
            # NOTE(review): the nesting of this hook under the `profile` flag
            # should be confirmed against the upstream file.
            class OomReportingHook(tf.estimator.SessionRunHook):

                def before_run(self, run_context):
                    return tf.estimator.SessionRunArgs(
                        fetches=[],
                        options=tf.RunOptions(report_tensor_allocations_upon_oom=True))

            training_hooks.append(OomReportingHook())

        logging_hook = tf.estimator.LoggingTensorHook(
            {
                'step': global_step,
                'det_loss': det_loss,
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            },
            every_n_iter=params.get('iterations_per_loop', 100),
        )
        training_hooks.append(logging_hook)

    if params['strategy'] == 'tpu':
        return tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            eval_metrics=eval_metrics,
            host_call=utils.get_tpu_host_call(global_step, params),
            scaffold_fn=scaffold_fn,
            training_hooks=training_hooks)
    else:
        # Off TPU the metric function and the scaffold function are invoked
        # right here instead of being handed over as callables.
        eval_metric_ops = (
            eval_metrics[0](**eval_metrics[1]) if eval_metrics else None)
        # The return value is discarded; presumably called only for its side
        # effects -- TODO confirm.
        utils.get_tpu_host_call(global_step, params)
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            scaffold=scaffold_fn() if scaffold_fn else None,
            training_hooks=training_hooks)
def generator(z,
              progress,
              num_filters_fn,
              resolution_schedule,
              num_blocks=None,
              kernel_size=3,
              colors=3,
              to_rgb_activation=None,
              simple_arch=False,
              scope='progressive_gan_generator',
              reuse=None):
    """Generator network for the progressive GAN model.

    In the default architecture every convolution output passes through a
    "hook": `x * scaler + offset`, where `scaler` and `offset` are
    `tf.placeholder_with_default` tensors defaulting to ones and zeros. With
    nothing fed the hooks are therefore identity operations; feeding the
    placeholders lets a caller rescale or shift intermediate activations.
    No hooks are created when `simple_arch` is True.

    Args:
      z: A `Tensor` of latent vector. The first dimension must be batch size
        and must be statically known (it is converted with `int`).
      progress: A scalar float `Tensor` of training progress.
      num_filters_fn: A function that maps `block_id` to # of filters for the
        block.
      resolution_schedule: An object of `ResolutionSchedule`.
      num_blocks: An integer of number of blocks. None means maximum number of
        blocks, i.e. `resolution.schedule.num_resolutions`. Defaults to None.
      kernel_size: An integer of convolution kernel size.
      colors: Number of output color channels. Defaults to 3.
      to_rgb_activation: Activation function applied when output rgb. With
        `simple_arch=True`, None means no activation is applied; otherwise
        the value is forwarded to `layers.custom_conv2d` unchanged.
      simple_arch: Architecture variants for lower memory usage and faster
        speed.
      scope: A string or variable scope.
      reuse: Whether to reuse `scope`. Defaults to None which means to inherit
        the reuse option of the parent scope.

    Returns:
      A 3-tuple `(predictions, end_points, hooks)`:
        predictions: A `Tensor` of model output with static shape
          [batch_size, final_h, final_w, colors].
        end_points: A dictionary of model end points.
        hooks: A dictionary `{"scalers": ..., "offsets": ...}`; each value
          maps a hook name to the placeholder created for it.
    """
    if num_blocks is None:
        num_blocks = resolution_schedule.num_resolutions

    start_h, start_w = resolution_schedule.start_resolutions
    final_h, final_w = resolution_schedule.final_resolutions

    def _conv2d(scope, x, kernel_size, filters, padding='SAME'):
        return layers.custom_conv2d(
            x=x,
            filters=filters,
            kernel_size=kernel_size,
            padding=padding,
            activation=lambda x: layers.pixel_norm(tf.nn.leaky_relu(x)),
            he_initializer_slope=0.0,
            scope=scope)

    def _to_rgb(x):
        return layers.custom_conv2d(
            x=x,
            filters=colors,
            kernel_size=1,
            padding='SAME',
            activation=to_rgb_activation,
            scope='to_rgb')

    he_init = tf_slim.variance_scaling_initializer()

    end_points = {}
    scalers = {}
    offsets = {}

    def hook(name, x):
        """Records `x` as an end point and returns `x * scaler + offset`."""
        end_points[name] = x
        scaler_ph = tf.placeholder_with_default(
            np.ones(1, x.dtype.as_numpy_dtype),
            shape=1,
            name="{}_scaler".format(name))
        scalers[name] = scaler_ph
        # The offset default has the full (static) shape of `x`.
        offset_ph = tf.placeholder_with_default(
            np.zeros(x.shape, x.dtype.as_numpy_dtype),
            shape=x.shape,
            name=name)
        offsets[name] = offset_ph
        return x * scaler_ph + offset_ph

    with tf.variable_scope(scope, reuse=reuse):
        with tf.name_scope('input'):
            x = tf_slim.flatten(z)
            end_points['latent_vector'] = x

        with tf.variable_scope(block_name(1)):
            if simple_arch:
                x_shape = tf.shape(x)
                x = tf.layers.dense(
                    x,
                    start_h * start_w * num_filters_fn(1),
                    kernel_initializer=he_init)
                x = tf.nn.relu(x)
                x = tf.reshape(
                    x, [x_shape[0], start_h, start_w, num_filters_fn(1)])
            else:
                x = tf.expand_dims(tf.expand_dims(x, 1), 1)
                x = layers.pixel_norm(x)
                # Pad the 1 x 1 image to 2 * (start_h - 1) x 2 * (start_w - 1)
                # with zeros for the next conv.
                x = tf.pad(
                    x,
                    [[0] * 2, [start_h - 1] * 2, [start_w - 1] * 2, [0] * 2])
                # The output is start_h x start_w x num_filters_fn(1).
                x = _conv2d('conv0', x, (start_h, start_w), num_filters_fn(1),
                            'VALID')
                x = hook('conv0', x)
                x = _conv2d('conv1', x, kernel_size, num_filters_fn(1))
                x = hook('conv1', x)
            lods = [x]

        if resolution_schedule.scale_mode == 'H':
            strides = (resolution_schedule.scale_base, 1)
        else:
            strides = (resolution_schedule.scale_base,
                       resolution_schedule.scale_base)

        for block_id in range(2, num_blocks + 1):
            with tf.variable_scope(block_name(block_id)):
                if simple_arch:
                    x = tf.layers.conv2d_transpose(
                        x,
                        num_filters_fn(block_id),
                        kernel_size=kernel_size,
                        strides=strides,
                        padding='SAME',
                        kernel_initializer=he_init)
                    x = tf.nn.relu(x)
                else:
                    x = resolution_schedule.upscale(
                        x, resolution_schedule.scale_base)
                    x = _conv2d('conv0', x, kernel_size,
                                num_filters_fn(block_id))
                    x = hook('conv0_{}'.format(block_id), x)
                    x = _conv2d('conv1', x, kernel_size,
                                num_filters_fn(block_id))
                    x = hook('conv1_{}'.format(block_id), x)
                lods.append(x)

        outputs = []
        for block_id in range(1, num_blocks + 1):
            with tf.variable_scope(block_name(block_id)):
                if simple_arch:
                    lod = lods[block_id - 1]
                    lod = tf.layers.conv2d(
                        lod,
                        colors,
                        kernel_size=1,
                        padding='SAME',
                        name='to_rgb',
                        kernel_initializer=he_init)
                    # `to_rgb_activation` defaults to None; calling it
                    # unconditionally would raise TypeError for the default.
                    if to_rgb_activation is not None:
                        lod = to_rgb_activation(lod)
                else:
                    lod = _to_rgb(lods[block_id - 1])
                scale = resolution_schedule.scale_factor(block_id)
                lod = resolution_schedule.upscale(lod, scale)
                end_points['upscaled_rgb_{}'.format(block_id)] = lod

                # alpha_i is used to replace lod_select. Note sum(alpha_i) is
                # guaranteed to be 1.
                alpha = _generator_alpha(block_id, progress)
                end_points['alpha_{}'.format(block_id)] = alpha

                outputs.append(lod * alpha)

        # The final image is the alpha-weighted sum of the upscaled per-block
        # RGB outputs.
        predictions = tf.add_n(outputs)
        batch_size = int(z.shape[0])
        predictions.set_shape([batch_size, final_h, final_w, colors])
        end_points['predictions'] = predictions

    return predictions, end_points, {"scalers": scalers, "offsets": offsets}
def cnn(self):
    """Build the text-CNN graph: embedding, conv/max-pool, classifier, loss.

    Reads `self.config` (vocab_size, embedding_size, pre_trianing,
    filter_sizes, num_filters, seq_length, num_classes, l2_reg_lambda, lr,
    clip) together with `self.input_x`, `self.input_y`, `self.keep_prob`,
    `self.global_step` and `self.l2_loss`; the latter must already be
    initialised because it is accumulated with `+=`.

    Sets `self.embedding`, `self.embedding_inputs`,
    `self.embedding_inputs_expanded`, `self.h_pool`, `self.outputs`,
    `self.final_output`, `self.logits`, `self.prob`, `self.y_pred_cls`,
    `self.l2_loss`, `self.loss`, `self.optim` and `self.acc`.

    The training loss is the mean softmax cross entropy plus
    `l2_reg_lambda` times the L2 norm of the output layer parameters. Set
    `config.l2_reg_lambda` to 0 to train on the cross entropy alone.
    """
    with tf.device('/cpu:0'):
        # The embedding table is initialised from `config.pre_trianing`.
        self.embedding = tf.get_variable(
            "embeddings",
            shape=[self.config.vocab_size, self.config.embedding_size],
            initializer=tf.constant_initializer(self.config.pre_trianing))
        self.embedding_inputs = tf.nn.embedding_lookup(
            self.embedding, self.input_x)
        # Add a trailing channel axis so the embeddings can go through
        # conv2d.
        self.embedding_inputs_expanded = tf.expand_dims(
            self.embedding_inputs, -1)

    with tf.name_scope('cnn'):
        pooled_outputs = []
        for filter_size in self.config.filter_sizes:
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                filter_shape = [
                    filter_size, self.config.embedding_size, 1,
                    self.config.num_filters
                ]
                W = tf.Variable(
                    tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(
                    tf.constant(0.1, shape=[self.config.num_filters]),
                    name="b")
                conv = tf.nn.conv2d(
                    self.embedding_inputs_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Max-pool over every position produced by the VALID
                # convolution (seq_length - filter_size + 1 of them).
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[
                        1, self.config.seq_length - filter_size + 1, 1, 1
                    ],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        num_filters_total = self.config.num_filters * len(
            self.config.filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.outputs = tf.reshape(self.h_pool, [-1, num_filters_total])

    with tf.name_scope("dropout"):
        self.final_output = tf.nn.dropout(self.outputs, self.keep_prob)

    with tf.name_scope('output'):
        fc_w = tf.get_variable(
            'fc_w',
            shape=[
                self.final_output.shape[1].value, self.config.num_classes
            ])
        fc_b = tf.Variable(
            tf.constant(0.1, shape=[self.config.num_classes]), name='fc_b')
        self.logits = tf.matmul(self.final_output, fc_w) + fc_b
        self.prob = tf.nn.softmax(self.logits)
        self.y_pred_cls = tf.argmax(self.logits, 1, name='predictions')

    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.logits, labels=self.input_y)
        self.l2_loss += tf.nn.l2_loss(fc_w)
        self.l2_loss += tf.nn.l2_loss(fc_b)
        # Previously this value was immediately overwritten with the plain
        # cross entropy, which silently disabled the L2 penalty.
        self.loss = (tf.reduce_mean(cross_entropy) +
                     self.config.l2_reg_lambda * self.l2_loss)

    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(self.config.lr)
        gradients, variables = zip(*optimizer.compute_gradients(self.loss))
        # Clip by global norm before applying the update.
        gradients, _ = tf.clip_by_global_norm(gradients, self.config.clip)
        self.optim = optimizer.apply_gradients(
            zip(gradients, variables), global_step=self.global_step)

    with tf.name_scope('accuracy'):
        correct_pred = tf.equal(tf.argmax(self.input_y, 1), self.y_pred_cls)
        self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
def build(self):
    """Build the training computation graph and store its pieces on ``self``.

    Resets the default graph, creates a fresh ``tf.Graph`` and, inside it,
    wires up in order: the input placeholders, the user embeddings and
    combined user feature, the item (movie) embeddings, the movie-title
    convolution feature, the combined movie feature, the predicted rating,
    the MSE loss, and an Adam training op.

    Sets ``_train_graph``, ``_inputs``, ``_user_embeds``,
    ``_user_combined_feature``, ``_item_embeds``, ``_item_combined_feature``,
    ``_inference``, ``_cost``, ``_loss``, ``_global_step``, ``_optimizer``,
    ``_gradients`` and ``_train_op``.

    :return: None
    """
    tf.reset_default_graph()
    graph = tf.Graph()
    self._train_graph = graph

    with graph.as_default():
        # Input placeholders.
        placeholders = self.get_inputs()
        self._inputs = placeholders

        # User side: embedding vectors, then the combined user feature.
        user_embeddings = self.get_user_embeds(placeholders)
        self._user_embeds = user_embeddings
        user_feature = self.get_user_combined_feature(
            user_embeddings, fc_outputs=self.num_outputs)
        self._user_combined_feature = user_feature
        _, user_vector = user_feature.get_combine_layer()

        # Item side: movie embedding vectors.
        item_embeddings = self.get_item_embeds(placeholders)
        self._item_embeds = item_embeddings

        # Feature vector extracted from the movie title.
        title_convs = self.get_item_convs(
            inputs=item_embeddings["movie_titles"],
            dropout_keep_prob=placeholders["dropout_keep_prob"])
        _, title_dropout = title_convs["movie_titles"]

        # Combined movie feature.
        item_feature = self.get_movie_combined_feature(
            item_embeddings, title_dropout, fc_outputs=self.num_outputs)
        self._item_combined_feature = item_feature
        _, item_vector = item_feature.get_combine_layer()

        # Predicted rating. Alternative schemes were considered: concatenate
        # both feature vectors and run a dense layer with one output, or
        # matmul the user features with the transposed movie features.
        # The resulting tensor name differs per scheme, and recommendation
        # code later fetches the tensor by name, so keep the two in sync.
        with tf.name_scope("inference"):
            # Active scheme: row-wise dot product of user and movie vectors.
            rating = tf.reduce_sum(user_vector * item_vector, axis=1)
            self._inference = tf.expand_dims(rating, axis=1)

        with tf.name_scope("loss"):
            # MSE loss: regress the computed value onto the rating.
            self._cost = tf.losses.mean_squared_error(
                placeholders['targets'], self._inference)
            self._loss = tf.reduce_mean(self._cost)

        # Optimize the loss. Gradients are computed and applied in two
        # steps instead of a single minimize() call.
        self._global_step = tf.Variable(
            0, name="global_step", trainable=False)
        self._optimizer = tf.train.AdamOptimizer(
            placeholders['LearningRate'])
        self._gradients = self._optimizer.compute_gradients(self._loss)
        self._train_op = self._optimizer.apply_gradients(
            self._gradients, global_step=self._global_step)
def loss(self, prediction_dict):
    """Compute the RCNN classification and bounding-box regression losses.

    Args:
        prediction_dict: dict from which the following entries are read:
            ``['rcnn']['cls_score']``: shape (num_proposals,
                num_classes + 1). Class scores for each proposal.
                Classes are 1-indexed, 0 is the background.
            ``['rcnn']['bbox_offsets']``: shape (num_proposals,
                num_classes * 4). Offsets for each proposal for each
                class; only the offsets of the target class are compared.
            ``['target']['cls']``: shape (num_proposals,). Label of each
                proposal: 0 is background, 1..n are the 1-indexed
                classes. Negative values are excluded from the
                classification loss.
            ``['target']['bbox_offsets']``: shape (num_proposals, 4).
                True offsets of each proposal for its true label.
            When ``self._debug`` is set, per-proposal losses are written
            into ``prediction_dict['_debug']['losses']``.

    Returns:
        dict with keys:
            rcnn_cls_loss: mean softmax cross-entropy over the proposals
                whose target is >= 0 (classes plus background).
            rcnn_reg_loss: mean smooth L1 loss over the proposals whose
                target is > 0 (non-background).
    """
    with tf.name_scope('RCNNLoss'):
        class_scores = prediction_dict['rcnn']['cls_score']
        # Targets are cast explicitly to int32.
        class_targets = tf.cast(prediction_dict['target']['cls'], tf.int32)

        # ---- Classification: log loss between scores and targets ----
        # Only targets that are >= 0 take part.
        valid_mask = tf.reshape(
            tf.greater_equal(class_targets, 0), [-1], name='not_ignored')
        valid_scores = tf.boolean_mask(
            class_scores, valid_mask, name='cls_score_labeled')
        valid_targets = tf.boolean_mask(
            class_targets, valid_mask, name='cls_target_labeled')

        num_valid = tf.shape(valid_scores)[0]
        tf.summary.scalar('batch_size', num_valid, collections=['rcnn'])

        # One-hot over num_classes + 1, since background is class 0.
        valid_targets_one_hot = tf.one_hot(
            valid_targets,
            depth=self._num_classes + 1,
            name='cls_target_one_hot')

        # Cross entropy of every kept proposal.
        cross_entropy_per_proposal = (
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf.stop_gradient(valid_targets_one_hot),
                logits=valid_scores))

        if self._debug:
            # Keep the per-proposal cross entropy so proposals with high
            # and low error can be visualized.
            prediction_dict['_debug']['losses'] = {
                'cross_entropy_per_proposal': cross_entropy_per_proposal,
            }

        # ---- Regression: smooth L1 between predicted and true offsets ----
        predicted_offsets = prediction_dict['rcnn']['bbox_offsets']
        target_offsets = prediction_dict['target']['bbox_offsets']

        # Only the non-background proposals have boxes to regress.
        foreground_mask = tf.reshape(tf.greater(class_targets, 0), [-1])
        foreground_offsets = tf.boolean_mask(
            predicted_offsets, foreground_mask, name='bbox_offsets_labeled')
        foreground_target_offsets = tf.boolean_mask(
            target_offsets,
            foreground_mask,
            name='bbox_offsets_target_labeled')
        foreground_targets = tf.boolean_mask(
            class_targets, foreground_mask, name='cls_target_labeled')

        # The targets count num_classes + 1 labels. Shift them down by one
        # so a one-hot of depth num_classes is 0-indexed over the
        # foreground classes.
        foreground_targets = foreground_targets - 1
        foreground_one_hot = tf.one_hot(
            foreground_targets,
            depth=self._num_classes,
            name='cls_target_one_hot')
        # foreground_one_hot is (num_labeled, num_classes).

        # One row of 4 offsets per (proposal, class) pair.
        offsets_per_class = tf.reshape(
            foreground_offsets, [-1, 4], name='bbox_flatten')

        # The flattened one-hot, as booleans, picks the row of the target
        # class for every proposal.
        class_selector = tf.cast(
            tf.reshape(foreground_one_hot, [-1]),
            tf.bool,
            name='cls_flatten_as_bool')
        selected_offsets = tf.boolean_mask(
            offsets_per_class, class_selector, name='bbox_offset_cleaned')

        # Smooth L1 between the selected ("cleaned") offsets and the
        # targets of the same proposals.
        reg_loss_per_proposal = smooth_l1_loss(
            selected_offsets,
            foreground_target_offsets,
            sigma=self._l1_sigma)

        num_foreground = tf.shape(selected_offsets)[0]
        tf.summary.scalar(
            'rcnn_foreground_samples', num_foreground, collections=['rcnn'])

        if self._debug:
            # Also keep the per-proposal regression loss to visualize good
            # and bad proposals in debug mode.
            debug_losses = prediction_dict['_debug']['losses']
            debug_losses['reg_loss_per_proposal'] = reg_loss_per_proposal

        mean_cls_loss = tf.reduce_mean(cross_entropy_per_proposal)
        mean_reg_loss = tf.reduce_mean(reg_loss_per_proposal)
        return {
            'rcnn_cls_loss': mean_cls_loss,
            'rcnn_reg_loss': mean_reg_loss,
        }