Code example #1
def random_horizontal_flip(image,
                           boxes=None,
                           masks=None,
                           keypoints=None,
                           keypoint_flip_permutation=None,
                           seed=None):
    """Randomly flips the image and detections horizontally.

  The probability of flipping the image is 50%.

  Args:
    image: rank 3 float32 tensor with shape [height, width, channels].
    boxes: (optional) rank 2 float32 tensor with shape [N, 4]
           containing the bounding boxes.
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
    keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
                               permutation.
    seed: random seed

  Returns:
    image: image which is the same shape as input image.

    If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
    the function also returns the following tensors.

    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]

  Raises:
    ValueError: if keypoints are provided but keypoint_flip_permutation is not.
  """
    def _flip_image(image):
        # flip image
        image_flipped = tf.image.flip_left_right(image)
        return image_flipped

    if keypoints is not None and keypoint_flip_permutation is None:
        raise ValueError(
            'keypoints are provided but keypoint_flip_permutation is not provided'
        )

    with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
        result = []
        # random variable defining whether to do flip or not
        do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)

        # flip image
        image = tf.cond(do_a_flip_random, lambda: _flip_image(image),
                        lambda: image)
        result.append(image)

        # flip boxes
        if boxes is not None:
            boxes = tf.cond(do_a_flip_random,
                            lambda: _flip_boxes_left_right(boxes),
                            lambda: boxes)
            result.append(boxes)

        # flip masks
        if masks is not None:
            masks = tf.cond(do_a_flip_random,
                            lambda: _flip_masks_left_right(masks),
                            lambda: masks)
            result.append(masks)

        # flip keypoints
        if keypoints is not None and keypoint_flip_permutation is not None:
            permutation = keypoint_flip_permutation
            keypoints = tf.cond(
                do_a_flip_random,
                lambda: keypoint_flip_horizontal(keypoints, 0.5, permutation),
                lambda: keypoints)
            result.append(keypoints)

        return tuple(result)
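For reference, a hedged usage sketch in TF 1.x graph mode; the tensors are illustrative, and the helper ops referenced above (e.g. _flip_boxes_left_right) are assumed to be in scope:

import tensorflow.compat.v1 as tf

# Illustrative inputs; boxes are normalized [ymin, xmin, ymax, xmax].
image = tf.random_uniform([480, 640, 3])
boxes = tf.constant([[0.1, 0.2, 0.5, 0.6]])
flipped_image, flipped_boxes = random_horizontal_flip(image, boxes=boxes, seed=0)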
Code example #2
File: det_model_fn.py Project: yingmuying/automl
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
    """Model definition entry.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets, which are dense label maps. The labels are generated by
      the get_input_fn function in data/dataloader.py.
    mode: the mode of TPUEstimator, including TRAIN, EVAL, and PREDICT.
    params: the dictionary that defines the hyperparameters of the model. The
      default settings are in the default_hparams function in this file.
    model: the model function that outputs class logits and box regression
      outputs.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule.

  Returns:
    tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction.

  Raises:
    RuntimeError: if both ckpt and backbone_ckpt are set.
  """

    # Convert params (dict) to Config for easier access.
    def _model_outputs():
        return model(features, config=hparams_config.Config(params))

    if params['use_bfloat16']:
        with tf.tpu.bfloat16_scope():
            cls_outputs, box_outputs = _model_outputs()
            levels = cls_outputs.keys()
            for level in levels:
                cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
                box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
    else:
        cls_outputs, box_outputs = _model_outputs()
        levels = cls_outputs.keys()

    # First check if it is in PREDICT mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'image': features,
        }
        for level in levels:
            predictions['cls_outputs_%d' % level] = cls_outputs[level]
            predictions['box_outputs_%d' % level] = box_outputs[level]
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Set up training loss and learning rate.
    update_learning_rate_schedule_parameters(params)
    global_step = tf.train.get_or_create_global_step()
    learning_rate = learning_rate_schedule(params, global_step)

    # cls_loss and box_loss are for logging. Only total_loss is optimized.
    det_loss, cls_loss, box_loss = detection_loss(cls_outputs, box_outputs,
                                                  labels, params)
    l2loss = reg_l2_loss(params['weight_decay'])
    total_loss = det_loss + l2loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        utils.scalar('lrn_rate', learning_rate)
        utils.scalar('trainloss/cls_loss', cls_loss)
        utils.scalar('trainloss/box_loss', box_loss)
        utils.scalar('trainloss/det_loss', det_loss)
        utils.scalar('trainloss/l2_loss', l2loss)
        utils.scalar('trainloss/loss', total_loss)

    moving_average_decay = params['moving_average_decay']
    if moving_average_decay:
        ema = tf.train.ExponentialMovingAverage(decay=moving_average_decay,
                                                num_updates=global_step)
        ema_vars = utils.get_ema_vars()

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum=params['momentum'])
        if params['use_tpu']:
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        var_list = tf.trainable_variables()
        if variable_filter_fn:
            var_list = variable_filter_fn(var_list, params['resnet_depth'])

        if params.get('clip_gradients_norm', 0) > 0:
            logging.info('clip gradients norm by %f',
                         params['clip_gradients_norm'])
            grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
            with tf.name_scope('clip'):
                grads = [gv[0] for gv in grads_and_vars]
                tvars = [gv[1] for gv in grads_and_vars]
                clipped_grads, gnorm = tf.clip_by_global_norm(
                    grads, params['clip_gradients_norm'])
                utils.scalar('gnorm', gnorm)
                grads_and_vars = list(zip(clipped_grads, tvars))

            with tf.control_dependencies(update_ops):
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step)
        else:
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(total_loss,
                                              global_step,
                                              var_list=var_list)

        if moving_average_decay:
            with tf.control_dependencies([train_op]):
                train_op = ema.apply(ema_vars)

    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(**kwargs):
            """Returns a dictionary that has the evaluation metrics."""
            batch_size = params['batch_size']
            eval_anchors = anchors.Anchors(params['min_level'],
                                           params['max_level'],
                                           params['num_scales'],
                                           params['aspect_ratios'],
                                           params['anchor_scale'],
                                           params['image_size'])
            anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                                   params['num_classes'])
            cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
            coco_metrics = coco_metric_fn(batch_size, anchor_labeler,
                                          params['val_json_file'], **kwargs)

            # Add metrics to output.
            output_metrics = {
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        cls_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(cls_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        box_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(box_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        metric_fn_inputs = {
            'cls_loss_repeat': cls_loss_repeat,
            'box_loss_repeat': box_loss_repeat,
            'source_ids': labels['source_ids'],
            'groundtruth_data': labels['groundtruth_data'],
            'image_scales': labels['image_scales'],
        }
        add_metric_fn_inputs(params, cls_outputs, box_outputs,
                             metric_fn_inputs)
        eval_metrics = (metric_fn, metric_fn_inputs)

    checkpoint = params.get('ckpt') or params.get('backbone_ckpt')

    if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
        # Initialize the model from an EfficientDet or backbone checkpoint.
        if params.get('ckpt') and params.get('backbone_ckpt'):
            raise RuntimeError(
                '--backbone_ckpt and --checkpoint are mutually exclusive')
        elif params.get('backbone_ckpt'):
            var_scope = params['backbone_name'] + '/'
            if params['ckpt_var_scope'] is None:
                # Use backbone name as default checkpoint scope.
                ckpt_scope = params['backbone_name'] + '/'
            else:
                ckpt_scope = params['ckpt_var_scope'] + '/'
        else:
            # Load every var in the given checkpoint
            var_scope = ckpt_scope = '/'

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            logging.info('restore variables from %s', checkpoint)

            var_map = utils.get_ckt_var_map(ckpt_path=checkpoint,
                                            ckpt_scope=ckpt_scope,
                                            var_scope=var_scope)
            tf.train.init_from_checkpoint(checkpoint, var_map)

            return tf.train.Scaffold()
    elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:

        def scaffold_fn():
            """Load moving average variables for eval."""
            logging.info('Load EMA vars with ema_decay=%f',
                         moving_average_decay)
            restore_vars_dict = ema.variables_to_restore(ema_vars)
            saver = tf.train.Saver(restore_vars_dict)
            return tf.train.Scaffold(saver=saver)
    else:
        scaffold_fn = None

    return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                             loss=total_loss,
                                             train_op=train_op,
                                             eval_metrics=eval_metrics,
                                             host_call=utils.get_tpu_host_call(
                                                 global_step, params),
                                             scaffold_fn=scaffold_fn)
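As a hedged sketch of how a model_fn like this is typically wired up: the graph builder is bound with functools.partial and the result is handed to TPUEstimator. The builder name efficientdet_arch.efficientdet and the config values below are assumptions for illustration, not taken from this file:

import functools
import tensorflow.compat.v1 as tf

# `efficientdet_arch.efficientdet` is an assumed graph builder.
model_fn = functools.partial(_model_fn, model=efficientdet_arch.efficientdet)
estimator = tf.estimator.tpu.TPUEstimator(
    model_fn=model_fn,
    config=tf.estimator.tpu.RunConfig(model_dir='/tmp/effdet'),
    use_tpu=False,  # CrossShardOptimizer is skipped when False
    train_batch_size=8,
    params={'use_bfloat16': False, 'use_tpu': False})  # plus remaining hparams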
Code example #3
def _model_fn(features, labels, mode, params, variable_filter_fn=None):
    """Model defination for the Mask-RCNN model based on ResNet.

  Args:
    features: the input image tensor and auxiliary information, such as
      `image_info` and `source_ids`. The image tensor has a shape of
      [batch_size, height, width, 3]. The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include score targets
      and box targets, which are dense label maps. The labels are generated by
      the get_input_fn function in data/dataloader.py.
    mode: the mode of TPUEstimator, including TRAIN, EVAL, and PREDICT.
    params: the dictionary that defines the hyperparameters of the model. The
      default settings are in the default_hparams function in this file.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule.

  Returns:
    tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction.
  """
    if (mode == tf.estimator.ModeKeys.PREDICT
            or mode == tf.estimator.ModeKeys.EVAL):
        if ((params['include_groundtruth_in_features']
             or mode == tf.estimator.ModeKeys.EVAL)
                and ('labels' in features)):
            # Include groundtruth for eval.
            labels = features['labels']

        if 'features' in features:
            features = features['features']
            # Otherwise, it is in export mode and the features are passed in directly.

    if params['precision'] == 'bfloat16':
        with tf.tpu.bfloat16_scope():
            model_outputs = build_model_graph(
                features, labels, mode == tf.estimator.ModeKeys.TRAIN, params)
            model_outputs.update({
                'source_id': features['source_ids'],
                'image_info': features['image_info'],
            })

            def cast_outputs_to_float(d):
                for k, v in sorted(six.iteritems(d)):
                    if isinstance(v, dict):
                        cast_outputs_to_float(v)
                    else:
                        d[k] = tf.cast(v, tf.float32)

            cast_outputs_to_float(model_outputs)
    else:
        model_outputs = build_model_graph(features, labels,
                                          mode == tf.estimator.ModeKeys.TRAIN,
                                          params)
        model_outputs.update({
            'source_id': features['source_ids'],
            'image_info': features['image_info'],
        })

    # First check if it is in PREDICT or EVAL mode to fill out predictions.
    # Predictions are used during the eval step to generate metrics.
    predictions = {}
    if (mode == tf.estimator.ModeKeys.PREDICT
            or mode == tf.estimator.ModeKeys.EVAL):
        if 'orig_images' in features:
            model_outputs['orig_images'] = features['orig_images']
        if labels and params['include_groundtruth_in_features']:
            # Labels can only be embedded in predictions. The prediction cannot
            # output a dictionary as a value.
            predictions.update(labels)
        model_outputs.pop('fpn_features', None)
        predictions.update(model_outputs)
        # If we are doing PREDICT, we can return here.
        if mode == tf.estimator.ModeKeys.PREDICT:
            if params['use_tpu']:
                return tf.estimator.tpu.TPUEstimatorSpec(
                    mode=mode, predictions=predictions)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

    # Set up training loss and learning rate.
    global_step = tf.train.get_or_create_global_step()
    if params['learning_rate_type'] == 'step':
        learning_rate = learning_rates.step_learning_rate_with_linear_warmup(
            global_step, params['init_learning_rate'],
            params['warmup_learning_rate'], params['warmup_steps'],
            params['learning_rate_levels'], params['learning_rate_steps'])
    elif params['learning_rate_type'] == 'cosine':
        learning_rate = learning_rates.cosine_learning_rate_with_linear_warmup(
            global_step, params['init_learning_rate'],
            params['warmup_learning_rate'], params['warmup_steps'],
            params['total_steps'])
    else:
        raise ValueError('Unsupported learning rate type: `{}`!'.format(
            params['learning_rate_type']))
    # score_loss and box_loss are for logging. Only total_loss is optimized.
    total_rpn_loss, rpn_score_loss, rpn_box_loss = losses.rpn_loss(
        model_outputs['rpn_score_outputs'], model_outputs['rpn_box_outputs'],
        labels, params)

    (total_fast_rcnn_loss, fast_rcnn_class_loss,
     fast_rcnn_box_loss) = losses.fast_rcnn_loss(
         model_outputs['class_outputs'], model_outputs['box_outputs'],
         model_outputs['class_targets'], model_outputs['box_targets'], params)
    # Only training has the mask loss. Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/model_builder.py  # pylint: disable=line-too-long
    if mode == tf.estimator.ModeKeys.TRAIN and params['include_mask']:
        mask_loss = losses.mask_rcnn_loss(
            model_outputs['mask_outputs'], model_outputs['mask_targets'],
            model_outputs['selected_class_targets'], params)
    else:
        mask_loss = 0.
    if variable_filter_fn and ('resnet' in params['backbone']):
        var_list = variable_filter_fn(tf.trainable_variables(),
                                      params['backbone'] + '/')
    else:
        var_list = tf.trainable_variables()
    l2_regularization_loss = params['l2_weight_decay'] * tf.add_n([
        tf.nn.l2_loss(v) for v in var_list
        if 'batch_normalization' not in v.name and 'bias' not in v.name
    ])
    total_loss = (total_rpn_loss + total_fast_rcnn_loss + mask_loss +
                  l2_regularization_loss)

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = create_optimizer(learning_rate, params)
        if params['use_tpu']:
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)

        scaffold_fn = None
        if params['warm_start_path']:

            def warm_start_scaffold_fn():
                logging.info('model_fn warm start from: %s',
                             params['warm_start_path'])
                assignment_map = _build_assigment_map(
                    optimizer,
                    prefix=None,
                    skip_variables_regex=params['skip_checkpoint_variables'])
                tf.train.init_from_checkpoint(params['warm_start_path'],
                                              assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = warm_start_scaffold_fn

        elif params['checkpoint']:

            def backbone_scaffold_fn():
                """Loads pretrained model through scaffold function."""
                # Exclude all variables of the optimizer.
                vars_to_load = _build_assigment_map(
                    optimizer,
                    prefix=params['backbone'] + '/',
                    skip_variables_regex=params['skip_checkpoint_variables'])
                tf.train.init_from_checkpoint(params['checkpoint'],
                                              vars_to_load)
                if not vars_to_load:
                    raise ValueError('Variables to load is empty.')
                return tf.train.Scaffold()

            scaffold_fn = backbone_scaffold_fn

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
        if params['global_gradient_clip_ratio'] > 0:
            # Clips the gradients for training stability.
            # Refer: https://arxiv.org/abs/1211.5063
            with tf.name_scope('clipping'):
                old_grads, variables = zip(*grads_and_vars)
                num_weights = sum(g.shape.num_elements() for g in old_grads
                                  if g is not None)
                clip_norm = params['global_gradient_clip_ratio'] * math.sqrt(
                    num_weights)
                logging.info(
                    'Global clip norm set to %g for %d variables with %d elements.',
                    clip_norm, sum(1 for g in old_grads if g is not None),
                    num_weights)
                gradients, _ = tf.clip_by_global_norm(old_grads, clip_norm)
        else:
            gradients, variables = zip(*grads_and_vars)
        grads_and_vars = []
        # Special treatment for biases (beta is named as bias in reference model)
        # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/optimizer.py#L113  # pylint: disable=line-too-long
        for grad, var in zip(gradients, variables):
            if grad is not None and ('beta' in var.name or 'bias' in var.name):
                grad = 2.0 * grad
            grads_and_vars.append((grad, var))

        with tf.control_dependencies(update_ops):
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

        if params['use_host_call']:

            def host_call_fn(global_step, total_loss, total_rpn_loss,
                             rpn_score_loss, rpn_box_loss,
                             total_fast_rcnn_loss, fast_rcnn_class_loss,
                             fast_rcnn_box_loss, mask_loss,
                             l2_regularization_loss, learning_rate):
                """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          global_step: `Tensor` with shape `[batch, ]` for the global_step.
          total_loss: `Tensor` with shape `[batch, ]` for the training loss.
          total_rpn_loss: `Tensor` with shape `[batch, ]` for the training RPN
            loss.
          rpn_score_loss: `Tensor` with shape `[batch, ]` for the training RPN
            score loss.
          rpn_box_loss: `Tensor` with shape `[batch, ]` for the training RPN
            box loss.
          total_fast_rcnn_loss: `Tensor` with shape `[batch, ]` for the
            training Mask-RCNN loss.
          fast_rcnn_class_loss: `Tensor` with shape `[batch, ]` for the
            training Mask-RCNN class loss.
          fast_rcnn_box_loss: `Tensor` with shape `[batch, ]` for the
            training Mask-RCNN box loss.
          mask_loss: `Tensor` with shape `[batch, ]` for the training Mask-RCNN
            mask loss.
          l2_regularization_loss: `Tensor` with shape `[batch, ]` for the
            regularization loss.
          learning_rate: `Tensor` with shape `[batch, ]` for the learning_rate.

        Returns:
          List of summary ops to run on the CPU host.
        """
                # Outfeed supports int32 but global_step is expected to be int64.
                global_step = tf.reduce_mean(global_step)
                # Host call fns are executed FLAGS.iterations_per_loop times after
                # one TPU loop is finished. Setting max_queue to the number of
                # iterations makes the summary writer flush the data to storage
                # only once per loop.
                with (tf2.summary.create_file_writer(
                        params['model_dir'],
                        max_queue=params['iterations_per_loop']).as_default()):
                    with tf2.summary.record_if(True):
                        tf2.summary.scalar('total_loss',
                                           tf.reduce_mean(total_loss),
                                           step=global_step)
                        tf2.summary.scalar('total_rpn_loss',
                                           tf.reduce_mean(total_rpn_loss),
                                           step=global_step)
                        tf2.summary.scalar('rpn_score_loss',
                                           tf.reduce_mean(rpn_score_loss),
                                           step=global_step)
                        tf2.summary.scalar('rpn_box_loss',
                                           tf.reduce_mean(rpn_box_loss),
                                           step=global_step)
                        tf2.summary.scalar(
                            'total_fast_rcnn_loss',
                            tf.reduce_mean(total_fast_rcnn_loss),
                            step=global_step)
                        tf2.summary.scalar(
                            'fast_rcnn_class_loss',
                            tf.reduce_mean(fast_rcnn_class_loss),
                            step=global_step)
                        tf2.summary.scalar('fast_rcnn_box_loss',
                                           tf.reduce_mean(fast_rcnn_box_loss),
                                           step=global_step)
                        if params['include_mask']:
                            tf2.summary.scalar('mask_loss',
                                               tf.reduce_mean(mask_loss),
                                               step=global_step)
                        tf2.summary.scalar(
                            'l2_regularization_loss',
                            tf.reduce_mean(l2_regularization_loss),
                            step=global_step)
                        tf2.summary.scalar('learning_rate',
                                           tf.reduce_mean(learning_rate),
                                           step=global_step)

                        return tf.summary.all_v2_summary_ops()

            # To log the loss, current learning rate, and epoch for Tensorboard, the
            # summary op needs to be run on the host CPU via host_call. host_call
            # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
            # dimension. These Tensors are implicitly concatenated to
            # [params['batch_size']].
            global_step_t = tf.reshape(global_step, [1])
            total_loss_t = tf.reshape(total_loss, [1])
            total_rpn_loss_t = tf.reshape(total_rpn_loss, [1])
            rpn_score_loss_t = tf.reshape(rpn_score_loss, [1])
            rpn_box_loss_t = tf.reshape(rpn_box_loss, [1])
            total_fast_rcnn_loss_t = tf.reshape(total_fast_rcnn_loss, [1])
            fast_rcnn_class_loss_t = tf.reshape(fast_rcnn_class_loss, [1])
            fast_rcnn_box_loss_t = tf.reshape(fast_rcnn_box_loss, [1])
            mask_loss_t = tf.reshape(mask_loss, [1])
            l2_regularization_loss = tf.reshape(l2_regularization_loss, [1])
            learning_rate_t = tf.reshape(learning_rate, [1])
            host_call = (host_call_fn, [
                global_step_t, total_loss_t, total_rpn_loss_t,
                rpn_score_loss_t, rpn_box_loss_t, total_fast_rcnn_loss_t,
                fast_rcnn_class_loss_t, fast_rcnn_box_loss_t, mask_loss_t,
                l2_regularization_loss, learning_rate_t
            ])
    else:
        train_op = None
        scaffold_fn = None

    if params['use_tpu']:
        return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                                 loss=total_loss,
                                                 train_op=train_op,
                                                 host_call=host_call,
                                                 scaffold_fn=scaffold_fn)
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=total_loss,
                                      train_op=train_op)
Code example #4
File: chatbot.py Project: Nickhill28/Wachowski
inputs, targets, lr, keep_prob = model_input()

# Setting the sequence length
sequence_length = tf.placeholder_with_default(25, None, "Sequence_length")

# Getting the shape of the inputs tensor
input_shape = tf.shape(inputs)

# Getting the training and test predictions
training_predictions, test_predictions = seq2seq_model(
    tf.reverse(inputs, [-1]), targets, keep_prob, batch_size, sequence_length,
    len(answerword2int), len(questionword2int), encoding_embedding_size,
    decoding_embedding_size, rnn_size, num_layers, questionword2int)

# Setting up the Loss Error, the Optimizer and Gradient Clipping
with tf.name_scope("optimization"):
    loss_error = tf.contrib.seq2seq.sequence_loss(
        training_predictions, targets,
        tf.ones([input_shape[0], sequence_length]))
    optimizer = tf.train.AdamOptimizer(lr)
    gradients = optimizer.compute_gradients(loss_error)
    clipped_gradients = [(tf.clip_by_value(grad_tensor, -5.,
                                           5.), grad_variable)
                         for grad_tensor, grad_variable in gradients
                         if grad_tensor is not None]
    optimizer_gradient_clipping = optimizer.apply_gradients(clipped_gradients)


# Padding the sequences with the <PAD> token
def apply_padding(batch_of_sequences, word2int):
    max_sequence_length = max(
        [len(sequence) for sequence in batch_of_sequences])
    return [
        sequence + [word2int['<PAD>']] * (max_sequence_length - len(sequence))
        for sequence in batch_of_sequences
    ]
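A quick sanity check of the completed helper; the '<PAD>' id of 0 is an illustrative assumption:

word2int = {'<PAD>': 0}
print(apply_padding([[5, 6, 7], [8]], word2int))  # [[5, 6, 7], [8, 0, 0]]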
Code example #5
File: nelder_mead.py Project: xzxzmmnn/probability
def nelder_mead_one_step(current_simplex,
                         current_objective_values,
                         objective_function=None,
                         dim=None,
                         func_tolerance=None,
                         position_tolerance=None,
                         batch_evaluate_objective=False,
                         reflection=None,
                         expansion=None,
                         contraction=None,
                         shrinkage=None,
                         name=None):
    """A single iteration of the Nelder Mead algorithm."""
    with tf1.name_scope(name, 'nelder_mead_one_step'):
        domain_dtype = current_simplex.dtype.base_dtype
        order = tf.argsort(current_objective_values,
                           direction='ASCENDING',
                           stable=True)
        (best_index, worst_index,
         second_worst_index) = order[0], order[-1], order[-2]

        worst_vertex = current_simplex[worst_index]

        (best_objective_value, worst_objective_value,
         second_worst_objective_value) = (
             current_objective_values[best_index],
             current_objective_values[worst_index],
             current_objective_values[second_worst_index])

        # Compute the centroid of the face opposite the worst vertex.
        face_centroid = tf.reduce_sum(input_tensor=current_simplex,
                                      axis=0) - worst_vertex
        face_centroid /= tf.cast(dim, domain_dtype)

        # Reflect the worst vertex through the opposite face.
        reflected = face_centroid + reflection * (face_centroid - worst_vertex)
        objective_at_reflected = objective_function(reflected)

        num_evaluations = 1
        has_converged = _check_convergence(current_simplex,
                                           current_simplex[best_index],
                                           best_objective_value,
                                           worst_objective_value,
                                           func_tolerance, position_tolerance)

        def _converged_fn():
            return (True, current_simplex, current_objective_values, 0)

        case0 = has_converged, _converged_fn
        accept_reflected = (
            (objective_at_reflected < second_worst_objective_value) &
            (objective_at_reflected >= best_objective_value))
        accept_reflected_fn = _accept_reflected_fn(current_simplex,
                                                   current_objective_values,
                                                   worst_index, reflected,
                                                   objective_at_reflected)
        case1 = accept_reflected, accept_reflected_fn
        do_expansion = objective_at_reflected < best_objective_value
        expansion_fn = _expansion_fn(objective_function, current_simplex,
                                     current_objective_values, worst_index,
                                     reflected, objective_at_reflected,
                                     face_centroid, expansion)
        case2 = do_expansion, expansion_fn
        do_outside_contraction = (
            (objective_at_reflected < worst_objective_value) &
            (objective_at_reflected >= second_worst_objective_value))
        outside_contraction_fn = _outside_contraction_fn(
            objective_function, current_simplex, current_objective_values,
            face_centroid, best_index, worst_index, reflected,
            objective_at_reflected, contraction, shrinkage,
            batch_evaluate_objective)
        case3 = do_outside_contraction, outside_contraction_fn
        default_fn = _inside_contraction_fn(
            objective_function, current_simplex, current_objective_values,
            face_centroid, best_index, worst_index, worst_objective_value,
            contraction, shrinkage, batch_evaluate_objective)
        (converged, next_simplex, next_objective_at_simplex,
         case_evals) = prefer_static.case([case0, case1, case2, case3],
                                          default=default_fn,
                                          exclusive=False)
        next_simplex.set_shape(current_simplex.shape)
        next_objective_at_simplex.set_shape(current_objective_values.shape)
        return (converged, next_simplex, next_objective_at_simplex,
                num_evaluations + case_evals)
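This function is the internal single-step update; the public entry point in TensorFlow Probability is tfp.optimizer.nelder_mead_minimize, which iterates a step like this until convergence. A hedged usage sketch:

import tensorflow as tf
import tensorflow_probability as tfp

# Minimize a simple quadratic bowl.
def sphere(x):
    return tf.reduce_sum(x ** 2, axis=-1)

results = tfp.optimizer.nelder_mead_minimize(
    sphere, initial_vertex=tf.constant([1.0, 2.0]), func_tolerance=1e-8)
# Once results.converged is True, results.position is approximately [0., 0.].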
Code example #6
File: pruning.py Project: LONG-9621/Stackedcapsule
    def _maybe_update_block_mask(self, weights, threshold, gradients=None):
        """Performs block-granular masking of the weights.

    Block pruning occurs only if the block_height or block_width is > 1 and
    if the weight tensor, when squeezed, has ndims = 2. Otherwise, elementwise
    pruning occurs.
    Args:
      weights: The weight tensor that needs to be masked.
      threshold: The current threshold value. The function will compute a new
        threshold and return the exponential moving average using the current
        value of the threshold.
      gradients: The gradient tensor that is used for salience calculation.

    Returns:
      new_threshold: The new value of the threshold based on weights and
        sparsity at the current global_step.
      new_mask: A tensor of the same size and shape as weights, containing
        0 or 1 to indicate which of the values in weights falls below
        the threshold.

    Raises:
      ValueError: if block pooling function is not AVG or MAX
    """

        block_dims = self._get_block_dims(weights.op.name)
        squeezed_weights = tf.squeeze(weights)
        if squeezed_weights.get_shape().ndims != 2 or block_dims == [1, 1]:
            return self._update_mask(weights, threshold, gradients)

        if (self._spec.prune_option
                in ('first_order_gradient', 'second_order_gradient')
                and gradients is None):
            raise ValueError(
                'Gradient based pruning implementation for block sparsity is not supported.'
            )

        for i in range(2):
            if block_dims[i] == -1:
                block_dims[i] = squeezed_weights.get_shape()[i]

        if self._block_pooling_function not in ['AVG', 'MAX']:
            raise ValueError(
                'Unknown pooling function for block sparsity: %s' %
                self._block_pooling_function)

        with tf.name_scope(weights.op.name + '_pruning_ops'):
            abs_weights = tf.abs(squeezed_weights)
            if gradients is not None:
                abs_gradients = tf.abs(tf.squeeze(gradients))

            pool_window = block_dims
            pool_fn = pruning_utils.factorized_pool
            squeeze_axis = None
            if not self._spec.use_tpu:
                pool_fn = tf.nn.pool
                abs_weights = tf.reshape(abs_weights, [
                    1,
                    abs_weights.get_shape()[0],
                    abs_weights.get_shape()[1], 1
                ])
                if gradients is not None:
                    # Reshape gradients to be a rank 4 tensor of shape [1, .., .., 1].
                    abs_gradients = tf.reshape(abs_gradients, [
                        1,
                        gradients.get_shape()[0],
                        gradients.get_shape()[1], 1
                    ])
                squeeze_axis = [0, 3]

            pooled_weights = pool_fn(abs_weights,
                                     window_shape=pool_window,
                                     pooling_type=self._block_pooling_function,
                                     strides=pool_window,
                                     padding='SAME',
                                     name=weights.op.name + '_pooled')

            if gradients is not None:
                pooled_gradients = pool_fn(
                    abs_gradients,
                    window_shape=pool_window,
                    pooling_type=self._block_pooling_function,
                    strides=pool_window,
                    padding='SAME',
                    name=gradients.op.name + '_pooled')
            else:
                pooled_gradients = None

            if pooled_weights.get_shape().ndims != 2:
                pooled_weights = tf.squeeze(pooled_weights, axis=squeeze_axis)

            if gradients is not None and pooled_gradients.get_shape(
            ).ndims != 2:
                pooled_gradients = tf.squeeze(pooled_gradients,
                                              axis=squeeze_axis)

            smoothed_threshold, new_mask = self._update_mask(
                pooled_weights, threshold, pooled_gradients)

            updated_mask = pruning_utils.expand_tensor(new_mask, block_dims)
            sliced_mask = tf.slice(updated_mask, [0, 0], [
                squeezed_weights.get_shape()[0],
                squeezed_weights.get_shape()[1]
            ])

        return smoothed_threshold, tf.reshape(sliced_mask, tf.shape(weights))
Code example #7
File: lstm.py Project: YuoMamoru/tf_stady
    def build_graph(self,
                    wordvec_size=100,
                    hidden_size=100,
                    time_size=5,
                    optimizer=None):
        """Buid tensorflow graph.

        Args:
            wordvec_size (int): Dimension of Distributed Represendations of
                the words
            hidden_size (int): Dimension of hidden layer
            time_size (int): Count to expand truncated BPTT
            optimizer: Optimizer instance. Default to tf.train.Adam
        """
        self.wordvec_size = wordvec_size
        self.hidden_size = hidden_size
        self.time_size = time_size

        self.learning_rate = tf.placeholder(tf.float32)
        incomes = tf.placeholder(
            tf.int32,
            shape=(None, time_size),
            name='incomes',
        )
        labels = tf.placeholder(
            tf.int32,
            shape=(None, time_size),
            name='labels',
        )
        prev_h = tf.placeholder(tf.float32,
                                shape=(None, hidden_size),
                                name='prev_h')
        self.prev_h = prev_h
        prev_c = tf.placeholder(tf.float32,
                                shape=(None, hidden_size),
                                name='prev_c')
        self.prev_c = prev_c

        with tf.name_scope('TimeEmbedding'):
            embed_W = tf.Variable(
                np.random.randn(self.vocab_size, wordvec_size) / 100,
                dtype=tf.float32,
                name='embed_W',
            )
            xs = tf.gather(embed_W, incomes)

        with tf.name_scope('TimeLSTM'):
            self.lstm_Wx = tf.Variable(
                randn(4, wordvec_size, hidden_size) / sqrt(wordvec_size),
                dtype=tf.float32,
                name='lstm_Wx',
            )
            self.lstm_Wh = tf.Variable(
                randn(4, hidden_size, hidden_size) / sqrt(hidden_size),
                dtype=tf.float32,
                name='lstm_Wh',
            )
            self.lstm_bias = tf.Variable(
                np.zeros([4, hidden_size]),
                dtype=tf.float32,
                name='lstm_bias',
            )

            h = prev_h
            c = prev_c
            time_h = []
            time_c = []
            for i in range(time_size):
                next_h, next_c = self.lstm(xs[:, i, :], c, h)
                time_h.append(next_h)
                time_c.append(next_c)
                h = time_h[-1]
                c = time_c[-1]
            hs = tf.stack(time_h, 1)
            self.next_h = hs[:, time_size - 1, :]
            self.next_c = time_c[time_size - 1]

        with tf.name_scope('TimeAffine'):
            affine_W = tf.Variable(
                randn(hidden_size, self.vocab_size) / sqrt(hidden_size),
                dtype=tf.float32,
                name='affine_W',
            )
            affine_bias = tf.Variable(
                np.zeros(self.vocab_size),
                dtype=tf.float32,
                name='affine_bias',
            )
            logits = tf.math.add(
                tf.matmul(hs, affine_W),
                affine_bias,
                name='logits',
            )

        cee = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.reshape(labels, [-1]),
                logits=tf.reshape(logits, [-1, self.vocab_size]),
            ),
            name='CEE',
        )
        self.los_summaries = [
            tf.summary.scalar('Perplexity', tf.math.exp(cee), family='Loss'),
            tf.summary.scalar('CrossEntropyError', cee, family='Loss'),
        ]

        self.incomes = incomes
        self.labels = labels

        if optimizer is None:
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate)
        grads_vars = optimizer.compute_gradients(cee, tf.trainable_variables())
        with tf.name_scope('GradientsClip'):
            clipped_grads_vars = [(tf.clip_by_norm(grad, 0.25), var)
                                  for grad, var in grads_vars]
        self.training_op = optimizer.apply_gradients(clipped_grads_vars)
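A hedged sketch of driving the graph once it is built; model is assumed to be an instance of the surrounding class after build_graph(), and batch_x/batch_t are illustrative int32 arrays of shape [batch, time_size]:

import numpy as np

batch, hidden = 20, 100
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, h, c = sess.run(
        [model.training_op, model.next_h, model.next_c],
        feed_dict={
            model.incomes: batch_x,
            model.labels: batch_t,
            model.prev_h: np.zeros((batch, hidden), np.float32),
            model.prev_c: np.zeros((batch, hidden), np.float32),
            model.learning_rate: 0.01,
        })
# h and c would be fed back as prev_h/prev_c for the next truncated-BPTT window.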
Code example #8
def expanded_conv(input_tensor,
                  num_outputs,
                  expansion_size=expand_input_by_factor(6),
                  stride=1,
                  rate=1,
                  kernel_size=(3, 3),
                  residual=True,
                  normalizer_fn=None,
                  split_projection=1,
                  split_expansion=1,
                  expansion_transform=None,
                  depthwise_location='expansion',
                  depthwise_channel_multiplier=1,
                  endpoints=None,
                  use_explicit_padding=False,
                  padding='SAME',
                  scope=None):
  """Depthwise Convolution Block with expansion.

  Builds a composite convolution that has the following structure
  expansion (1x1) -> depthwise (kernel_size) -> projection (1x1)
  Args:
    input_tensor: input
    num_outputs: number of outputs in the final layer.
    expansion_size: the size of expansion, could be a constant or a callable.
      If the latter, it will be provided 'num_inputs' as an input. For forward
      compatibility it should accept arbitrary keyword arguments. Default will
      expand the input by a factor of 6.
    stride: depthwise stride
    rate: depthwise rate
    kernel_size: depthwise kernel
    residual: whether to include residual connection between input and output.
    normalizer_fn: batchnorm or otherwise
    split_projection: how many ways to split projection operator (that is conv
      expansion->bottleneck)
    split_expansion: how many ways to split expansion op (that is conv
      bottleneck->expansion) ops will keep depth divisible by this value.
    expansion_transform: Optional function that takes expansion as a single
      input and returns output.
    depthwise_location: where to put depthwise convolutions. Supported values:
      None, 'input', 'output', 'expansion'
    depthwise_channel_multiplier: depthwise channel multiplier: each input will
      be replicated (with different filters) that many times. So if the input
      had c channels, the output will have c * depthwise_channel_multiplier.
    endpoints: An optional dictionary into which intermediate endpoints are
      placed. The keys "expansion_output", "depthwise_output",
      "projection_output" and "expansion_transform" are always populated, even
      if the corresponding functions are not invoked.
    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
      inputs so that the output dimensions are the same as if 'SAME' padding
      were used.
    padding: Padding type to use if `use_explicit_padding` is not set.
    scope: optional scope.

  Returns:
    Tensor of depth num_outputs
  Raises:
    TypeError: on invalid arguments.
  """
  with tf.variable_scope(scope, default_name='expanded_conv') as s, \
       tf.name_scope(s.original_name_scope):
    prev_depth = input_tensor.get_shape().as_list()[3]
    if depthwise_location not in [None, 'input', 'output', 'expansion']:
      raise TypeError(
          '%r is unknown value for depthwise_location' % depthwise_location)
    if use_explicit_padding:
      if padding != 'SAME':
        raise TypeError('`use_explicit_padding` should only be used with '
                        '"SAME" padding.')
      padding = 'VALID'
    depthwise_func = functools.partial(
        slim.separable_conv2d,
        num_outputs=None,
        kernel_size=kernel_size,
        depth_multiplier=depthwise_channel_multiplier,
        stride=stride,
        rate=rate,
        normalizer_fn=normalizer_fn,
        padding=padding,
        scope='depthwise')
    # b1 -> b2 * r -> b2
    #   i -> (o * r) (bottleneck) -> o
    input_tensor = tf.identity(input_tensor, 'input')
    net = input_tensor

    if depthwise_location == 'input':
      if use_explicit_padding:
        net = _fixed_padding(net, kernel_size, rate)
      net = depthwise_func(net, activation_fn=None)

    if callable(expansion_size):
      inner_size = expansion_size(num_inputs=prev_depth)
    else:
      inner_size = expansion_size

    if inner_size > net.shape[3]:
      net = split_conv(
          net,
          inner_size,
          num_ways=split_expansion,
          scope='expand',
          stride=1,
          normalizer_fn=normalizer_fn)
      net = tf.identity(net, 'expansion_output')
    if endpoints is not None:
      endpoints['expansion_output'] = net

    if depthwise_location == 'expansion':
      if use_explicit_padding:
        net = _fixed_padding(net, kernel_size, rate)
      net = depthwise_func(net)

    net = tf.identity(net, name='depthwise_output')
    if endpoints is not None:
      endpoints['depthwise_output'] = net
    if expansion_transform:
      net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor)
    # Note in contrast with expansion, we always have
    # projection to produce the desired output size.
    net = split_conv(
        net,
        num_outputs,
        num_ways=split_projection,
        stride=1,
        scope='project',
        normalizer_fn=normalizer_fn,
        activation_fn=tf.identity)
    if endpoints is not None:
      endpoints['projection_output'] = net
    if depthwise_location == 'output':
      if use_explicit_padding:
        net = _fixed_padding(net, kernel_size, rate)
      net = depthwise_func(net, activation_fn=None)

    if callable(residual):  # custom residual
      net = residual(input_tensor=input_tensor, output_tensor=net)
    elif (
        residual and
        # stride check enforces that we don't add residuals when spatial
        # dimensions are None
        stride == 1 and
        # Depth matches
        net.get_shape().as_list()[3] == input_tensor.get_shape().as_list()[3]):
      net += input_tensor
    return tf.identity(net, name='output')
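An illustrative call, assuming slim and the module's helpers (split_conv, _fixed_padding) are in scope; the shapes are arbitrary:

inputs = tf.placeholder(tf.float32, [1, 56, 56, 32])  # dummy NHWC input
# One inverted-bottleneck block: 32 -> 192 channels (6x expansion) -> 16.
net = expanded_conv(inputs, num_outputs=16, normalizer_fn=slim.batch_norm)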
Code example #9
def style_prediction(style_input_,
                     activation_names,
                     activation_depths,
                     is_training=True,
                     trainable=True,
                     inception_end_point='Mixed_6e',
                     style_prediction_bottleneck=100,
                     reuse=None):
    """Maps style images to the style embeddings (beta and gamma parameters).

  Args:
    style_input_: Tensor. Batch of style input images.
    activation_names: list of strings. Scope names of the activations of the
        transformer network which are used to apply style normalization.
    activation_depths: list of ints. Depths of the activations of the
        transformer network which are used to apply style normalization.
    is_training: bool. Is it training phase or not?
    trainable: bool. Should the parameters be marked as trainable?
    inception_end_point: string. Specifies the endpoint to construct the
        inception_v3 network up to. This network is part of the style prediction
        network.
    style_prediction_bottleneck: int. Specifies the bottleneck size in the
        number of parameters of the style embedding.
    reuse: bool. Whether to reuse model parameters. Defaults to False.

  Returns:
    Tensor for the output of the style prediction network, Tensor for the
        bottleneck of style parameters of the style prediction network.
  """
    with tf.name_scope('style_prediction'), tf.variable_scope(
            tf.get_variable_scope(), reuse=reuse):
        with slim.arg_scope(_inception_v3_arg_scope(is_training=is_training)):
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected, slim.batch_norm],
                    trainable=trainable):
                with slim.arg_scope([slim.batch_norm, slim.dropout],
                                    is_training=is_training):
                    _, end_points = inception_v3.inception_v3_base(
                        style_input_,
                        scope='InceptionV3',
                        final_endpoint=inception_end_point)

        # Shape of feat_convlayer is (batch_size, ?, ?, depth).
        # For the Mixed_6e end point, depth is 768; for an input image size of
        # 256x256, width and height are 14x14.
        feat_convlayer = end_points[inception_end_point]
        with tf.name_scope('bottleneck'):
            # (batch_size, 1, 1, depth).
            bottleneck_feat = tf.reduce_mean(feat_convlayer,
                                             axis=[1, 2],
                                             keep_dims=True)

        if style_prediction_bottleneck > 0:
            with slim.arg_scope([slim.conv2d],
                                activation_fn=None,
                                normalizer_fn=None,
                                trainable=trainable):
                # (batch_size, 1, 1, style_prediction_bottleneck).
                bottleneck_feat = slim.conv2d(bottleneck_feat,
                                              style_prediction_bottleneck,
                                              [1, 1])

        style_params = {}
        with tf.variable_scope('style_params'):
            for i in range(len(activation_depths)):
                with tf.variable_scope(activation_names[i], reuse=reuse):
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        trainable=trainable):

                        # Computing beta parameter of the style normalization for the
                        # activation_names[i] layer of the style transformer network.
                        # (batch_size, 1, 1, activation_depths[i])
                        beta = slim.conv2d(bottleneck_feat,
                                           activation_depths[i], [1, 1])
                        # (batch_size, activation_depths[i])
                        beta = tf.squeeze(beta, [1, 2], name='SpatialSqueeze')
                        style_params['{}/beta'.format(
                            activation_names[i])] = beta

                        # Computing gamma parameter of the style normalization for the
                        # activation_names[i] layer of the style transformer network.
                        # (batch_size, 1, 1, activation_depths[i])
                        gamma = slim.conv2d(bottleneck_feat,
                                            activation_depths[i], [1, 1])
                        # (batch_size, activation_depths[i])
                        gamma = tf.squeeze(gamma, [1, 2],
                                           name='SpatialSqueeze')
                        style_params['{}/gamma'.format(
                            activation_names[i])] = gamma

    return style_params, bottleneck_feat
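A hedged call sketch; the activation names and depths below are placeholders, since in the real pipeline they come from the style transformer network definition:

style_images = tf.placeholder(tf.float32, [4, 256, 256, 3])
style_params, bottleneck = style_prediction(
    style_images,
    activation_names=['transformer/contract/conv1'],  # placeholder scope name
    activation_depths=[32],                           # matching depth
    is_training=False)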
Code example #10
def inception_v3(inputs,
                 dropout_keep_prob=0.8,
                 num_classes=1000,
                 is_training=True,
                 restore_logits=True,
                 scope=''):
    """Latest Inception from http://arxiv.org/abs/1512.00567.

    "Rethinking the Inception Architecture for Computer Vision"

    Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
    Zbigniew Wojna

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    dropout_keep_prob: dropout keep_prob.
    num_classes: number of predicted classes.
    is_training: whether it is in training mode or not.
    restore_logits: whether or not the logits layers should be restored.
      Useful for fine-tuning a model with different num_classes.
    scope: Optional scope for name_scope.

  Returns:
    a list containing 'logits', 'aux_logits' Tensors.
  """
    # end_points will collect relevant activations for external use, for example
    # summaries or losses.
    end_points = {}
    with tf.name_scope(scope, 'inception_v3', [inputs]):
        with scopes.arg_scope(
            [ops.conv2d, ops.fc, ops.batch_norm, ops.dropout],
                is_training=is_training):
            with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                                  stride=1,
                                  padding='VALID'):
                # 299 x 299 x 3
                end_points['conv0'] = ops.conv2d(inputs,
                                                 32, [3, 3],
                                                 stride=2,
                                                 scope='conv0')
                # 149 x 149 x 32
                end_points['conv1'] = ops.conv2d(end_points['conv0'],
                                                 32, [3, 3],
                                                 scope='conv1')
                # 147 x 147 x 32
                end_points['conv2'] = ops.conv2d(end_points['conv1'],
                                                 64, [3, 3],
                                                 padding='SAME',
                                                 scope='conv2')
                # 147 x 147 x 64
                end_points['pool1'] = ops.max_pool(end_points['conv2'], [3, 3],
                                                   stride=2,
                                                   scope='pool1')
                # 73 x 73 x 64
                end_points['conv3'] = ops.conv2d(end_points['pool1'],
                                                 80, [1, 1],
                                                 scope='conv3')
                # 73 x 73 x 80.
                end_points['conv4'] = ops.conv2d(end_points['conv3'],
                                                 192, [3, 3],
                                                 scope='conv4')
                # 71 x 71 x 192.
                end_points['pool2'] = ops.max_pool(end_points['conv4'], [3, 3],
                                                   stride=2,
                                                   scope='pool2')
                # 35 x 35 x 192.
                net = end_points['pool2']
            # Inception blocks
            with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                                  stride=1,
                                  padding='SAME'):
                # mixed: 35 x 35 x 256.
                with tf.variable_scope('mixed_35x35x256a'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 64, [1, 1])
                    with tf.variable_scope('branch5x5'):
                        branch5x5 = ops.conv2d(net, 48, [1, 1])
                        branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 32, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_35x35x256a'] = net
                # mixed_1: 35 x 35 x 288.
                with tf.variable_scope('mixed_35x35x288a'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 64, [1, 1])
                    with tf.variable_scope('branch5x5'):
                        branch5x5 = ops.conv2d(net, 48, [1, 1])
                        branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_35x35x288a'] = net
                # mixed_2: 35 x 35 x 288.
                with tf.variable_scope('mixed_35x35x288b'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 64, [1, 1])
                    with tf.variable_scope('branch5x5'):
                        branch5x5 = ops.conv2d(net, 48, [1, 1])
                        branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_35x35x288b'] = net
                # mixed_3: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768a'):
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net,
                                               384, [3, 3],
                                               stride=2,
                                               padding='VALID')
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl,
                                                  96, [3, 3],
                                                  stride=2,
                                                  padding='VALID')
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.max_pool(net, [3, 3],
                                                   stride=2,
                                                   padding='VALID')
                    net = tf.concat([branch3x3, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_17x17x768a'] = net
                # mixed4: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768b'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 128, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 128, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 128, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768b'] = net
                # mixed_5: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768c'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 160, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 160, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768c'] = net
                # mixed_6: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768d'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 160, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 160, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768d'] = net
                # mixed_7: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768e'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 192, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 192, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 192, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768e'] = net
                # Auxiliary Head logits
                aux_logits = tf.identity(end_points['mixed_17x17x768e'])
                with tf.variable_scope('aux_logits'):
                    aux_logits = ops.avg_pool(aux_logits, [5, 5],
                                              stride=3,
                                              padding='VALID')
                    aux_logits = ops.conv2d(aux_logits,
                                            128, [1, 1],
                                            scope='proj')
                    # Shape of feature map before the final layer.
                    shape = aux_logits.get_shape()
                    aux_logits = ops.conv2d(aux_logits,
                                            768,
                                            shape[1:3],
                                            stddev=0.01,
                                            padding='VALID')
                    aux_logits = ops.flatten(aux_logits)
                    aux_logits = ops.fc(aux_logits,
                                        num_classes,
                                        activation=None,
                                        stddev=0.001,
                                        restore=restore_logits)
                    end_points['aux_logits'] = aux_logits
                # mixed_8: 8 x 8 x 1280.
                # Note that the scope below is kept unchanged so as not to
                # invalidate previously trained checkpoints.
                # TODO: Fix the scope when appropriate.
                with tf.variable_scope('mixed_17x17x1280a'):
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net, 192, [1, 1])
                        branch3x3 = ops.conv2d(branch3x3,
                                               320, [3, 3],
                                               stride=2,
                                               padding='VALID')
                    with tf.variable_scope('branch7x7x3'):
                        branch7x7x3 = ops.conv2d(net, 192, [1, 1])
                        branch7x7x3 = ops.conv2d(branch7x7x3, 192, [1, 7])
                        branch7x7x3 = ops.conv2d(branch7x7x3, 192, [7, 1])
                        branch7x7x3 = ops.conv2d(branch7x7x3,
                                                 192, [3, 3],
                                                 stride=2,
                                                 padding='VALID')
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.max_pool(net, [3, 3],
                                                   stride=2,
                                                   padding='VALID')
                    net = tf.concat([branch3x3, branch7x7x3, branch_pool], 3)
                    end_points['mixed_17x17x1280a'] = net
                # mixed_9: 8 x 8 x 2048.
                with tf.variable_scope('mixed_8x8x2048a'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 320, [1, 1])
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net, 384, [1, 1])
                        branch3x3 = tf.concat([
                            ops.conv2d(branch3x3, 384, [1, 3]),
                            ops.conv2d(branch3x3, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 448, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
                        branch3x3dbl = tf.concat([
                            ops.conv2d(branch3x3dbl, 384, [1, 3]),
                            ops.conv2d(branch3x3dbl, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch3x3, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_8x8x2048a'] = net
                # mixed_10: 8 x 8 x 2048.
                with tf.variable_scope('mixed_8x8x2048b'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 320, [1, 1])
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net, 384, [1, 1])
                        branch3x3 = tf.concat([
                            ops.conv2d(branch3x3, 384, [1, 3]),
                            ops.conv2d(branch3x3, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 448, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
                        branch3x3dbl = tf.concat([
                            ops.conv2d(branch3x3dbl, 384, [1, 3]),
                            ops.conv2d(branch3x3dbl, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch3x3, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_8x8x2048b'] = net
                # Final pooling and prediction
                with tf.variable_scope('logits'):
                    shape = net.get_shape()
                    net = ops.avg_pool(net,
                                       shape[1:3],
                                       padding='VALID',
                                       scope='pool')
                    # 1 x 1 x 2048
                    net = ops.dropout(net, dropout_keep_prob, scope='dropout')
                    net = ops.flatten(net, scope='flatten')
                    # 2048
                    logits = ops.fc(net,
                                    num_classes,
                                    activation=None,
                                    scope='logits',
                                    restore=restore_logits)
                    # 1000
                    end_points['logits'] = logits
                    end_points['predictions'] = tf.nn.softmax(
                        logits, name='predictions')
            return logits, end_points
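
A shape-level usage sketch for the function above; the random input is a stand-in, and the ops/scopes helpers are assumed to come from the accompanying slim-style library.

images = tf.random_uniform([8, 299, 299, 3])  # batch of 299x299 RGB images
logits, end_points = inception_v3(images, num_classes=1000, is_training=True)
aux_logits = end_points['aux_logits']         # auxiliary classifier head
probabilities = end_points['predictions']     # softmax over num_classes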
Code example #11
File: auto_train.py Project: Jeffwang87/Autoencoder
def tf_d_suplu(x, name=None):
    with tf.name_scope(name, "d_suplu", [x]) as name:
        y = tf.py_func(np_d_suplu_32, [x], [tf.float32],
                       name=name,
                       stateful=False)
        return y[0]
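
The snippet assumes a NumPy implementation np_d_suplu_32 is in scope; tf.py_func runs it on the host CPU and exposes it as a graph op (which is not differentiable by itself). A sketch of a compatible stub follows; the formula is purely illustrative, since the real SupLU derivative is defined elsewhere in the project.

import numpy as np

def np_d_suplu_32(x):
    # Illustrative placeholder only; the real derivative depends on the
    # project's SupLU definition.
    return (1.0 / (1.0 + np.exp(-x))).astype(np.float32)

x = tf.placeholder(tf.float32, [None])
dy = tf_d_suplu(x)  # same shape and dtype as x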
Code example #12
def initialize(self, name=None):
    with tf.name_scope(name, "TrainingHelperInitialize"):
        finished = tf.equal(0, self._sequence_length)
        return (finished, self._zero_inputs)
Code example #13
def sample(self, time, outputs, name=None, **unused_kwargs):
    with tf.name_scope(name, "TrainingHelperSample", [time, outputs]):
        sample_ids = tf.cast(tf.argmax(outputs, axis=-1), tf.dtypes.int32)
        return sample_ids
Code example #14
def resize_to_range(image,
                    masks=None,
                    min_dimension=None,
                    max_dimension=None,
                    method=tf.image.ResizeMethod.BILINEAR,
                    align_corners=False,
                    pad_to_max_dimension=False):
    """Resizes an image so its dimensions are within the provided value.

  The output size can be described by two cases:
  1. If the image can be rescaled so its minimum dimension is equal to the
     provided value without the other dimension exceeding max_dimension,
     then do so.
  2. Otherwise, resize so the largest dimension is equal to max_dimension.

  Args:
    image: A 3D tensor of shape [height, width, channels]
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks.
    min_dimension: (optional) (scalar) desired size of the smaller image
                   dimension.
    max_dimension: (optional) (scalar) maximum allowed size
                   of the larger image dimension.
    method: (optional) interpolation method used in resizing. Defaults to
            BILINEAR.
    align_corners: bool. If true, exactly align all 4 corners of the input
                   and output. Defaults to False.
    pad_to_max_dimension: Whether to resize the image and pad it with zeros
      so the resulting image is of the spatial size
      [max_dimension, max_dimension]. If masks are included they are padded
      similarly.

  Returns:
    Note that the position of the resized_image_shape changes based on whether
    masks are present.
    resized_image: A 3D tensor of shape [new_height, new_width, channels],
      where the image has been resized (with bilinear interpolation) so that
      min(new_height, new_width) == min_dimension or
      max(new_height, new_width) == max_dimension.
    resized_masks: If masks is not None, also outputs masks. A 3D tensor of
      shape [num_instances, new_height, new_width].
    resized_image_shape: A 1D tensor of shape [3] containing shape of the
      resized image.

  Raises:
    ValueError: if the image is not a 3D tensor.
  """
    if len(image.get_shape()) != 3:
        raise ValueError('Image should be 3D tensor')

    with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
        if image.get_shape().is_fully_defined():
            new_size = _compute_new_static_size(image, min_dimension,
                                                max_dimension)
        else:
            new_size = _compute_new_dynamic_size(image, min_dimension,
                                                 max_dimension)
        new_image = tf.image.resize_images(image,
                                           new_size[:-1],
                                           method=method,
                                           align_corners=align_corners)

        if pad_to_max_dimension:
            new_image = tf.image.pad_to_bounding_box(new_image, 0, 0,
                                                     max_dimension,
                                                     max_dimension)

        result = [new_image]
        if masks is not None:
            new_masks = tf.expand_dims(masks, 3)
            new_masks = tf.image.resize_images(
                new_masks,
                new_size[:-1],
                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                align_corners=align_corners)
            new_masks = tf.squeeze(new_masks, 3)
            if pad_to_max_dimension:
                new_masks = tf.image.pad_to_bounding_box(
                    new_masks, 0, 0, max_dimension, max_dimension)
            result.append(new_masks)

        result.append(new_size)
        return result
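
For example, with the common detection setting min_dimension=800 and max_dimension=1365, a 600x1200 image falls under case 2 (scaling the short side to 800 would push the long side to 1600), while a 600x700 image falls under case 1. A usage sketch:

image = tf.random_uniform([600, 1200, 3])
resized_image, resized_shape = resize_to_range(image,
                                               min_dimension=800,
                                               max_dimension=1365)
# resized_image is roughly [682, 1365, 3]: the long side was clamped.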
Code example #15
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
    """Model definition entry.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in data/dataloader.py
    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    model: the model outputs class logits and box regression outputs.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule.

  Returns:
    tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction.

  Raises:
    RuntimeError: if both ckpt and backbone_ckpt are set.
  """
    utils.image('input_image', features)
    training_hooks = []
    if params['data_format'] == 'channels_first':
        features = tf.transpose(features, [0, 3, 1, 2])

    def _model_outputs(inputs):
        # Convert params (dict) to Config for easier access.
        return model(inputs, config=hparams_config.Config(params))

    precision = utils.get_precision(params['strategy'],
                                    params['mixed_precision'])
    cls_outputs, box_outputs = utils.build_model_with_precision(
        precision, _model_outputs, features, params['is_training_bn'])

    levels = cls_outputs.keys()
    for level in levels:
        cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
        box_outputs[level] = tf.cast(box_outputs[level], tf.float32)

    # First check if it is in PREDICT mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'image': features,
        }
        for level in levels:
            predictions['cls_outputs_%d' % level] = cls_outputs[level]
            predictions['box_outputs_%d' % level] = box_outputs[level]
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Set up training loss and learning rate.
    update_learning_rate_schedule_parameters(params)
    global_step = tf.train.get_or_create_global_step()
    learning_rate = learning_rate_schedule(params, global_step)

    # cls_loss and box_loss are for logging; only total_loss is optimized.
    det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
        cls_outputs, box_outputs, labels, params)
    reg_l2loss = reg_l2_loss(params['weight_decay'])
    total_loss = det_loss + reg_l2loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        utils.scalar('lrn_rate', learning_rate)
        utils.scalar('trainloss/cls_loss', cls_loss)
        utils.scalar('trainloss/box_loss', box_loss)
        utils.scalar('trainloss/det_loss', det_loss)
        utils.scalar('trainloss/reg_l2_loss', reg_l2loss)
        utils.scalar('trainloss/loss', total_loss)
        if params['iou_loss_type']:
            utils.scalar('trainloss/box_iou_loss', box_iou_loss)

    moving_average_decay = params['moving_average_decay']
    if moving_average_decay:
        ema = tf.train.ExponentialMovingAverage(decay=moving_average_decay,
                                                num_updates=global_step)
        ema_vars = utils.get_ema_vars()
    if params['strategy'] == 'horovod':
        import horovod.tensorflow as hvd  # pylint: disable=g-import-not-at-top
        learning_rate = learning_rate * hvd.size()
    if mode == tf.estimator.ModeKeys.TRAIN:
        if params['optimizer'].lower() == 'sgd':
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   momentum=params['momentum'])
        elif params['optimizer'].lower() == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate)
        else:
            raise ValueError("optimizer should be 'adam' or 'sgd'")

        if params['strategy'] == 'tpu':
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)
        elif params['strategy'] == 'horovod':
            optimizer = hvd.DistributedOptimizer(optimizer)
            training_hooks = [hvd.BroadcastGlobalVariablesHook(0)]

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        var_list = tf.trainable_variables()
        if variable_filter_fn:
            var_list = variable_filter_fn(var_list)

        if params.get('clip_gradients_norm', 0) > 0:
            logging.info('clip gradients norm by %f',
                         params['clip_gradients_norm'])
            grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
            with tf.name_scope('clip'):
                grads = [gv[0] for gv in grads_and_vars]
                tvars = [gv[1] for gv in grads_and_vars]
                clipped_grads, gnorm = tf.clip_by_global_norm(
                    grads, params['clip_gradients_norm'])
                utils.scalar('gnorm', gnorm)
                grads_and_vars = list(zip(clipped_grads, tvars))

            with tf.control_dependencies(update_ops):
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step)
        else:
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(total_loss,
                                              global_step,
                                              var_list=var_list)

        if moving_average_decay:
            with tf.control_dependencies([train_op]):
                train_op = ema.apply(ema_vars)

    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(**kwargs):
            """Returns a dictionary that has the evaluation metrics."""
            batch_size = params['batch_size']
            if params['strategy'] == 'tpu':
                batch_size = params['batch_size'] * params['num_shards']
            eval_anchors = anchors.Anchors(params['min_level'],
                                           params['max_level'],
                                           params['num_scales'],
                                           params['aspect_ratios'],
                                           params['anchor_scale'],
                                           params['image_size'])
            anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                                   params['num_classes'])
            cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])

            if params.get('testdev_dir', None):
                logging.info('Eval testdev_dir %s', params['testdev_dir'])
                coco_metrics = coco_metric_fn(
                    batch_size,
                    anchor_labeler,
                    params['val_json_file'],
                    testdev_dir=params['testdev_dir'],
                    nms_configs=params['nms_configs'],
                    **kwargs)
            else:
                logging.info('Eval val with groundtruths %s.',
                             params['val_json_file'])
                coco_metrics = coco_metric_fn(
                    batch_size,
                    anchor_labeler,
                    params['val_json_file'],
                    nms_configs=params['nms_configs'],
                    **kwargs)

            # Add metrics to output.
            output_metrics = {
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        cls_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(cls_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        box_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(box_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        metric_fn_inputs = {
            'cls_loss_repeat': cls_loss_repeat,
            'box_loss_repeat': box_loss_repeat,
            'source_ids': labels['source_ids'],
            'groundtruth_data': labels['groundtruth_data'],
            'image_scales': labels['image_scales'],
        }
        add_metric_fn_inputs(params, cls_outputs, box_outputs,
                             metric_fn_inputs)
        eval_metrics = (metric_fn, metric_fn_inputs)

    checkpoint = params.get('ckpt') or params.get('backbone_ckpt')

    if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
        # Initialize the model from an EfficientDet or backbone checkpoint.
        if params.get('ckpt') and params.get('backbone_ckpt'):
            raise RuntimeError(
                '--backbone_ckpt and --checkpoint are mutually exclusive')

        if params.get('backbone_ckpt'):
            var_scope = params['backbone_name'] + '/'
            if params['ckpt_var_scope'] is None:
                # Use backbone name as default checkpoint scope.
                ckpt_scope = params['backbone_name'] + '/'
            else:
                ckpt_scope = params['ckpt_var_scope'] + '/'
        else:
            # Load every var in the given checkpoint
            var_scope = ckpt_scope = '/'

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            logging.info('restore variables from %s', checkpoint)

            var_map = utils.get_ckpt_var_map(ckpt_path=checkpoint,
                                             ckpt_scope=ckpt_scope,
                                             var_scope=var_scope,
                                             var_exclude_expr=params.get(
                                                 'var_exclude_expr', None))

            tf.train.init_from_checkpoint(checkpoint, var_map)

            return tf.train.Scaffold()
    elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:

        def scaffold_fn():
            """Load moving average variables for eval."""
            logging.info('Load EMA vars with ema_decay=%f',
                         moving_average_decay)
            restore_vars_dict = ema.variables_to_restore(ema_vars)
            saver = tf.train.Saver(restore_vars_dict)
            return tf.train.Scaffold(saver=saver)
    else:
        scaffold_fn = None

    if params['strategy'] != 'tpu':
        # Profile every 1K steps.
        profile_hook = tf.train.ProfilerHook(save_steps=1000,
                                             output_dir=params['model_dir'])
        training_hooks.append(profile_hook)

        # Report memory allocations if an OOM error occurs.
        class OomReportingHook(tf.estimator.SessionRunHook):
            def before_run(self, run_context):
                return tf.estimator.SessionRunArgs(
                    fetches=[],
                    options=tf.RunOptions(
                        report_tensor_allocations_upon_oom=True))

        training_hooks.append(OomReportingHook())

    return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                             loss=total_loss,
                                             train_op=train_op,
                                             eval_metrics=eval_metrics,
                                             host_call=utils.get_tpu_host_call(
                                                 global_step, params),
                                             scaffold_fn=scaffold_fn,
                                             training_hooks=training_hooks)
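
A sketch of how a model_fn like this is usually bound to a concrete network and handed to an estimator; efficientdet_model, run_config, train_input_fn, and the params keys here are stand-ins for the real wiring, not this repository's exact code.

import functools

model_fn = functools.partial(_model_fn, model=efficientdet_model)
estimator = tf.estimator.tpu.TPUEstimator(
    model_fn=model_fn,
    config=run_config,  # assumed tf.estimator.tpu.RunConfig
    train_batch_size=params['batch_size'],
    params=params)
estimator.train(input_fn=train_input_fn, max_steps=params['total_steps'])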
Code example #16
def evaluate():
    """Evaluating function."""
    g = tf.Graph()
    ops_dict = {}
    with g.as_default():
        # Data set.
        if FLAGS.experiment_type == "mnist":
            config = mnist_config.ConfigDict()
            dataset = mnist.MNIST(data_dir=FLAGS.data_dir,
                                  subset=FLAGS.subset,
                                  batch_size=FLAGS.batch_size,
                                  is_training=False)
            model = mnist_model.MNISTNetwork(config)
            layers_names = [
                "conv_layer%d" % i
                for i in range(len(config.filter_sizes_conv_layers))
            ]
        images, labels, num_examples, num_classes = (dataset.images,
                                                     dataset.labels,
                                                     dataset.num_examples,
                                                     dataset.num_classes)

        logits, endpoints = model(images, is_training=False)
        layers_list = [images] + [endpoints[name] for name in layers_names]

        top1_op = tf.nn.in_top_k(logits, labels, 1)

        top1_op = tf.cast(top1_op, dtype=tf.float32)
        ops_dict["top1"] = (None, top1_op)
        accuracy_ph = tf.placeholder(tf.float32, None)
        ops_dict["top1_accuracy"] = (accuracy_ph, None)
        tf.summary.scalar("top1_accuracy", accuracy_ph)

        with tf.name_scope("optimizer"):
            global_step = tf.train.get_or_create_global_step()

        # Define losses.
        l2_loss_wt = config.l2_loss_wt
        xent_loss_wt = config.xent_loss_wt
        margin_loss_wt = config.margin_loss_wt
        gamma = config.gamma
        alpha = config.alpha
        top_k = config.top_k
        dist_norm = config.dist_norm
        with tf.name_scope("losses"):
            xent_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                               labels=labels))
            margin = margin_loss.large_margin(
                logits=logits,
                one_hot_labels=tf.one_hot(labels, num_classes),
                layers_list=layers_list,
                gamma=gamma,
                alpha_factor=alpha,
                top_k=top_k,
                dist_norm=dist_norm,
                epsilon=1e-6,
                layers_weights=[
                    np.prod(layer.get_shape().as_list()[1:])
                    for layer in layers_list
                ] if np.isinf(dist_norm) else None)
            l2_loss = 0.
            for v in tf.trainable_variables():
                tf.logging.info(v)
                l2_loss += tf.nn.l2_loss(v)

            total_loss = 0
            if xent_loss_wt > 0:
                total_loss += xent_loss_wt * xent_loss
            if margin_loss_wt > 0:
                total_loss += margin_loss_wt * margin
            if l2_loss_wt > 0:
                total_loss += l2_loss_wt * l2_loss

            xent_loss_ph = tf.placeholder(tf.float32, None)
            margin_loss_ph = tf.placeholder(tf.float32, None)
            l2_loss_ph = tf.placeholder(tf.float32, None)
            total_loss_ph = tf.placeholder(tf.float32, None)
            tf.summary.scalar("xent_loss", xent_loss_ph)
            tf.summary.scalar("margin_loss", margin_loss_ph)
            tf.summary.scalar("l2_loss", l2_loss_ph)
            tf.summary.scalar("total_loss", total_loss_ph)

            ops_dict["losses/xent_loss"] = (xent_loss_ph, xent_loss)
            ops_dict["losses/margin_loss"] = (margin_loss_ph, margin)
            ops_dict["losses/l2_loss"] = (l2_loss_ph, l2_loss)
            ops_dict["losses/total_loss"] = (total_loss_ph, total_loss)

        # Prepare evaluation session.
        merged_summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir,
                                               tf.get_default_graph())
        vars_to_save = tf.global_variables()
        saver = tf.train.Saver(var_list=vars_to_save)
        scaffold = tf.train.Scaffold(saver=saver)
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=scaffold, checkpoint_dir=FLAGS.checkpoint_dir)
        while True:
            _eval_once(session_creator, ops_dict, summary_writer,
                       merged_summary, global_step, num_examples)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
Code example #17
File: pruning.py Project: LONG-9621/Stackedcapsule
    def _update_mask(self, weights, threshold, gradients):  # pylint: disable=unused-argument
        """Updates the mask for a given weight tensor.

    This function first computes the CDF of the weight tensor, and estimates
    the threshold value such that 'desired_sparsity' fraction of weights
    have magnitude less than the threshold.

    Args:
      weights: The weight tensor that needs to be masked.
      threshold: The current threshold value. The function will compute a new
        threshold and return the exponential moving average using the current
        value of the threshold.
      gradients: The gradient tensor that is used for salience calculation.

    Returns:
      new_threshold: The new value of the threshold based on weights, and
        sparsity at the current global_step
      new_mask: A numpy array of the same size and shape as weights containing
        0 or 1 to indicate which of the values in weights falls below
        the threshold

    Raises:
      ValueError: if sparsity is not defined
    """
        if self._sparsity is None:
            raise ValueError('Sparsity variable undefined')

        sparsity = self._get_sparsity(weights.op.name)
        with tf.name_scope(weights.op.name + '_pruning_ops'):
            tf.logging.info('Applying option %s pruning',
                            self._spec.prune_option)
            if self._spec.prune_option == 'weight':
                abs_weights = tf.abs(weights)
            elif self._spec.prune_option in ('first_order_gradient',
                                             'second_order_gradient'):
                if gradients is None:
                    raise ValueError('gradient tensor cannot be None.')
                # gradient variable stores absolute value already
                abs_weights = tf.multiply(tf.abs(weights), gradients)
            else:
                raise ValueError('undefined option')

            k = tf.cast(
                tf.round(
                    tf.cast(tf.size(abs_weights), tf.float32) *
                    (1 - sparsity)), tf.int32)

            # Generate a random shuffling of the weights s.t. the tie-breaker on
            # weight magnitude is random uniform.
            shuffling = tf.random_shuffle(tf.range(tf.size(abs_weights)))
            shuffling = tf.reshape(shuffling, [-1, 1])

            # Flatten the weights and scatter the values randomly.
            abs_weights = tf.reshape(abs_weights, [-1])
            abs_weights = tf.scatter_nd(shuffling, abs_weights,
                                        tf.shape(abs_weights))

            # Sort the entire array
            _, indices = tf.nn.top_k(abs_weights, k=tf.size(abs_weights))

            # `k` is how many non-zero weights we're going to have. Create a new
            # mask where the first `k` elements are set to one and all others are
            # set to zero.
            mask_staging = tf.range(tf.size(abs_weights))
            mask_staging = tf.cast(tf.less(mask_staging, k), tf.float32)

            # Scatter the mask back into the proper positions for the weight matrix.
            indices = tf.reshape(indices, [-1, 1])
            new_mask = tf.scatter_nd(indices, mask_staging,
                                     tf.shape(mask_staging))

            # Un-shuffle the newly created mask.
            new_mask = tf.reshape(tf.gather_nd(new_mask, shuffling),
                                  tf.shape(weights))
        return tf.constant(0, tf.float32), new_mask
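
The shuffle/scatter/top_k sequence above is equivalent to a magnitude threshold with uniformly random tie-breaking. A NumPy sketch of the same selection rule, handy for checking a mask offline; it is not part of the pruning library itself.

import numpy as np

def magnitude_mask(weights, sparsity, rng=np.random):
    """Keeps the (1 - sparsity) fraction of entries largest in magnitude."""
    flat = np.abs(weights).ravel()
    k = int(round(flat.size * (1.0 - sparsity)))
    # A random permutation makes ties between equal magnitudes uniform.
    order = rng.permutation(flat.size)
    keep = order[np.argsort(-flat[order], kind='stable')[:k]]
    mask = np.zeros(flat.size, dtype=np.float32)
    mask[keep] = 1.0
    return mask.reshape(weights.shape)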
Code example #18
# validation_images = vec1[:, :-2][:VALIDATION_SIZE]
# validation_labels = vec1[:, -2:][:VALIDATION_SIZE]
train = vec1[:, :][test_size:]
train_images = train[:, :-2]
train_labels = train[:, -2:]
# test_images = vec1[:, :-2][VALIDATION_SIZE:test_size]
# test_labels = vec1[:, -2:][VALIDATION_SIZE:test_size]

test_images = vec1[:, :-2][:test_size]
test_labels = vec1[:, -2:][:test_size]

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

with tf.name_scope('inputs'):
    x = tf.placeholder(tf.float32, [None, 100], name='x_input')
    y_ = tf.placeholder(tf.float32, [None, 2], name='y_input')

    ## Reshape the image data.
    # To apply the layer, we first reshape x to a 4-D tensor, with the second
    # and third dimensions corresponding to image width and height, and the
    # final dimension corresponding to the number of color channels. -1 means
    # an arbitrary number of samples.
    x_image = tf.reshape(x, [-1, 10, 10, 1])


## Define weights and biases.
# ---- Weight Initialization ---- #
# One should generally initialize weights with a small amount of noise for
# symmetry breaking, and to prevent zero gradients.
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
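
The comment above announces helpers for both weights and bias, but the snippet is truncated after weight_variable; a conventional companion is shown here as an assumed completion.

def bias_variable(shape):
    # A small positive constant avoids dead ReLU units at initialization.
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)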
Code example #19
def focal_loss(
    logits,
    targets,
    alpha,
    gamma,
    normalizer,
):
    """Compute the focal loss between `logits` and the golden `target` values.

  Focal loss = -(1-pt)^gamma * log(pt)
  where pt is the probability of being classified to the true class.

  Args:
    logits: A float32 tensor of size
      [batch, height_in, width_in, num_predictions].
    targets: A float32 tensor of size
      [batch, height_in, width_in, num_predictions].
    alpha: A float32 scalar multiplying alpha to the loss from positive examples
      and (1-alpha) to the loss from negative examples.
    gamma: A float32 scalar modulating loss from hard and easy examples.
    normalizer: A float32 scalar normalizes the total loss from all examples.

  Returns:
    loss: A float32 Tensor of size [batch, height_in, width_in, num_predictions]
      representing normalized loss on the prediction map.
  """
    with tf.name_scope('focal_loss'):
        positive_label_mask = tf.equal(targets, 1.0)
        cross_entropy = (tf.nn.sigmoid_cross_entropy_with_logits(
            labels=targets, logits=logits))
        # Below are comments/derivations for computing modulator.
        # For brevity, let x = logits, z = targets, r = gamma, and
        # p_t = sigmoid(x) for positive samples and 1 - sigmoid(x) for
        # negative examples.
        #
        # The modulator, defined as (1 - p_t)^r, is a critical part of focal
        # loss computation. For r > 0, it puts more weight on hard examples
        # and less weight on easier ones. However, if it is directly computed
        # as (1 - p_t)^r, its back-propagation is not stable when r < 1.
        # The implementation here
        # resolves the issue.
        #
        # For positive samples (labels being 1),
        #    (1 - p_t)^r
        #  = (1 - sigmoid(x))^r
        #  = (1 - (1 / (1 + exp(-x))))^r
        #  = (exp(-x) / (1 + exp(-x)))^r
        #  = exp(log((exp(-x) / (1 + exp(-x)))^r))
        #  = exp(r * log(exp(-x)) - r * log(1 + exp(-x)))
        #  = exp(- r * x - r * log(1 + exp(-x)))
        #
        # For negative samples (labels being 0),
        #    (1 - p_t)^r
        #  = (sigmoid(x))^r
        #  = (1 / (1 + exp(-x)))^r
        #  = exp(log((1 / (1 + exp(-x)))^r))
        #  = exp(-r * log(1 + exp(-x)))
        #
        # Therefore one unified form for positive (z = 1) and negative (z = 0)
        # samples is:
        #      (1 - p_t)^r = exp(-r * z * x - r * log(1 + exp(-x))).
        neg_logits = -1.0 * logits
        modulator = tf.exp(gamma * targets * neg_logits -
                           gamma * tf.math.softplus(neg_logits))
        loss = modulator * cross_entropy
        weighted_loss = tf.where(positive_label_mask, alpha * loss,
                                 (1.0 - alpha) * loss)
        weighted_loss /= normalizer + 1e-20
    return weighted_loss
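
As a quick numerical check of the stabilized form derived in the comments, exp(-r * z * x - r * log(1 + exp(-x))) should agree with the direct (1 - p_t)^r; a small NumPy verification under gamma = 2:

import numpy as np

x = np.array([-2.0, 0.5, 3.0])   # logits
z = np.array([1.0, 0.0, 1.0])    # targets
r = 2.0                          # gamma
p = 1.0 / (1.0 + np.exp(-x))
p_t = np.where(z == 1.0, p, 1.0 - p)
direct = (1.0 - p_t) ** r
stable = np.exp(-r * z * x - r * np.log1p(np.exp(-x)))
assert np.allclose(direct, stable)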
Code example #20
def mask_head(roi_features,
              class_indices,
              num_classes=91,
              mrcnn_resolution=28,
              is_gpu_inference=False):
    """Mask branch for the Mask-RCNN model.

  Args:
    roi_features: A ROI feature tensor of shape
      [batch_size, num_rois, height_l, width_l, num_filters].
    class_indices: a Tensor of shape [batch_size, num_rois], indicating
      which class the ROI is.
    num_classes: an integer for the number of classes.
    mrcnn_resolution: an integer that is the resolution of masks.
    is_gpu_inference: whether to build the model for GPU inference.
  Returns:
    mask_outputs: a tensor with a shape of
      [batch_size, num_masks, mask_height, mask_width],
      representing the mask predictions.
  """
    def _get_stddev_equivalent_to_msra_fill(kernel_size, fan_out):
        """Returns the stddev of random normal initialization as MSRAFill."""
        # Reference: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/filler_op.h#L445-L463  # pylint: disable=line-too-long
        # For example, kernel size is (3, 3) and fan out is 256, stddev is 0.029.
        # stddev = (2/(3*3*256))^0.5 = 0.029
        return (2 / (kernel_size[0] * kernel_size[1] * fan_out))**0.5

    with tf.variable_scope('mask_head'):
        batch_size, num_rois, height, width, filters = (
            roi_features.get_shape().as_list())
        net = tf.reshape(roi_features, [-1, height, width, filters])

        for i in range(4):
            kernel_size = (3, 3)
            fan_out = 256
            init_stddev = _get_stddev_equivalent_to_msra_fill(
                kernel_size, fan_out)
            net = tf.layers.conv2d(
                net,
                fan_out,
                kernel_size=kernel_size,
                strides=(1, 1),
                padding='same',
                dilation_rate=(1, 1),
                activation=tf.nn.relu,
                kernel_initializer=tf.random_normal_initializer(
                    stddev=init_stddev),
                bias_initializer=tf.zeros_initializer(),
                name='mask-conv-l%d' % i)

        kernel_size = (2, 2)
        fan_out = 256
        init_stddev = _get_stddev_equivalent_to_msra_fill(kernel_size, fan_out)
        net = tf.layers.conv2d_transpose(
            net,
            fan_out,
            kernel_size=kernel_size,
            strides=(2, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=tf.random_normal_initializer(
                stddev=init_stddev),
            bias_initializer=tf.zeros_initializer(),
            name='conv5-mask')

        kernel_size = (1, 1)
        fan_out = num_classes
        init_stddev = _get_stddev_equivalent_to_msra_fill(kernel_size, fan_out)
        mask_outputs = tf.layers.conv2d(
            net,
            fan_out,
            kernel_size=kernel_size,
            strides=(1, 1),
            padding='valid',
            kernel_initializer=tf.random_normal_initializer(
                stddev=init_stddev),
            bias_initializer=tf.zeros_initializer(),
            name='mask_fcn_logits')
        mask_outputs = tf.reshape(
            mask_outputs,
            [-1, num_rois, mrcnn_resolution, mrcnn_resolution, num_classes])

        indices_dtype = tf.float32 if is_gpu_inference else tf.int32
        with tf.name_scope('masks_post_processing'):
            mask_outputs = tf.transpose(mask_outputs, [0, 1, 4, 2, 3])
            if batch_size == 1:
                indices = tf.reshape(
                    tf.reshape(tf.range(num_rois, dtype=indices_dtype),
                               [batch_size, num_rois, 1]) * num_classes +
                    tf.expand_dims(class_indices, axis=-1), [batch_size, -1])
                # If using the GPU for inference, delay the cast until the
                # Gather ops show up, since GPU inference handles floating
                # point better.
                # TODO(laigd): revisit this when newer versions of the GPU
                # libraries are released.
                if is_gpu_inference:
                    indices = tf.cast(indices, dtype=tf.int32)
                mask_outputs = tf.gather(tf.reshape(
                    mask_outputs,
                    [batch_size, -1, mrcnn_resolution, mrcnn_resolution]),
                                         indices,
                                         axis=1)
                mask_outputs = tf.squeeze(mask_outputs, axis=1)
                mask_outputs = tf.reshape(
                    mask_outputs,
                    [batch_size, num_rois, mrcnn_resolution, mrcnn_resolution])
            else:
                batch_indices = (tf.expand_dims(
                    tf.range(batch_size, dtype=indices_dtype), axis=1) *
                                 tf.ones([1, num_rois], dtype=indices_dtype))
                mask_indices = (tf.expand_dims(
                    tf.range(num_rois, dtype=indices_dtype), axis=0) *
                                tf.ones([batch_size, 1], dtype=indices_dtype))
                gather_indices = tf.stack(
                    [batch_indices, mask_indices, class_indices], axis=2)
                if is_gpu_inference:
                    gather_indices = tf.cast(gather_indices, dtype=tf.int32)
                mask_outputs = tf.gather_nd(mask_outputs, gather_indices)

        return mask_outputs
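
A shape-level usage sketch for mask_head with random stand-in inputs (a batch of 2 images, 100 ROIs each, 14x14x256 ROI features):

roi_features = tf.random_uniform([2, 100, 14, 14, 256])
class_indices = tf.random_uniform([2, 100], maxval=91, dtype=tf.int32)
mask_outputs = mask_head(roi_features, class_indices,
                         num_classes=91, mrcnn_resolution=28)
# mask_outputs has shape [2, 100, 28, 28]: one mask per ROI for its class.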
Code example #21
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()


# Reconstructed header and variable definitions for this truncated snippet.
def add_layer(inputs, in_size, out_size, n_layer, activation_function=None):
    """Adds a fully connected layer and logs its outputs for TensorBoard."""
    layer_name = 'layer%d' % n_layer
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            Weights = tf.Variable(tf.random_normal([in_size, out_size]),
                                  name='W')
        with tf.name_scope('biases'):
            biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='b')
        with tf.name_scope('Wx_plus_b'):
            Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases)
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)
        tf.summary.histogram(layer_name + '/outputs', outputs)
    return outputs


# Make up some real data
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise

# define placeholder for inputs to network
with tf.name_scope('inputs'):
    xs = tf.placeholder(tf.float32, [None, 1], name='x_input')
    ys = tf.placeholder(tf.float32, [None, 1], name='y_input')

# add hidden layer
l1 = add_layer(xs, 1, 10, n_layer=1, activation_function=tf.nn.relu)
# add output layer
prediction = add_layer(l1, 10, 1, n_layer=2, activation_function=None)

# the error between prediction and real data
with tf.name_scope('loss'):
    loss = tf.reduce_mean(
        tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))
    tf.summary.scalar('loss', loss)

with tf.name_scope('train'):
    # Assumed completion of the truncated snippet: a plain gradient-descent
    # step on the loss defined above.
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
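
A short driver would complete the tutorial script above: merge the summaries, initialize variables, and alternate training steps with summary writes. The log directory and step counts here are assumptions.

merged = tf.summary.merge_all()
with tf.Session() as sess:
    writer = tf.summary.FileWriter('logs/', sess.graph)  # assumed log dir
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        sess.run(train_step, feed_dict={xs: x_data, ys: y_data})
        if step % 50 == 0:
            result = sess.run(merged, feed_dict={xs: x_data, ys: y_data})
            writer.add_summary(result, step)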
Code example #22
def inception_model_fn(features, labels, mode, params):
  """Inception v3 model using Estimator API."""
  num_classes = FLAGS.num_classes
  is_training = (mode == tf.estimator.ModeKeys.TRAIN)
  is_eval = (mode == tf.estimator.ModeKeys.EVAL)

  if isinstance(features, dict):
    features = features['feature']

  features = tensor_transform_fn(features, params['input_perm'])

  # This nested function allows us to avoid duplicating the logic which
  # builds the network, for different values of --precision.
  def build_network():
    if FLAGS.precision == 'bfloat16':
      with contrib_tpu.bfloat16_scope():
        logits, end_points = inception.inception_v3(
            features,
            num_classes,
            is_training=is_training)
      logits = tf.cast(logits, tf.float32)
    elif FLAGS.precision == 'float32':
      logits, end_points = inception.inception_v3(
          features,
          num_classes,
          is_training=is_training)
    return logits, end_points

  if FLAGS.clear_update_collections:
    # updates_collections must be set to None in order to use fused batchnorm
    with arg_scope(inception.inception_v3_arg_scope(
        weight_decay=0.0,
        batch_norm_decay=BATCH_NORM_DECAY,
        batch_norm_epsilon=BATCH_NORM_EPSILON,
        updates_collections=None)):
      logits, end_points = build_network()
  else:
    with arg_scope(inception.inception_v3_arg_scope(
        batch_norm_decay=BATCH_NORM_DECAY,
        batch_norm_epsilon=BATCH_NORM_EPSILON)):
      logits, end_points = build_network()

  predictions = {
      'classes': tf.argmax(input=logits, axis=1),
      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
  }

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs={
            'classify': tf.estimator.export.PredictOutput(predictions)
        })

  if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
      not FLAGS.use_tpu):
    with tf.control_dependencies([
        tf.Print(
            predictions['classes'], [predictions['classes']],
            summarize=FLAGS.eval_batch_size,
            message='prediction: ')
    ]):
      labels = tf.Print(
          labels, [labels], summarize=FLAGS.eval_batch_size, message='label: ')

  one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

  if 'AuxLogits' in end_points:
    tf.losses.softmax_cross_entropy(
        onehot_labels=one_hot_labels,
        logits=tf.cast(end_points['AuxLogits'], tf.float32),
        weights=0.4,
        label_smoothing=0.1,
        scope='aux_loss')

  tf.losses.softmax_cross_entropy(
      onehot_labels=one_hot_labels,
      logits=logits,
      weights=1.0,
      label_smoothing=0.1)

  losses = tf.add_n(tf.losses.get_losses())
  l2_loss = []
  for v in tf.trainable_variables():
    if 'BatchNorm' not in v.name and 'weights' in v.name:
      l2_loss.append(tf.nn.l2_loss(v))
  loss = losses + WEIGHT_DECAY * tf.add_n(l2_loss)

  initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
  if FLAGS.use_learning_rate_warmup:
    # Adjust initial learning rate to match final warmup rate
    warmup_decay = FLAGS.learning_rate_decay**(
        (FLAGS.warmup_epochs + FLAGS.cold_epochs) /
        FLAGS.learning_rate_decay_epochs)
    adj_initial_learning_rate = initial_learning_rate * warmup_decay

  final_learning_rate = 0.0001 * initial_learning_rate

  host_call = None
  train_op = None
  if is_training:
    batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
    global_step = tf.train.get_or_create_global_step()
    current_epoch = tf.cast(
        (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

    learning_rate = tf.train.exponential_decay(
        learning_rate=initial_learning_rate,
        global_step=global_step,
        decay_steps=int(FLAGS.learning_rate_decay_epochs * batches_per_epoch),
        decay_rate=FLAGS.learning_rate_decay,
        staircase=True)

    if FLAGS.use_learning_rate_warmup:
      wlr = 0.1 * adj_initial_learning_rate
      wlr_height = tf.cast(
          0.9 * adj_initial_learning_rate /
          (FLAGS.warmup_epochs + FLAGS.learning_rate_decay_epochs - 1),
          tf.float32)
      epoch_offset = tf.cast(FLAGS.cold_epochs - 1, tf.int32)
      exp_decay_start = (FLAGS.warmup_epochs + FLAGS.cold_epochs +
                         FLAGS.learning_rate_decay_epochs)
      lin_inc_lr = tf.add(
          wlr, tf.multiply(
              tf.cast(tf.subtract(current_epoch, epoch_offset), tf.float32),
              wlr_height))
      learning_rate = tf.where(
          tf.greater_equal(current_epoch, FLAGS.cold_epochs),
          (tf.where(tf.greater_equal(current_epoch, exp_decay_start),
                    learning_rate, lin_inc_lr)),
          wlr)

    # Set a minimum boundary for the learning rate.
    learning_rate = tf.maximum(
        learning_rate, final_learning_rate, name='learning_rate')

    if FLAGS.optimizer == 'sgd':
      tf.logging.info('Using SGD optimizer')
      optimizer = tf.train.GradientDescentOptimizer(
          learning_rate=learning_rate)
    elif FLAGS.optimizer == 'momentum':
      tf.logging.info('Using Momentum optimizer')
      optimizer = tf.train.MomentumOptimizer(
          learning_rate=learning_rate, momentum=0.9)
    elif FLAGS.optimizer == 'RMS':
      tf.logging.info('Using RMS optimizer')
      optimizer = tf.train.RMSPropOptimizer(
          learning_rate,
          RMSPROP_DECAY,
          momentum=RMSPROP_MOMENTUM,
          epsilon=RMSPROP_EPSILON)
    else:
      tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)

    if FLAGS.use_tpu:
      optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step=global_step)
    if FLAGS.moving_average:
      ema = tf.train.ExponentialMovingAverage(
          decay=MOVING_AVERAGE_DECAY, num_updates=global_step)
      variables_to_average = (
          tf.trainable_variables() + tf.moving_average_variables())
      with tf.control_dependencies([train_op]), tf.name_scope('moving_average'):
        train_op = ema.apply(variables_to_average)

    # To log the loss, current learning rate, and epoch for Tensorboard, the
    # summary op needs to be run on the host CPU via host_call. host_call
    # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
    # dimension. These Tensors are implicitly concatenated to
    # [params['batch_size']].
    gs_t = tf.reshape(global_step, [1])
    loss_t = tf.reshape(loss, [1])
    lr_t = tf.reshape(learning_rate, [1])
    ce_t = tf.reshape(current_epoch, [1])

    if not FLAGS.skip_host_call:
      def host_call_fn(gs, loss, lr, ce):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide them as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor` with shape `[batch]` for the global_step.
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
        gs = gs[0]
        with summary.create_file_writer(FLAGS.model_dir).as_default():
          with summary.always_record_summaries():
            summary.scalar('loss', tf.reduce_mean(loss), step=gs)
            summary.scalar('learning_rate', tf.reduce_mean(lr), step=gs)
            summary.scalar('current_epoch', tf.reduce_mean(ce), step=gs)

            return summary.all_summary_ops()

      host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

  eval_metrics = None
  if is_eval:
    def metric_fn(labels, logits):
      """Evaluation metric function. Evaluates accuracy.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `eval_metrics`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `eval_metrics`.

      Args:
        labels: `Tensor` with shape `[batch, ]`.
        logits: `Tensor` with shape `[batch, num_classes]`.

      Returns:
        A dict of the metrics to return from evaluation.
      """
      predictions = tf.argmax(logits, axis=1)
      top_1_accuracy = tf.metrics.accuracy(labels, predictions)
      in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
      top_5_accuracy = tf.metrics.mean(in_top_5)

      return {
          'accuracy': top_1_accuracy,
          'accuracy@5': top_5_accuracy,
      }

    eval_metrics = (metric_fn, [labels, logits])

  return contrib_tpu.TPUEstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=train_op,
      host_call=host_call,
      eval_metrics=eval_metrics)
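
The warmup branch above stitches together three phases: a constant cold learning rate (`wlr`) for the first `cold_epochs`, a linear ramp across the warmup window, and staircase exponential decay from `exp_decay_start` onwards, all floored at `final_learning_rate`. Below is a minimal pure-Python sketch of that piecewise schedule, written at epoch granularity for readability; the flag values are made up, and the real code evaluates this per step from the global step.

def piecewise_lr(epoch, base_lr, cold_epochs, warmup_epochs,
                 decay_epochs, decay_rate, min_lr):
    """Illustrative epoch-level view of the cold/warmup/decay schedule above."""
    # Staircase exponential decay, as in tf.train.exponential_decay.
    decayed = base_lr * decay_rate ** (epoch // decay_epochs)
    # Adjust the warmup target so the ramp meets the decayed curve.
    warmup_decay = decay_rate ** ((warmup_epochs + cold_epochs) / decay_epochs)
    adj_lr = base_lr * warmup_decay
    wlr = 0.1 * adj_lr
    wlr_height = 0.9 * adj_lr / (warmup_epochs + decay_epochs - 1)
    exp_decay_start = warmup_epochs + cold_epochs + decay_epochs
    if epoch < cold_epochs:
        lr = wlr                                          # cold phase
    elif epoch < exp_decay_start:
        lr = wlr + (epoch - (cold_epochs - 1)) * wlr_height  # linear warmup
    else:
        lr = decayed                                      # exponential decay
    return max(lr, min_lr)

for epoch in range(20):
    print(epoch, piecewise_lr(epoch, base_lr=0.165, cold_epochs=2,
                              warmup_epochs=5, decay_epochs=3,
                              decay_rate=0.94, min_lr=1e-5))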
Code example #23
0
File: main.py Project: anmol0709/OCR-SRGAN
    inputs_raw = tf.placeholder(tf.float32,
                                shape=[1, None, None, 3],
                                name='inputs_raw')
    targets_raw = tf.placeholder(tf.float32,
                                 shape=[1, None, None, 3],
                                 name='targets_raw')
    path_LR = tf.placeholder(tf.string, shape=[], name='path_LR')
    path_HR = tf.placeholder(tf.string, shape=[], name='path_HR')

    with tf.variable_scope('generator'):
        if FLAGS.task == 'SRGAN' or FLAGS.task == 'SRResnet':
            gen_output = generator(inputs_raw, 3, reuse=False, FLAGS=FLAGS)
        else:
            raise NotImplementedError('Unknown task!!')

    print('Finished building the network')

    with tf.name_scope('convert_image'):
        # Deprocess the images output by the model
        inputs = deprocessLR(inputs_raw)
        targets = deprocess(targets_raw)
        outputs = deprocess(gen_output)

        # Convert back to uint8
        converted_inputs = tf.image.convert_image_dtype(inputs,
                                                        dtype=tf.uint8,
                                                        saturate=True)
        converted_targets = tf.image.convert_image_dtype(targets,
                                                         dtype=tf.uint8,
                                                         saturate=True)
        converted_outputs = tf.image.convert_image_dtype(outputs,
                                                         dtype=tf.uint8,
                                                         saturate=True)
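
As a usage note, the graph above would typically be driven by restoring the trained generator weights, feeding the two placeholders, and fetching the converted uint8 output. A hypothetical sketch, assuming it runs where the snippet above has built the graph; the checkpoint path and dummy arrays are illustrative, not part of the original script.

import numpy as np

with tf.Session() as sess:
    # Restore the trained generator (checkpoint path is illustrative).
    tf.train.Saver().restore(sess, '/tmp/srgan/model.ckpt')
    lr_image = np.random.rand(1, 24, 24, 3).astype(np.float32)  # dummy LR input
    hr_image = np.random.rand(1, 96, 96, 3).astype(np.float32)  # dummy HR target
    output = sess.run(converted_outputs,
                      feed_dict={inputs_raw: lr_image, targets_raw: hr_image})
    print(output.shape, output.dtype)  # e.g. (1, 96, 96, 3), uint8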
Code example #24
0
File: modeling.py Project: HabanaAI/Model-References
def transformer_model(input_tensor,
                      attention_mask=None,
                      hidden_size=768,
                      num_hidden_layers=12,
                      num_hidden_groups=12,
                      num_attention_heads=12,
                      intermediate_size=3072,
                      inner_group_num=1,
                      intermediate_act_fn="gelu",
                      hidden_dropout_prob=0.1,
                      attention_probs_dropout_prob=0.1,
                      initializer_range=0.02,
                      do_return_all_layers=False,
                      use_einsum=True):
    """Multi-headed, multi-layer Transformer from "Attention is All You Need".

  This is almost an exact implementation of the original Transformer encoder.

  See the original paper:
  https://arxiv.org/abs/1706.03762

  Also see:
  https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py

  Args:
    input_tensor: float Tensor of shape [batch_size, seq_length, hidden_size].
    attention_mask: (optional) int32 Tensor of shape [batch_size, seq_length],
      with 1 for positions that can be attended to and 0 in positions that
      should not be.
    hidden_size: int. Hidden size of the Transformer.
    num_hidden_layers: int. Number of layers (blocks) in the Transformer.
    num_hidden_groups: int. Number of group for the hidden layers, parameters
      in the same group are shared.
    num_attention_heads: int. Number of attention heads in the Transformer.
    intermediate_size: int. The size of the "intermediate" (a.k.a., feed
      forward) layer.
    inner_group_num: int, number of inner repetition of attention and ffn.
    intermediate_act_fn: function. The non-linear activation function to apply
      to the output of the intermediate/feed-forward layer.
    hidden_dropout_prob: float. Dropout probability for the hidden layers.
    attention_probs_dropout_prob: float. Dropout probability of the attention
      probabilities.
    initializer_range: float. Range of the initializer (stddev of truncated
      normal).
    do_return_all_layers: Whether to also return all layers or just the final
      layer.
    use_einsum: bool. Whether to use einsum or reshape+matmul for dense layers.

  Returns:
    float Tensor of shape [batch_size, seq_length, hidden_size], the final
    hidden layer of the Transformer.

  Raises:
    ValueError: A Tensor shape or parameter is invalid.
  """
    if hidden_size % num_attention_heads != 0:
        raise ValueError(
            "The hidden size (%d) is not a multiple of the number of attention "
            "heads (%d)" % (hidden_size, num_attention_heads))

    attention_head_size = hidden_size // num_attention_heads
    input_shape = get_shape_list(input_tensor, expected_rank=3)
    input_width = input_shape[2]

    all_layer_outputs = []
    if input_width != hidden_size:
        prev_output = dense_layer_2d(input_tensor,
                                     hidden_size,
                                     create_initializer(initializer_range),
                                     None,
                                     use_einsum=use_einsum,
                                     name="embedding_hidden_mapping_in")
    else:
        prev_output = input_tensor
    with tf.variable_scope("transformer", reuse=tf.AUTO_REUSE):
        for layer_idx in range(num_hidden_layers):
            group_idx = int(layer_idx / num_hidden_layers * num_hidden_groups)
            with tf.variable_scope("group_%d" % group_idx):
                with tf.name_scope("layer_%d" % layer_idx):
                    layer_output = prev_output
                    for inner_group_idx in range(inner_group_num):
                        with tf.variable_scope("inner_group_%d" %
                                               inner_group_idx):
                            layer_output = attention_ffn_block(
                                layer_input=layer_output,
                                hidden_size=hidden_size,
                                attention_mask=attention_mask,
                                num_attention_heads=num_attention_heads,
                                attention_head_size=attention_head_size,
                                attention_probs_dropout_prob=
                                attention_probs_dropout_prob,
                                intermediate_size=intermediate_size,
                                intermediate_act_fn=intermediate_act_fn,
                                initializer_range=initializer_range,
                                hidden_dropout_prob=hidden_dropout_prob,
                                use_einsum=use_einsum)
                            prev_output = layer_output
                            all_layer_outputs.append(layer_output)
    if do_return_all_layers:
        return all_layer_outputs
    else:
        return all_layer_outputs[-1]
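
The `group_idx` arithmetic above is what implements ALBERT-style parameter sharing: `num_hidden_layers` layers are mapped onto `num_hidden_groups` variable scopes (opened with `reuse=tf.AUTO_REUSE`), so layers that land in the same group reuse the same weights. A quick pure-Python illustration of the mapping:

num_hidden_layers, num_hidden_groups = 12, 3
for layer_idx in range(num_hidden_layers):
    group_idx = int(layer_idx / num_hidden_layers * num_hidden_groups)
    print('layer_%d -> group_%d' % (layer_idx, group_idx))
# Layers 0-3 share group_0, 4-7 share group_1, 8-11 share group_2.
# With num_hidden_groups == num_hidden_layers the mapping is one-to-one
# (no sharing); with num_hidden_groups == 1 every layer shares a single
# set of weights, as in the ALBERT paper.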
Code example #25
0
File: nelder_mead.py Project: xzxzmmnn/probability
def minimize(objective_function,
             initial_simplex=None,
             initial_vertex=None,
             step_sizes=None,
             objective_at_initial_simplex=None,
             objective_at_initial_vertex=None,
             batch_evaluate_objective=False,
             func_tolerance=1e-8,
             position_tolerance=1e-8,
             parallel_iterations=1,
             max_iterations=None,
             reflection=None,
             expansion=None,
             contraction=None,
             shrinkage=None,
             name=None):
    """Minimum of the objective function using the Nelder Mead simplex algorithm.

  Performs an unconstrained minimization of a (possibly non-smooth) function
  using the Nelder Mead simplex method. Nelder Mead method does not support
  univariate functions. Hence the dimensions of the domain must be 2 or greater.
  For details of the algorithm, see
  [Press, Teukolsky, Vetterling and Flannery(2007)][1].

  Points in the domain of the objective function may be represented as a
  `Tensor` of general shape but with rank at least 1. The algorithm proceeds
  by modifying a full rank simplex in the domain. The initial simplex may
  either be specified by the user or can be constructed using a single vertex
  supplied by the user. In the latter case, if `v0` is the supplied vertex,
  the simplex is the convex hull of the set:

  ```None
  S = {v0} + {v0 + step_i * e_i}
  ```

  Here `e_i` is a vector which is `1` along the `i`-th axis and zero elsewhere
  and `step_i` is a characteristic length scale along the `i`-th axis. If the
  step size is not supplied by the user, a unit step size is used in every axis.
  Alternately, a single step size may be specified which is used for every
  axis. The most flexible option is to supply a bespoke step size for every
  axis.

  ### Usage:

  The following example demonstrates the usage of the Nelder Mead minimization
  on a two dimensional problem with the minimum located at a non-differentiable
  point.

  ```python
    # The objective function
    def sqrt_quadratic(x):
      return tf.sqrt(tf.reduce_sum(x ** 2, axis=-1))

    start = tf.constant([6.0, -21.0])  # Starting point for the search.
    optim_results = tfp.optimizer.nelder_mead_minimize(
        sqrt_quadratic, initial_vertex=start, func_tolerance=1e-8,
        batch_evaluate_objective=True)

    # Check that the search converged
    assert(optim_results.converged)
    # Check that the argmin is close to the actual value.
    np.testing.assert_allclose(optim_results.position, np.array([0.0, 0.0]),
                                atol=1e-7)
    # Print out the total number of function evaluations it took.
    print("Function evaluations: %d" % optim_results.num_objective_evaluations)
  ```

  ### References:
  [1]: William Press, Saul Teukolsky, William Vetterling and Brian Flannery.
    Numerical Recipes in C++, third edition. pp. 502-507. (2007).
    http://numerical.recipes/cpppages/chap0sel.pdf

  [2]: Jeffrey Lagarias, James Reeds, Margaret Wright and Paul Wright.
    Convergence properties of the Nelder-Mead simplex method in low dimensions,
    Siam J. Optim., Vol 9, No. 1, pp. 112-147. (1998).
    http://www.math.kent.edu/~reichel/courses/Opt/reading.material.2/nelder.mead.pdf

  [3]: Fuchang Gao and Lixing Han. Implementing the Nelder-Mead simplex
    algorithm with adaptive parameters. Computational Optimization and
    Applications, Vol 51, Issue 1, pp 259-277. (2012).
    https://pdfs.semanticscholar.org/15b4/c4aa7437df4d032c6ee6ce98d6030dd627be.pdf

  Args:
    objective_function:  A Python callable that accepts a point as a
      real `Tensor` and returns a `Tensor` of real dtype containing
      the value of the function at that point. The function
      to be minimized. If `batch_evaluate_objective` is `True`, the callable
      may be evaluated on a `Tensor` of shape `[n+1] + s ` where `n` is
      the dimension of the problem and `s` is the shape of a single point
      in the domain (so `n` is the size of a `Tensor` representing a
      single point).
      In this case, the expected return value is a `Tensor` of shape `[n+1]`.
      Note that this method does not support univariate functions so the problem
      dimension `n` must be strictly greater than 1.
    initial_simplex: (Optional) `Tensor` of real dtype. The initial simplex to
      start the search. If supplied, should be a `Tensor` of shape `[n+1] + s`
      where `n` is the dimension of the problem and `s` is the shape of a
      single point in the domain. Each row (i.e. the `Tensor` with a given
      value of the first index) is interpreted as a vertex of a simplex and
      hence the rows must be affinely independent. If not supplied, an axes
      aligned simplex is constructed using the `initial_vertex` and
      `step_sizes`. Exactly one of `initial_simplex` and `initial_vertex`
      must be supplied.
    initial_vertex: (Optional) `Tensor` of real dtype and any shape that can
      be consumed by the `objective_function`. A single point in the domain that
      will be used to construct an axes aligned initial simplex.
    step_sizes: (Optional) `Tensor` of real dtype and shape broadcasting
      compatible with `initial_vertex`. Supplies the simplex scale along each
      axes. Only used if `initial_simplex` is not supplied. See description
      above for details on how step sizes and initial vertex are used to
      construct the initial simplex.
    objective_at_initial_simplex: (Optional) Rank `1` `Tensor` of real dtype.
      The value of the objective function at the
      initial simplex. May be supplied only if `initial_simplex` is
      supplied. If not supplied, it will be computed.
    objective_at_initial_vertex: (Optional) Scalar `Tensor` of real dtype. The
      value of the objective function at the initial vertex. May be supplied
      only if the `initial_vertex` is also supplied.
    batch_evaluate_objective: (Optional) Python `bool`. If True, the objective
      function will be evaluated on all the vertices of the simplex packed
      into a single tensor. If False, the objective will be mapped across each
      vertex separately. Evaluating the objective function in a batch allows
      use of vectorization and should be preferred if the objective function
      allows it.
    func_tolerance: (Optional) Scalar `Tensor` of real dtype. The algorithm
      stops if the absolute difference between the largest and the smallest
      function value on the vertices of the simplex is below this number.
    position_tolerance: (Optional) Scalar `Tensor` of real dtype. The
      algorithm stops if the largest absolute difference between the
      coordinates of the vertices is below this threshold.
    parallel_iterations: (Optional) Positive integer. The number of iterations
      allowed to run in parallel.
    max_iterations: (Optional) Scalar positive `Tensor` of dtype `int32`.
      The maximum number of iterations allowed. If `None` then no limit is
      applied.
    reflection: (Optional) Positive Scalar `Tensor` of same dtype as
      `initial_vertex`. This parameter controls the scaling of the reflected
      vertex. See, [Press et al(2007)][1] for details. If not specified,
      uses the dimension dependent prescription of [Gao and Han(2012)][3].
    expansion: (Optional) Positive Scalar `Tensor` of same dtype as
      `initial_vertex`. Should be greater than `1` and `reflection`. This
      parameter controls the expanded scaling of a reflected vertex.
      See, [Press et al(2007)][1] for details. If not specified, uses the
      dimension dependent prescription of [Gao and Han(2012)][3].
    contraction: (Optional) Positive scalar `Tensor` of same dtype as
      `initial_vertex`. Must be between `0` and `1`. This parameter controls
      the contraction of the reflected vertex when the objective function at
      the reflected point fails to show sufficient decrease.
      See, [Press et al(2007)][1] for more details. If not specified, uses
      the dimension dependent prescription of [Gao and Han(2012)][3].
    shrinkage: (Optional) Positive scalar `Tensor` of same dtype as
      `initial_vertex`. Must be between `0` and `1`. This parameter is the scale
      by which the simplex is shrunk around the best point when the other
      steps fail to produce improvements.
      See, [Press et al(2007)][1] for more details. If not specified, uses
      the dimension dependent prescription of [Gao and Han(2012)][3].
    name: (Optional) Python str. The name prefixed to the ops created by this
      function. If not supplied, the default name 'minimize' is used.

  Returns:
    optimizer_results: A namedtuple containing the following items:
      converged: Scalar boolean tensor indicating whether the minimum was
        found within tolerance.
      num_objective_evaluations: The total number of objective
        evaluations performed.
      position: A `Tensor` containing the last argument value found
        during the search. If the search converged, then
        this value is the argmin of the objective function.
      objective_value: A tensor containing the value of the objective
        function at the `position`. If the search
        converged, then this is the (local) minimum of
        the objective function.
      final_simplex: The last simplex constructed before stopping.
      final_objective_values: The objective function evaluated at the
        vertices of the final simplex.
      initial_simplex: The starting simplex.
      initial_objective_values: The objective function evaluated at the
        vertices of the initial simplex.
      num_iterations: The number of iterations of the main algorithm body.

  Raises:
    ValueError: If any of the following conditions hold
      1. If none or more than one of `initial_simplex` and `initial_vertex` are
        supplied.
      2. If `initial_simplex` and `step_sizes` are both specified.
  """
    with tf1.name_scope(name, 'minimize', [
            initial_simplex, initial_vertex, step_sizes,
            objective_at_initial_simplex, objective_at_initial_vertex,
            func_tolerance, position_tolerance
    ]):
        (dim, _, simplex, objective_at_simplex,
         num_evaluations) = _prepare_args(objective_function, initial_simplex,
                                          initial_vertex, step_sizes,
                                          objective_at_initial_simplex,
                                          objective_at_initial_vertex,
                                          batch_evaluate_objective)
        domain_dtype = simplex.dtype
        (reflection, expansion, contraction,
         shrinkage) = _resolve_parameters(dim, reflection, expansion,
                                          contraction, shrinkage, domain_dtype)

        closure_kwargs = dict(
            objective_function=objective_function,
            dim=dim,
            func_tolerance=func_tolerance,
            position_tolerance=position_tolerance,
            batch_evaluate_objective=batch_evaluate_objective,
            reflection=reflection,
            expansion=expansion,
            contraction=contraction,
            shrinkage=shrinkage)

        def _loop_body(_, iterations, simplex, objective_at_simplex,
                       num_evaluations):
            (converged, next_simplex, next_objective,
             evaluations) = nelder_mead_one_step(simplex, objective_at_simplex,
                                                 **closure_kwargs)

            return (converged, iterations + 1, next_simplex, next_objective,
                    num_evaluations + evaluations)

        initial_args = (False, 0, simplex, objective_at_simplex,
                        num_evaluations)

        # Loop until we converge or, if a max_iterations budget was supplied,
        # until we converge or exhaust that budget.
        def _is_converged(converged, num_iterations, *ignored_args):  # pylint:disable=unused-argument
            # It is important to ensure that not_converged is a tensor. If
            # converged is not a tensor but a Python bool, then the overloaded
            # op '~' acts as bitwise complement so ~True = -2 and ~False = -1.
            # In that case, the loop will never terminate.
            not_converged = tf.logical_not(converged)
            return (not_converged if max_iterations is None else
                    (not_converged & (num_iterations < max_iterations)))

        (converged, num_iterations, final_simplex, final_objective_values,
         final_evaluations) = tf.while_loop(
             cond=_is_converged,
             body=_loop_body,
             loop_vars=initial_args,
             parallel_iterations=parallel_iterations)
        order = tf.argsort(final_objective_values,
                           direction='ASCENDING',
                           stable=True)
        best_index = order[0]
        # The explicit cast to Tensor below is done to avoid returning a mixture
        # of Python types and Tensors which cause problems with session.run.
        # In the eager mode, converged may remain a Python bool. Trying to evaluate
        # the whole tuple in one evaluate call will raise an exception because
        # of the presence of non-tensors. This is very annoying so we explicitly
        # cast those arguments to Tensors.
        return NelderMeadOptimizerResults(
            converged=tf.convert_to_tensor(value=converged),
            num_objective_evaluations=final_evaluations,
            position=final_simplex[best_index],
            objective_value=final_objective_values[best_index],
            final_simplex=final_simplex,
            final_objective_values=final_objective_values,
            num_iterations=tf.convert_to_tensor(value=num_iterations),
            initial_simplex=simplex,
            initial_objective_values=objective_at_simplex)
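
For intuition, the axes-aligned simplex that `initial_vertex` and `step_sizes` induce (the set `S = {v0} + {v0 + step_i * e_i}` from the docstring) can be built in a few lines of NumPy. A minimal sketch, not the library's internal `_prepare_args`:

import numpy as np

def axes_aligned_simplex(v0, step_sizes=1.0):
    """Builds the n+1 vertices {v0} U {v0 + step_i * e_i} for an n-dim point."""
    v0 = np.asarray(v0, dtype=np.float64)
    n = v0.size
    steps = np.broadcast_to(np.asarray(step_sizes, dtype=np.float64), (n,))
    vertices = np.tile(v0, (n + 1, 1))
    vertices[1:] += np.diag(steps)   # vertex i+1 = v0 + step_i * e_i
    return vertices                  # shape [n+1, n]

print(axes_aligned_simplex([6.0, -21.0], step_sizes=[1.0, 2.0]))
# [[  6. -21.]
#  [  7. -21.]
#  [  6. -19.]]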
Code example #26
0
File: det_model_fn.py Project: azayz/automl
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
  """Model definition entry.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in data/dataloader.py
    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    model: the model outputs class logits and box regression outputs.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule.

  Returns:
    tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction.

  Raises:
    RuntimeError: if both ckpt and backbone_ckpt are set.
  """
  utils.image('input_image', features)
  training_hooks = []
  params['is_training_bn'] = (mode == tf.estimator.ModeKeys.TRAIN)

  if params['use_keras_model']:
    def model_fn(inputs):
      model = efficientdet_keras.EfficientDetNet(
          config=hparams_config.Config(params))
      cls_out_list, box_out_list = model(inputs, params['is_training_bn'])
      cls_outputs, box_outputs = {}, {}
      for i in range(params['min_level'], params['max_level'] + 1):
        cls_outputs[i] = cls_out_list[i - params['min_level']]
        box_outputs[i] = box_out_list[i - params['min_level']]
      return cls_outputs, box_outputs
  else:
    model_fn = functools.partial(model, config=hparams_config.Config(params))

  precision = utils.get_precision(params['strategy'], params['mixed_precision'])
  cls_outputs, box_outputs = utils.build_model_with_precision(
      precision, model_fn, features, params['is_training_bn'])

  levels = cls_outputs.keys()
  for level in levels:
    cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
    box_outputs[level] = tf.cast(box_outputs[level], tf.float32)

  # First check if it is in PREDICT mode.
  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'image': features,
    }
    for level in levels:
      predictions['cls_outputs_%d' % level] = cls_outputs[level]
      predictions['box_outputs_%d' % level] = box_outputs[level]
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Set up training loss and learning rate.
  update_learning_rate_schedule_parameters(params)
  global_step = tf.train.get_or_create_global_step()
  learning_rate = learning_rate_schedule(params, global_step)

  # cls_loss and box_loss are for logging. Only total_loss is optimized.
  det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
      cls_outputs, box_outputs, labels, params)
  reg_l2loss = reg_l2_loss(params['weight_decay'])
  total_loss = det_loss + reg_l2loss

  if mode == tf.estimator.ModeKeys.TRAIN:
    utils.scalar('lrn_rate', learning_rate)
    utils.scalar('trainloss/cls_loss', cls_loss)
    utils.scalar('trainloss/box_loss', box_loss)
    utils.scalar('trainloss/det_loss', det_loss)
    utils.scalar('trainloss/reg_l2_loss', reg_l2loss)
    utils.scalar('trainloss/loss', total_loss)
    if params['iou_loss_type']:
      utils.scalar('trainloss/box_iou_loss', box_iou_loss)
    train_epochs = tf.cast(global_step, tf.float32) / params['steps_per_epoch']
    utils.scalar('train_epochs', train_epochs)

  moving_average_decay = params['moving_average_decay']
  if moving_average_decay:
    ema = tf.train.ExponentialMovingAverage(
        decay=moving_average_decay, num_updates=global_step)
    ema_vars = utils.get_ema_vars()

  if mode == tf.estimator.ModeKeys.TRAIN:
    if params['optimizer'].lower() == 'sgd':
      optimizer = tf.train.MomentumOptimizer(
          learning_rate, momentum=params['momentum'])
    elif params['optimizer'].lower() == 'adam':
      optimizer = tf.train.AdamOptimizer(learning_rate)
    else:
      raise ValueError('optimizer should be adam or sgd')

    if params['strategy'] == 'tpu':
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    var_list = tf.trainable_variables()
    if variable_filter_fn:
      var_list = variable_filter_fn(var_list)

    if params.get('clip_gradients_norm', None):
      logging.info('clip gradients norm by %f', params['clip_gradients_norm'])
      grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
      with tf.name_scope('clip'):
        grads = [gv[0] for gv in grads_and_vars]
        tvars = [gv[1] for gv in grads_and_vars]
        # First clip each variable's norm, then clip global norm.
        clip_norm = abs(params['clip_gradients_norm'])
        clipped_grads = [tf.clip_by_norm(g, clip_norm) for g in grads]
        clipped_grads, _ = tf.clip_by_global_norm(clipped_grads, clip_norm)
        utils.scalar('gradient_norm', tf.linalg.global_norm(clipped_grads))
        grads_and_vars = list(zip(clipped_grads, tvars))

      with tf.control_dependencies(update_ops):
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)
    else:
      with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(
            total_loss, global_step, var_list=var_list)

    if moving_average_decay:
      with tf.control_dependencies([train_op]):
        train_op = ema.apply(ema_vars)

  else:
    train_op = None

  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(**kwargs):
      """Returns a dictionary that has the evaluation metrics."""
      if params['nms_configs'].get('pyfunc', True):
        detections_bs = []
        for index in range(kwargs['boxes'].shape[0]):
          nms_configs = params['nms_configs']
          detections = tf.numpy_function(
              functools.partial(nms_np.per_class_nms, nms_configs=nms_configs),
              [
                  kwargs['boxes'][index],
                  kwargs['scores'][index],
                  kwargs['classes'][index],
                  tf.slice(kwargs['image_ids'], [index], [1]),
                  tf.slice(kwargs['image_scales'], [index], [1]),
                  params['num_classes'],
                  nms_configs['max_output_size'],
              ], tf.float32)
          detections_bs.append(detections)
        detections_bs = postprocess.transform_detections(
            tf.stack(detections_bs))
      else:
        # These two branches should be equivalent, but currently they are not.
        # TODO(tanmingxing): enable the non_pyfun path after bug fix.
        nms_boxes, nms_scores, nms_classes, _ = postprocess.per_class_nms(
            params, kwargs['boxes'], kwargs['scores'], kwargs['classes'],
            kwargs['image_scales'])
        img_ids = tf.cast(
            tf.expand_dims(kwargs['image_ids'], -1), nms_scores.dtype)
        detections_bs = [
            img_ids * tf.ones_like(nms_scores),
            nms_boxes[:, :, 1],
            nms_boxes[:, :, 0],
            nms_boxes[:, :, 3] - nms_boxes[:, :, 1],
            nms_boxes[:, :, 2] - nms_boxes[:, :, 0],
            nms_scores,
            nms_classes,
        ]
        detections_bs = tf.stack(detections_bs, axis=-1, name='detections')

      if params.get('testdev_dir', None):
        logging.info('Eval testdev_dir %s', params['testdev_dir'])
        eval_metric = coco_metric.EvaluationMetric(
            testdev_dir=params['testdev_dir'])
        coco_metrics = eval_metric.estimator_metric_fn(detections_bs,
                                                       tf.zeros([1]))
      else:
        logging.info('Eval val with groundtruths %s.', params['val_json_file'])
        eval_metric = coco_metric.EvaluationMetric(
            filename=params['val_json_file'])
        coco_metrics = eval_metric.estimator_metric_fn(
            detections_bs, kwargs['groundtruth_data'], params['label_map'])

      # Add metrics to output.
      cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
      output_metrics = {
          'cls_loss': cls_loss,
          'box_loss': box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics

    cls_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(cls_loss, 0), [
            params['batch_size'],
        ]), [params['batch_size'], 1])
    box_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(box_loss, 0), [
            params['batch_size'],
        ]), [params['batch_size'], 1])

    cls_outputs = postprocess.to_list(cls_outputs)
    box_outputs = postprocess.to_list(box_outputs)
    params['nms_configs']['max_nms_inputs'] = anchors.MAX_DETECTION_POINTS
    boxes, scores, classes = postprocess.pre_nms(params, cls_outputs,
                                                 box_outputs)
    metric_fn_inputs = {
        'cls_loss_repeat': cls_loss_repeat,
        'box_loss_repeat': box_loss_repeat,
        'image_ids': labels['source_ids'],
        'groundtruth_data': labels['groundtruth_data'],
        'image_scales': labels['image_scales'],
        'boxes': boxes,
        'scores': scores,
        'classes': classes,
    }
    eval_metrics = (metric_fn, metric_fn_inputs)

  checkpoint = params.get('ckpt') or params.get('backbone_ckpt')

  if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
    # Initialize the model from an EfficientDet or backbone checkpoint.
    if params.get('ckpt') and params.get('backbone_ckpt'):
      raise RuntimeError(
          '--backbone_ckpt and --checkpoint are mutually exclusive')

    if params.get('backbone_ckpt'):
      var_scope = params['backbone_name'] + '/'
      if params['ckpt_var_scope'] is None:
        # Use backbone name as default checkpoint scope.
        ckpt_scope = params['backbone_name'] + '/'
      else:
        ckpt_scope = params['ckpt_var_scope'] + '/'
    else:
      # Load every var in the given checkpoint
      var_scope = ckpt_scope = '/'

    def scaffold_fn():
      """Loads pretrained model through scaffold function."""
      logging.info('restore variables from %s', checkpoint)

      var_map = utils.get_ckpt_var_map(
          ckpt_path=checkpoint,
          ckpt_scope=ckpt_scope,
          var_scope=var_scope,
          skip_mismatch=params['skip_mismatch'])

      tf.train.init_from_checkpoint(checkpoint, var_map)
      return tf.train.Scaffold()
  elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:

    def scaffold_fn():
      """Load moving average variables for eval."""
      logging.info('Load EMA vars with ema_decay=%f', moving_average_decay)
      restore_vars_dict = ema.variables_to_restore(ema_vars)
      saver = tf.train.Saver(restore_vars_dict)
      return tf.train.Scaffold(saver=saver)
  else:
    scaffold_fn = None

  if params['strategy'] != 'tpu':
    # Profile every 1K steps.
    if params.get('profile', False):
      profile_hook = tf.estimator.ProfilerHook(
          save_steps=1000, output_dir=params['model_dir'], show_memory=True)
      training_hooks.append(profile_hook)

      # Report memory allocation if OOM
      class OomReportingHook(tf.estimator.SessionRunHook):

        def before_run(self, run_context):
          return tf.estimator.SessionRunArgs(
              fetches=[],
              options=tf.RunOptions(report_tensor_allocations_upon_oom=True))

      training_hooks.append(OomReportingHook())

    logging_hook = tf.estimator.LoggingTensorHook(
        {
            'step': global_step,
            'det_loss': det_loss,
            'cls_loss': cls_loss,
            'box_loss': box_loss,
        },
        every_n_iter=params.get('iterations_per_loop', 100),
    )
    training_hooks.append(logging_hook)
  if params['strategy'] == 'tpu':
    return tf.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        eval_metrics=eval_metrics,
        host_call=utils.get_tpu_host_call(global_step, params),
        scaffold_fn=scaffold_fn,
        training_hooks=training_hooks)
  else:
    eval_metric_ops = (
        eval_metrics[0](**eval_metrics[1]) if eval_metrics else None)
    utils.get_tpu_host_call(global_step, params)
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        scaffold=scaffold_fn() if scaffold_fn else None,
        training_hooks=training_hooks)
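
One detail in the training branch worth isolating is the two-stage gradient clipping: every gradient is first clipped to `clip_norm` individually with `tf.clip_by_norm`, and the resulting set is then clipped again by global norm. A self-contained TF1-style sketch of just that step; the variable and loss are toy stand-ins.

import tensorflow as tf

clip_norm = 10.0
w = tf.Variable([3.0, 4.0])
loss = tf.reduce_sum(w * w)
optimizer = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = optimizer.compute_gradients(loss, [w])
grads = [g for g, _ in grads_and_vars]
tvars = [v for _, v in grads_and_vars]
# Stage 1: per-tensor clipping. Stage 2: global-norm clipping.
clipped = [tf.clip_by_norm(g, clip_norm) for g in grads]
clipped, _ = tf.clip_by_global_norm(clipped, clip_norm)
train_op = optimizer.apply_gradients(zip(clipped, tvars))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)
    # Gradient 2w = [6, 8] has norm exactly 10, so clipping is a no-op here
    # and a plain SGD step is applied: prints [2.4 3.2].
    print(sess.run(w))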
Code example #27
0
File: networks.py Project: SopiMlab/magenta
def generator(z,
              progress,
              num_filters_fn,
              resolution_schedule,
              num_blocks=None,
              kernel_size=3,
              colors=3,
              to_rgb_activation=None,
              simple_arch=False,
              scope='progressive_gan_generator',
              reuse=None):
    """Generator network for the progressive GAN model.

  Args:
    z: A `Tensor` of latent vector. The first dimension must be batch size.
    progress: A scalar float `Tensor` of training progress.
    num_filters_fn: A function that maps `block_id` to # of filters for the
        block.
    resolution_schedule: An object of `ResolutionSchedule`.
    num_blocks: An integer of number of blocks. None means maximum number of
        blocks, i.e. `resolution_schedule.num_resolutions`. Defaults to None.
    kernel_size: An integer of convolution kernel size.
    colors: Number of output color channels. Defaults to 3.
    to_rgb_activation: Activation function applied when output rgb.
    simple_arch: Architecture variants for lower memory usage and faster speed.
    scope: A string or variable scope.
    reuse: Whether to reuse `scope`. Defaults to None which means to inherit
        the reuse option of the parent scope.
  Returns:
    A `Tensor` of model output and a dictionary of model end points.
  """
    if num_blocks is None:
        num_blocks = resolution_schedule.num_resolutions

    start_h, start_w = resolution_schedule.start_resolutions
    final_h, final_w = resolution_schedule.final_resolutions

    def _conv2d(scope, x, kernel_size, filters, padding='SAME'):
        return layers.custom_conv2d(
            x=x,
            filters=filters,
            kernel_size=kernel_size,
            padding=padding,
            activation=lambda x: layers.pixel_norm(tf.nn.leaky_relu(x)),
            he_initializer_slope=0.0,
            scope=scope)

    def _to_rgb(x):
        return layers.custom_conv2d(x=x,
                                    filters=colors,
                                    kernel_size=1,
                                    padding='SAME',
                                    activation=to_rgb_activation,
                                    scope='to_rgb')

    he_init = tf_slim.variance_scaling_initializer()

    end_points = {}
    scalers = {}
    offsets = {}

    def hook(name, x):
        end_points[name] = x
        scaler_ph = tf.placeholder_with_default(np.ones(
            1, x.dtype.as_numpy_dtype),
                                                shape=1,
                                                name="{}_scaler".format(name))
        scalers[name] = scaler_ph
        offset_ph = tf.placeholder_with_default(np.zeros(
            x.shape, x.dtype.as_numpy_dtype),
                                                shape=x.shape,
                                                name=name)
        offsets[name] = offset_ph
        return x * scaler_ph + offset_ph

    with tf.variable_scope(scope, reuse=reuse):
        with tf.name_scope('input'):
            x = tf_slim.flatten(z)
            end_points['latent_vector'] = x

        with tf.variable_scope(block_name(1)):
            if simple_arch:
                x_shape = tf.shape(x)
                x = tf.layers.dense(x,
                                    start_h * start_w * num_filters_fn(1),
                                    kernel_initializer=he_init)
                x = tf.nn.relu(x)
                x = tf.reshape(
                    x, [x_shape[0], start_h, start_w,
                        num_filters_fn(1)])
            else:
                x = tf.expand_dims(tf.expand_dims(x, 1), 1)
                x = layers.pixel_norm(x)
                # Pad the 1 x 1 image to (2 * start_h - 1) x (2 * start_w - 1)
                # with zeros for the next conv.
                x = tf.pad(
                    x,
                    [[0] * 2, [start_h - 1] * 2, [start_w - 1] * 2, [0] * 2])
                # The output is start_h x start_w x num_filters_fn(1).
                x = _conv2d('conv0', x, (start_h, start_w), num_filters_fn(1),
                            'VALID')
                x = hook('conv0', x)
                x = _conv2d('conv1', x, kernel_size, num_filters_fn(1))
                x = hook('conv1', x)
            lods = [x]

        if resolution_schedule.scale_mode == 'H':
            strides = (resolution_schedule.scale_base, 1)
        else:
            strides = (resolution_schedule.scale_base,
                       resolution_schedule.scale_base)

        for block_id in range(2, num_blocks + 1):
            with tf.variable_scope(block_name(block_id)):
                if simple_arch:
                    x = tf.layers.conv2d_transpose(x,
                                                   num_filters_fn(block_id),
                                                   kernel_size=kernel_size,
                                                   strides=strides,
                                                   padding='SAME',
                                                   kernel_initializer=he_init)
                    x = tf.nn.relu(x)
                else:
                    x = resolution_schedule.upscale(
                        x, resolution_schedule.scale_base)
                    x = _conv2d('conv0', x, kernel_size,
                                num_filters_fn(block_id))
                    x = hook('conv0_{}'.format(block_id), x)
                    x = _conv2d('conv1', x, kernel_size,
                                num_filters_fn(block_id))
                    x = hook('conv1_{}'.format(block_id), x)
                lods.append(x)

        outputs = []
        for block_id in range(1, num_blocks + 1):
            with tf.variable_scope(block_name(block_id)):
                if simple_arch:
                    lod = lods[block_id - 1]
                    lod = tf.layers.conv2d(lod,
                                           colors,
                                           kernel_size=1,
                                           padding='SAME',
                                           name='to_rgb',
                                           kernel_initializer=he_init)
                    lod = to_rgb_activation(lod)
                else:
                    lod = _to_rgb(lods[block_id - 1])
                scale = resolution_schedule.scale_factor(block_id)
                lod = resolution_schedule.upscale(lod, scale)
                end_points['upscaled_rgb_{}'.format(block_id)] = lod

                # alpha_i is used to replace lod_select. Note sum(alpha_i) is
                # guaranteed to be 1.
                alpha = _generator_alpha(block_id, progress)
                end_points['alpha_{}'.format(block_id)] = alpha

                outputs.append(lod * alpha)

        predictions = tf.add_n(outputs)
        batch_size = int(z.shape[0])
        predictions.set_shape([batch_size, final_h, final_w, colors])
        end_points['predictions'] = predictions

    return predictions, end_points, {"scalers": scalers, "offsets": offsets}
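
The returned `predictions` tensor is a convex blend of the per-resolution RGB outputs: `_generator_alpha` (defined elsewhere in networks.py and not shown here) assigns each `block_id` a weight that depends on training `progress`, with the weights summing to 1. Below is a plausible NumPy sketch of such a triangular weighting, offered as an assumption about its shape rather than the verbatim function.

import numpy as np

def generator_alpha(block_id, progress):
    # Triangular weight: ramps up as `progress` approaches block_id - 1 and
    # back down afterwards. An assumption consistent with the
    # "sum(alpha_i) is guaranteed to be 1" comment, not the verbatim function.
    rise = np.clip(progress - (block_id - 2), 0.0, 1.0)
    fall = np.clip(progress - (block_id - 1), 0.0, 1.0)
    return rise - fall

for progress in [0.0, 0.5, 1.0, 1.5]:
    alphas = [generator_alpha(b, progress) for b in range(1, 5)]
    print(progress, [round(a, 2) for a in alphas], 'sum =', round(sum(alphas), 2))
# At progress 0.5 the output blends 50% of block 1's RGB with 50% of block 2's.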
Code example #28
0
    def cnn(self):
        with tf.device('/cpu:0'):
            self.embedding = tf.get_variable(
                "embeddings",
                shape=[self.config.vocab_size, self.config.embedding_size],
                initializer=tf.constant_initializer(self.config.pre_trianing))
            self.embedding_inputs = tf.nn.embedding_lookup(
                self.embedding, self.input_x)
            self.embedding_inputs_expanded = tf.expand_dims(
                self.embedding_inputs, -1)

        with tf.name_scope('cnn'):
            pooled_outputs = []
            for i, filter_size in enumerate(self.config.filter_sizes):
                with tf.name_scope("conv-maxpool-%s" % filter_size):
                    filter_shape = [
                        filter_size, self.config.embedding_size, 1,
                        self.config.num_filters
                    ]
                    W = tf.Variable(tf.truncated_normal(filter_shape,
                                                        stddev=0.1),
                                    name="W")
                    b = tf.Variable(tf.constant(
                        0.1, shape=[self.config.num_filters]),
                                    name="b")
                    conv = tf.nn.conv2d(self.embedding_inputs_expanded,
                                        W,
                                        strides=[1, 1, 1, 1],
                                        padding="VALID",
                                        name="conv")
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    pooled = tf.nn.max_pool(h,
                                            ksize=[
                                                1, self.config.seq_length -
                                                filter_size + 1, 1, 1
                                            ],
                                            strides=[1, 1, 1, 1],
                                            padding='VALID',
                                            name="pool")
                    pooled_outputs.append(pooled)

            num_filters_total = self.config.num_filters * len(
                self.config.filter_sizes)
            self.h_pool = tf.concat(pooled_outputs, 3)
            self.outputs = tf.reshape(self.h_pool, [-1, num_filters_total])

        with tf.name_scope("dropout"):
            self.final_output = tf.nn.dropout(self.outputs, self.keep_prob)

        with tf.name_scope('output'):
            fc_w = tf.get_variable('fc_w',
                                   shape=[
                                       self.final_output.shape[1].value,
                                       self.config.num_classes
                                   ])
            fc_b = tf.Variable(tf.constant(0.1,
                                           shape=[self.config.num_classes]),
                               name='fc_b')
            self.logits = tf.matmul(self.final_output, fc_w) + fc_b
            self.prob = tf.nn.softmax(self.logits)
            self.y_pred_cls = tf.argmax(self.logits, 1, name='predictions')

        with tf.name_scope('loss'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.input_y)
            self.l2_loss += tf.nn.l2_loss(fc_w)
            self.l2_loss += tf.nn.l2_loss(fc_b)
            # Mean cross-entropy plus L2 regularization on the output layer.
            self.loss = tf.reduce_mean(
                cross_entropy) + self.config.l2_reg_lambda * self.l2_loss

        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.config.lr)
            gradients, variables = zip(*optimizer.compute_gradients(self.loss))
            gradients, _ = tf.clip_by_global_norm(gradients, self.config.clip)
            self.optim = optimizer.apply_gradients(
                zip(gradients, variables), global_step=self.global_step)

        with tf.name_scope('accuracy'):
            correct_pred = tf.equal(tf.argmax(self.input_y, 1),
                                    self.y_pred_cls)
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
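
A quick sanity check on the shapes in the `cnn` branch above: a VALID convolution of height `filter_size` over a length-`seq_length` sequence leaves `seq_length - filter_size + 1` positions, and the max-pool window covers all of them, so each branch contributes exactly `num_filters` values to `h_pool`. In plain Python, with made-up config values:

seq_length, embedding_size = 100, 128
filter_sizes, num_filters = [3, 4, 5], 64

for f in filter_sizes:
    conv_positions = seq_length - f + 1   # VALID conv over the time axis
    print('filter %d: conv output %d x 1, pooled to 1 x 1 x %d'
          % (f, conv_positions, num_filters))

num_filters_total = num_filters * len(filter_sizes)
print('h_pool reshaped to [-1, %d]' % num_filters_total)  # [-1, 192]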
Code example #29
0
File: tcn.py Project: csJoax/neural_recommend
    def build(self):
        """
        Build the computation graph.
        :return:
        """
        tf.reset_default_graph()
        self._train_graph = train_graph = tf.Graph()
        with train_graph.as_default():
            # Get the input placeholders
            self._inputs = inputs = self.get_inputs()
            # Get the user's four embedding vectors
            self._user_embeds = user_embed = self.get_user_embeds(inputs)
            # Build the combined user feature
            self._user_combined_feature = user_combine = \
                self.get_user_combined_feature(user_embed, fc_outputs=self.num_outputs)
            user_combine_layer, user_combine_layer_flat = user_combine.get_combine_layer(
            )

            # Get the embedding vector for the movie ID
            self._item_embeds = movie_embed = self.get_item_embeds(inputs)

            # Get the feature vector for the movie title
            item_convs = self.get_item_convs(
                inputs=movie_embed["movie_titles"],
                dropout_keep_prob=inputs["dropout_keep_prob"])
            pool_layer_flat, dropout_layer = item_convs["movie_titles"]

            # Build the combined movie feature
            self._item_combined_feature = movie_combine = \
                self.get_movie_combined_feature(movie_embed, dropout_layer, fc_outputs=self.num_outputs)
            movie_combine_layer, movie_combine_layer_flat = movie_combine.get_combine_layer(
            )

            # Compute the rating. Note that the two alternative schemes give
            # the inference op different names; when serving recommendations
            # later, the tensor must be fetched by the matching name.
            with tf.name_scope("inference"):
                # Scheme 1: feed the user and movie features through a fully
                # connected layer that outputs a single value
                #         inference_layer = tf.concat([user_combine_layer_flat, movie_combine_layer_flat], 1)  #(?, 200)
                #         inference = tf.layers.dense(inference_layer, 1,
                #                                     kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                #                                     kernel_regularizer=tf.nn.l2_loss, name="inference")
                # Scheme 2: simply multiply the user and movie features to
                # get a predicted rating
                #        inference = tf.matmul(user_combine_layer_flat, tf.transpose(movie_combine_layer_flat))
                inference = tf.reduce_sum(user_combine_layer_flat *
                                          movie_combine_layer_flat,
                                          axis=1)
                self._inference = tf.expand_dims(inference, axis=1)

            with tf.name_scope("loss"):
                # MSE loss: regress the predicted value onto the rating
                self._cost = tf.losses.mean_squared_error(
                    inputs['targets'], self._inference)
                self._loss = tf.reduce_mean(self._cost)
            # Optimize the loss
            #     train_op = tf.train.AdamOptimizer(lr).minimize(loss)  #cost
            self._global_step = tf.Variable(0,
                                            name="global_step",
                                            trainable=False)
            self._optimizer = tf.train.AdamOptimizer(inputs['LearningRate'])
            self._gradients = self._optimizer.compute_gradients(
                self._loss)  # cost
            self._train_op = self._optimizer.apply_gradients(
                self._gradients, global_step=self._global_step)
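
The inference op above scores a (user, movie) pair as the row-wise dot product of the two combined feature vectors, which the MSE loss then regresses onto the observed rating. The equivalent NumPy arithmetic, with an illustrative feature width:

import numpy as np

user_feat = np.random.rand(4, 200).astype(np.float32)    # batch of user features
movie_feat = np.random.rand(4, 200).astype(np.float32)   # matching movie features
# Row-wise dot product, then expand to [batch, 1] to match the targets.
rating = np.sum(user_feat * movie_feat, axis=1, keepdims=True)
print(rating.shape)  # (4, 1)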
Code example #30
0
    def loss(self, prediction_dict):
        """
        Returns the cost for the RCNN based on:

        Args:
            prediction_dict with keys:
                rcnn:
                    cls_score: shape (num_proposals, num_classes + 1)
                        Has the class score for each of the proposals. Classes
                        are 1-indexed with 0 being the background.

                    cls_prob: shape (num_proposals, num_classes + 1)
                        Application of softmax on cls_score.

                    bbox_offsets: shape (num_proposals, num_classes * 4)
                        Has the offset for each proposal for each class.
                        We have to compare only the proposals labeled with the
                        offsets for that label.

                target:
                    cls_target: shape (num_proposals,)
                        Has the correct label for each of the proposals.
                        0 => background
                        1..n => 1-indexed classes

                    bbox_offsets_target: shape (num_proposals, 4)
                        Has the true offset of each proposal for the true
                        label.
                        In case of not having a true label (non-background)
                        then it's just zeroes.

        Returns:
            loss_dict with keys:
                rcnn_cls_loss: The cross-entropy or log-loss of the
                    classification task over the num_classes + 1 classes
                    (background included).
                rcnn_reg_loss: The smooth L1 loss for the bounding box
                    regression task to adjust correctly labeled boxes.

        """
        with tf.name_scope('RCNNLoss'):
            cls_score = prediction_dict['rcnn']['cls_score']
            # cls_prob = prediction_dict['rcnn']['cls_prob']
            # Cast target explicitly as int32.
            cls_target = tf.cast(prediction_dict['target']['cls'], tf.int32)

            # First we need to calculate the log loss between cls_prob and
            # cls_target

            # We only care for the targets that are >= 0
            not_ignored = tf.reshape(tf.greater_equal(cls_target, 0), [-1],
                                     name='not_ignored')
            # We apply boolean mask to score, prob and target.
            cls_score_labeled = tf.boolean_mask(cls_score,
                                                not_ignored,
                                                name='cls_score_labeled')
            # cls_prob_labeled = tf.boolean_mask(
            #    cls_prob, not_ignored, name='cls_prob_labeled')
            cls_target_labeled = tf.boolean_mask(cls_target,
                                                 not_ignored,
                                                 name='cls_target_labeled')

            tf.summary.scalar('batch_size',
                              tf.shape(cls_score_labeled)[0], ['rcnn'])

            # Transform to one-hot vector
            cls_target_one_hot = tf.one_hot(cls_target_labeled,
                                            depth=self._num_classes + 1,
                                            name='cls_target_one_hot')

            # We get cross entropy loss of each proposal.
            cross_entropy_per_proposal = (
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=tf.stop_gradient(cls_target_one_hot),
                    logits=cls_score_labeled))

            if self._debug:
                prediction_dict['_debug']['losses'] = {}
                # Save the cross entropy per proposal to be able to
                # visualize proposals with high and low error.
                prediction_dict['_debug']['losses'][
                    'cross_entropy_per_proposal'] = (
                        cross_entropy_per_proposal)

            # Second we need to calculate the smooth l1 loss between
            # `bbox_offsets` and `bbox_offsets_target`.
            bbox_offsets = prediction_dict['rcnn']['bbox_offsets']
            bbox_offsets_target = (prediction_dict['target']['bbox_offsets'])

            # We only want the non-background labels bounding boxes.
            not_ignored = tf.reshape(tf.greater(cls_target, 0), [-1])
            bbox_offsets_labeled = tf.boolean_mask(bbox_offsets,
                                                   not_ignored,
                                                   name='bbox_offsets_labeled')
            bbox_offsets_target_labeled = tf.boolean_mask(
                bbox_offsets_target,
                not_ignored,
                name='bbox_offsets_target_labeled')

            cls_target_labeled = tf.boolean_mask(cls_target,
                                                 not_ignored,
                                                 name='cls_target_labeled')
            # `cls_target_labeled` is based on `cls_target` which has
            # `num_classes` + 1 classes.
            # for making `one_hot` with depth `num_classes` to work we need
            # to lower them to make them 0-index.
            cls_target_labeled = cls_target_labeled - 1

            cls_target_one_hot = tf.one_hot(cls_target_labeled,
                                            depth=self._num_classes,
                                            name='cls_target_one_hot')

            # cls_target now is (num_labeled, num_classes)
            bbox_flatten = tf.reshape(bbox_offsets_labeled, [-1, 4],
                                      name='bbox_flatten')

            # We use the flatten cls_target_one_hot as boolean mask for the
            # bboxes.
            cls_flatten = tf.cast(tf.reshape(cls_target_one_hot, [-1]),
                                  tf.bool, 'cls_flatten_as_bool')

            bbox_offset_cleaned = tf.boolean_mask(bbox_flatten, cls_flatten,
                                                  'bbox_offset_cleaned')

            # Calculate the smooth l1 loss between the "cleaned" bboxes
            # offsets (that means, the useful results) and the labeled
            # targets.
            reg_loss_per_proposal = smooth_l1_loss(bbox_offset_cleaned,
                                                   bbox_offsets_target_labeled,
                                                   sigma=self._l1_sigma)

            tf.summary.scalar('rcnn_foreground_samples',
                              tf.shape(bbox_offset_cleaned)[0], ['rcnn'])

            if self._debug:
                # Also save reg loss per proposals to be able to visualize
                # good and bad proposals in debug mode.
                prediction_dict['_debug']['losses'][
                    'reg_loss_per_proposal'] = (reg_loss_per_proposal)

            return {
                'rcnn_cls_loss': tf.reduce_mean(cross_entropy_per_proposal),
                'rcnn_reg_loss': tf.reduce_mean(reg_loss_per_proposal),
            }
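
The flattened one-hot trick near the end deserves unpacking: `bbox_offsets` stores 4 offsets per class per proposal, and reshaping both the offsets (to one row per (proposal, class) pair) and the one-hot class targets (to a flat boolean mask) selects exactly the 4 offsets belonging to each proposal's true class. A NumPy sketch with 2 proposals and 3 classes; the numbers are arbitrary.

import numpy as np

num_classes = 3
# Per-proposal offsets, 4 per class: shape (num_proposals, num_classes * 4).
bbox_offsets = np.arange(2 * num_classes * 4, dtype=np.float32).reshape(2, -1)
cls_target = np.array([2, 0])        # 0-indexed true class per proposal

one_hot = np.eye(num_classes, dtype=bool)[cls_target]   # (2, 3)
bbox_flatten = bbox_offsets.reshape(-1, 4)   # (6, 4): one row per (proposal, class)
cls_flatten = one_hot.reshape(-1)            # (6,) boolean mask
selected = bbox_flatten[cls_flatten]         # (2, 4): offsets of each true class
print(selected)
# [[ 8.  9. 10. 11.]   # proposal 0, class 2
#  [12. 13. 14. 15.]]  # proposal 1, class 0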