コード例 #1
0
def vgg_dual_16(inputs1,
                inputs2,
                num_classes=1000,
                is_training=True,
                dropout_keep_prob=0.5,
                spatial_squeeze=True,
                scope='vgg_16',
                update_top_only=False,
                fc_conv_padding='VALID',
                reuse=False):
  """Builds a two-stream VGG-16 style network.

  Each input is passed through the same conv1..conv5 stack (built under
  identical scope names), the two conv5 outputs are concatenated along
  axis 3, and the result goes through conv6/conv7 followed by a
  convolutional classifier head (fc6_, fc7_ and, optionally, fc8).

  Args:
    inputs1: Input tensor of the first stream.
    inputs2: Input tensor of the second stream.
    num_classes: Number of outputs of the final `fc8` layer. When None the
      `fc8` layer is not added.
    is_training: Forwarded to the dropout layers.
    dropout_keep_prob: Keep probability of the dropout layers.
    spatial_squeeze: If True, axes 1 and 2 of the output are squeezed out.
    scope: Name or scope handed to the enclosing variable scope.
    update_top_only: Currently unused; kept so existing callers keep working.
    fc_conv_padding: Padding of the 7x7 `fc6_` convolution.
    reuse: Reuse flag forwarded to `vgg_arg_scope`.

  Returns:
    A `(net, end_points)` tuple: the output tensor and a dict built from the
    outputs collected for conv2d, fully_connected and max_pool2d layers.
  """
  with tf.compat.v1.variable_scope(scope, 'vgg_16', [inputs1]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      nets = []
      for i, inputs in enumerate([inputs1, inputs2]):
        # The second stream (i > 0) always asks for reuse, so both streams
        # are built under the same conv1..conv5 scope names.
        with slim.arg_scope(vgg_arg_scope(reuse=reuse or (i > 0))):
          net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
          net = slim.max_pool2d(net, [2, 2], scope='pool1')
          net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
          net = slim.max_pool2d(net, [2, 2], scope='pool2')
          net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
          net = slim.max_pool2d(net, [2, 2], scope='pool3')
          net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
          net = slim.max_pool2d(net, [2, 2], scope='pool4')
          net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
          nets.append(net)
      with slim.arg_scope(vgg_arg_scope(reuse=reuse)):
        # Fuse the two streams before the shared head.
        net = tf.concat(nets, 3)
        net = slim.conv2d(net, 512, [1, 1], scope='conv6')
        net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool6')

        net = slim.conv2d(net, 512, [1, 1], scope='conv7')
        net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool7')

        # Use conv2d instead of fully_connected layers.
        net = slim.conv2d(net, 2048, [7, 7], padding=fc_conv_padding,
                          scope='fc6_')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')

        net = slim.conv2d(net, 2048, [1, 1], scope='fc7_')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7_')
        if num_classes is not None:
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='fc8')
        # Convert end_points_collection into a end_point dict.
        end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        if spatial_squeeze:
          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
          # Stored under the '/fc8' key even when num_classes is None.
          end_points[sc.name + '/fc8'] = net
        return net, end_points
コード例 #2
0
def create_test_network_6():
    """Builds a two-branch test graph whose right branch ends in dropout.

    The left branch is a single 1x1 convolution with stride 4. The right
    branch pads the input by one element at the start of axes 1 and 2, then
    applies a 3x3 stride-2 convolution, a 1x1 stride-2 convolution and
    dropout. Both branches are added and passed through a ReLU named
    'output'.

    Returns:
      The `tf.Graph` in which the network was built.
    """
    graph = tf.Graph()
    with graph.as_default():
        # Single-channel input whose batch and spatial sizes are unknown.
        image = tf.placeholder(
            tf.float32, (None, None, None, 1), name='input_image')

        # Left branch.
        left = slim.conv2d(
            image, 1, [1, 1], stride=4, padding='VALID', scope='L1')

        # Right branch.
        padded = tf.pad(image, [[0, 0], [1, 0], [1, 0], [0, 0]])
        right = slim.conv2d(
            padded, 1, [3, 3], stride=2, padding='VALID', scope='L2')
        right = slim.conv2d(
            right, 1, [1, 1], stride=2, padding='VALID', scope='L3')
        right = slim.dropout(right)

        # Merge the branches.
        merged = left + right
        tf.nn.relu(merged, name='output')
    return graph
コード例 #3
0
def _predict_object_embeddings(object_features,
                               output_dims,
                               slim_fc_scope,
                               keep_prob=1.0,
                               is_training=False):
    """Maps object features to `output_dims`-dimensional embeddings.

    Applies fully_connected -> dropout -> fully_connected (no activation on
    the last layer) inside the 'object_projection' variable scope.

    Args:
      object_features: A [batch, max_num_objects, feature_dims] float tensor.
      output_dims: Output size of both fully connected layers.
      slim_fc_scope: Slim arg scope applied to the layers.
      keep_prob: Keep probability of the dropout layer.
      is_training: Forwarded to the dropout layer.

    Returns:
      A [batch, max_num_objects, output_dims] float tensor.
    """
    with slim.arg_scope(slim_fc_scope):
        with tf.variable_scope('object_projection'):
            hidden = slim.fully_connected(object_features,
                                          num_outputs=output_dims)
            hidden = slim.dropout(hidden, keep_prob, is_training=is_training)
            # Final projection is linear (no activation function).
            projected = slim.fully_connected(hidden,
                                             num_outputs=output_dims,
                                             activation_fn=None)
    return projected
コード例 #4
0
ファイル: graph_networks.py プロジェクト: yekeren/WSSGG
    def _build(self,
               input_graph,
               hidden_size=50,
               attn_scale=1.0,
               attn_dropout_keep_prob=1.0,
               regularizer=None,
               is_training=False):
        """Runs one attention-weighted message passing step on `input_graph`.

        Edge values and scalar attention logits are both produced by linear
        edge blocks that see the edge, receiver-node and sender-node
        features. The logits are scaled, normalized over received edges,
        passed through dropout, and used to weight the edge values before
        they are summed into their receiver nodes.

        Args:
          input_graph: Graph with `nodes` and `edges` whose last dimensions
            must match.
          hidden_size: Not used by the active code path.
          attn_scale: Multiplier applied to the attention logits.
          attn_dropout_keep_prob: Keep probability of the attention dropout.
          regularizer: Regularizer for the 'w' parameter of the linear models.
          is_training: Forwarded to the dropout layer.

        Returns:
          `input_graph` with nodes replaced by the aggregated attended values
          and edges replaced by the updated edge values.
        """
        value_dims = input_graph.nodes.shape[-1].value
        assert value_dims == input_graph.edges.shape[-1].value

        def _linear_model_fn(output_size):
            # Factory for the edge model constructor expected by EdgeBlock.
            return lambda: snt.Linear(output_size=output_size,
                                      regularizers={'w': regularizer})

        # Both edge blocks look at the same inputs.
        edge_block_inputs = dict(use_edges=True,
                                 use_receiver_nodes=True,
                                 use_sender_nodes=True,
                                 use_globals=False)

        # Updated edge values; presumably [total_num_edges, value_dims].
        value_block = blocks.EdgeBlock(
            edge_model_fn=_linear_model_fn(value_dims),
            name='update_edge_values',
            **edge_block_inputs)
        edge_values = value_block(input_graph).edges
        tf.summary.histogram('mpnn/edge_values', edge_values)

        # One attention logit per edge.
        logits_block = blocks.EdgeBlock(
            edge_model_fn=_linear_model_fn(1),
            name='update_attention_logits',
            **edge_block_inputs)
        scaled_logits = attn_scale * logits_block(input_graph).edges
        tf.summary.histogram('mpnn/logits', scaled_logits)

        attention = modules._received_edges_normalizer(
            input_graph.replace(edges=scaled_logits),
            normalizer=self._normalizer)
        attention = slim.dropout(attention,
                                 attn_dropout_keep_prob,
                                 is_training=is_training)

        # Weight the edge values and sum them into their receiver nodes.
        weighted_edges = edge_values * attention
        aggregator = blocks.ReceivedEdgesToNodesAggregator(
            reducer=tf.math.unsorted_segment_sum)
        node_updates = aggregator(input_graph.replace(edges=weighted_edges))

        return input_graph.replace(nodes=node_updates, edges=edge_values)
コード例 #5
0
    def adapt_detection_features(self, detection_features):
        """Projects detection features through a two-layer MLP.

        The hidden layer uses ReLU and `detection_mlp_hidden_units` units;
        the linear output layer has `self._bert_config.hidden_size` units.
        Dropout is applied between the two layers.

        Args:
          detection_features: Detection features.

        Returns:
          The projected detection features.
        """
        training = self._is_training
        model_options = self._model_proto

        with tf.variable_scope('detection'):
            hidden = slim.fully_connected(
                detection_features,
                model_options.detection_mlp_hidden_units,
                activation_fn=tf.nn.relu,
                scope='hidden')
            hidden = slim.dropout(hidden,
                                  keep_prob=model_options.dropout_keep_prob,
                                  is_training=training)
            embeddings = slim.fully_connected(hidden,
                                              self._bert_config.hidden_size,
                                              activation_fn=None,
                                              scope='output')
        return embeddings
コード例 #6
0
def model(
    inputs,
    is_training = True,
    dropout_keep_prob = 0.8,
    reuse = None,
    scope = 'InceptionV4',
    bottleneck_dim = 512,
):
    """Builds an Inception-v4 based network with a 2-way output layer.

    The output of `inception_v4_base` is globally average-pooled, passed
    through dropout, flattened, projected to `bottleneck_dim` units and
    finally mapped to 2 logits.

    Args:
        inputs: Input tensor handed to `inception_v4_base`.
        is_training: Forwarded to batch norm and dropout layers.
        dropout_keep_prob: Keep probability of the pre-logits dropout.
        reuse: Reuse flag of the enclosing variable scope.
        scope: Name or scope of the enclosing variable scope.
        bottleneck_dim: Number of units of the bottleneck layer.

    Returns:
        The logits tensor produced by the 'Logits_vad' layer.
    """
    with tf.variable_scope(
        scope, 'InceptionV4', [inputs], reuse = reuse
    ) as scope:
        with slim.arg_scope(
            [slim.batch_norm, slim.dropout], is_training = is_training
        ):
            net, end_points = inception_v4_base(inputs, scope = scope)

            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                stride = 1,
                padding = 'SAME',
            ):
                with tf.variable_scope('Logits'):
                    # Pool over the full spatial extent. When the static
                    # spatial shape is unknown, fall back to a mean over
                    # axes 1 and 2.
                    kernel_size = net.get_shape()[1:3]
                    if kernel_size.is_fully_defined():
                        net = slim.avg_pool2d(
                            net,
                            kernel_size,
                            padding = 'VALID',
                            scope = 'AvgPool_1a',
                        )
                    else:
                        net = tf.reduce_mean(
                            input_tensor = net,
                            axis = [1, 2],
                            keepdims = True,
                            name = 'global_pool',
                        )
                    end_points['global_pool'] = net
                    net = slim.dropout(
                        net, dropout_keep_prob, scope = 'Dropout_1b'
                    )
                    net = slim.flatten(net, scope = 'PreLogitsFlatten')
                    end_points['PreLogitsFlatten'] = net

                    bottleneck = slim.fully_connected(
                        net, bottleneck_dim, scope = 'bottleneck'
                    )
                    logits = slim.fully_connected(
                        bottleneck,
                        2,
                        activation_fn = None,
                        scope = 'Logits_vad',
                    )
                    # Only the logits are returned; end_points is not exposed.
                    return logits
コード例 #7
0
ファイル: i3d.py プロジェクト: tpsgrp/python-app
def i3d(inputs,
        num_classes=1000,
        dropout_keep_prob=0.8,
        is_training=True,
        prediction_fn=slim.softmax,
        spatial_squeeze=True,
        reuse=None,
        scope='InceptionV1'):
    """Builds the I3D classification network on top of `i3d_base`.

    The base features are average-pooled, passed through dropout and a 1x1x1
    convolution, averaged over axis 1 (time) and optionally squeezed.

    Args:
      inputs: A 5-D float tensor of size [batch_size, num_frames, height,
        width, channels].
      num_classes: Number of predicted classes.
      dropout_keep_prob: Fraction of activation values that are retained.
      is_training: Forwarded to batch norm and dropout layers.
      prediction_fn: Function turning logits into predictions.
      spatial_squeeze: If True, axes 1 and 2 of the time-averaged logits are
        squeezed out.
      reuse: Whether the network and its variables should be reused. To be
        able to reuse, 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      logits: The pre-softmax activations.
      end_points: A dictionary from components of the network to the
        corresponding activation; 'Logits' and 'Predictions' are added here.
    """
    with tf.variable_scope(scope,
                           'InceptionV1', [inputs, num_classes],
                           reuse=reuse) as net_scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            features, end_points = i3d_base(inputs, scope=net_scope)

            # Final pooling and prediction.
            with tf.variable_scope('Logits'):
                pool_size = i3d_utils.reduced_kernel_size_3d(
                    features, [2, 7, 7])
                pooled = slim.avg_pool3d(features,
                                         pool_size,
                                         stride=1,
                                         scope='AvgPool_0a_7x7')
                pooled = slim.dropout(pooled,
                                      dropout_keep_prob,
                                      scope='Dropout_0b')
                frame_logits = slim.conv3d(pooled,
                                           num_classes, [1, 1, 1],
                                           activation_fn=None,
                                           normalizer_fn=None,
                                           scope='Conv2d_0c_1x1')

                # Temporal average pooling.
                logits = tf.reduce_mean(input_tensor=frame_logits, axis=1)
                if spatial_squeeze:
                    logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')

                end_points['Logits'] = logits
                end_points['Predictions'] = prediction_fn(
                    logits, scope='Predictions')
    return logits, end_points
コード例 #8
0
ファイル: squeezenet.py プロジェクト: HitkoDev/awe
def inference(images,
              keep_probability,
              phase_train=True,
              bottleneck_layer_size=128,
              weight_decay=0.0,
              reuse=None):
    """Builds a SqueezeNet-style network ending in a bottleneck layer.

    Args:
      images: Input image tensor.
      keep_probability: Keep probability of the dropout before 'conv10'.
      phase_train: Forwarded as `is_training` to batch norm and dropout.
      bottleneck_layer_size: Number of units of the final 'Bottleneck' layer.
      weight_decay: Weight decay; the L2 regularizer uses half of this value.
      reuse: Reuse flag of the 'squeezenet' variable scope.

    Returns:
      A `(net, None)` tuple where `net` is the output of the 'Bottleneck'
      fully connected layer.
    """
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
        # Moving averages ends up in the trainable variables collection
        'variables_collections': [tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES],
    }
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
            weights_initializer=tf.compat.v1.keras.initializers.
            VarianceScaling(scale=1.0,
                            mode="fan_avg",
                            distribution="uniform"),
            weights_regularizer=tf.keras.regularizers.l2(0.5 * weight_decay),
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        # `images` belongs in `values`; passed positionally it would land in
        # the `default_name` slot of variable_scope.
        with tf.compat.v1.variable_scope('squeezenet',
                                         values=[images],
                                         reuse=reuse):
            with slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=phase_train):
                net = slim.conv2d(images, 96, [7, 7], stride=2, scope='conv1')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool1')
                net = fire_module(net, 16, 64, scope='fire2')
                net = fire_module(net, 16, 64, scope='fire3')
                net = fire_module(net, 32, 128, scope='fire4')
                net = slim.max_pool2d(net, [2, 2], stride=2, scope='maxpool4')
                net = fire_module(net, 32, 128, scope='fire5')
                net = fire_module(net, 48, 192, scope='fire6')
                net = fire_module(net, 48, 192, scope='fire7')
                net = fire_module(net, 64, 256, scope='fire8')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool8')
                net = fire_module(net, 64, 256, scope='fire9')
                net = slim.dropout(net, keep_probability)
                net = slim.conv2d(net,
                                  1000, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  scope='conv10')
                # Average over the whole remaining spatial extent, then drop
                # the two spatial axes.
                net = slim.avg_pool2d(net,
                                      net.get_shape()[1:3],
                                      scope='avgpool10')
                net = tf.squeeze(net, [1, 2], name='logits')
                net = slim.fully_connected(net,
                                           bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck',
                                           reuse=False)
    return net, None
コード例 #9
0
def model(
    inputs,
    is_training=True,
    dropout_keep_prob=0.8,
    reuse=None,
    scope='InceptionV4',
    create_aux_logits=True,
    num_classes=2,
):
    """Builds an Inception-v4 based classifier and returns its logits.

    The output of `inception_v4_base` is globally average-pooled, passed
    through dropout, flattened and mapped to `num_classes` logits.

    Args:
        inputs: Input tensor handed to `inception_v4_base`.
        is_training: Forwarded to batch norm and dropout layers.
        dropout_keep_prob: Keep probability of the pre-logits dropout.
        reuse: Reuse flag of the enclosing variable scope.
        scope: Name or scope of the enclosing variable scope.
        create_aux_logits: Not used by this function; kept so existing
            callers keep working.
        num_classes: Number of outputs of the final 'Logits' layer.

    Returns:
        The logits tensor produced by the 'Logits' layer.
    """
    with tf.variable_scope(scope, 'InceptionV4', [inputs],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v4_base(inputs, scope=scope)

            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME',
            ):

                # Final pooling and prediction
                # TODO(sguada,arnoegw): Consider adding a parameter global_pool which
                # can be set to False to disable pooling here (as in resnet_*()).
                with tf.variable_scope('Logits'):
                    # Pool over the full spatial extent. When the static
                    # spatial shape is unknown, fall back to a mean over
                    # axes 1 and 2.
                    kernel_size = net.get_shape()[1:3]
                    if kernel_size.is_fully_defined():
                        net = slim.avg_pool2d(
                            net,
                            kernel_size,
                            padding='VALID',
                            scope='AvgPool_1a',
                        )
                    else:
                        net = tf.reduce_mean(
                            input_tensor=net,
                            axis=[1, 2],
                            keepdims=True,
                            name='global_pool',
                        )
                    end_points['global_pool'] = net
                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       scope='Dropout_1b')
                    net = slim.flatten(net, scope='PreLogitsFlatten')
                    end_points['PreLogitsFlatten'] = net
                    logits = slim.fully_connected(net,
                                                  num_classes,
                                                  activation_fn=None,
                                                  scope='Logits')
                    # Only the logits are returned; end_points is not exposed.
                    return logits
コード例 #10
0
ファイル: mask_head.py プロジェクト: Asharib90/OCR
    def predict(self, features, num_predictions_per_location):
        """Predicts masks from image features.

        Args:
          features: A float tensor of shape [batch_size, height, width,
            channels] containing image features.
          num_predictions_per_location: Number of predictions to be made per
            spatial location.

        Returns:
          mask_predictions: A float tensor reshaped to
            [batch_size, -1, num_masks, mask_height, mask_width].
        """
        # Class-agnostic heads emit one mask, otherwise one mask per class.
        num_masks = 1 if self._masks_are_class_agnostic else self._num_classes
        num_mask_channels = num_masks * self._mask_height * self._mask_width
        num_outputs = num_predictions_per_location * num_mask_channels

        net = features
        if self._use_dropout:
            net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

        square_kernel = [self._kernel_size, self._kernel_size]
        if self._use_depthwise:
            # Depthwise spatial filtering first; the projection to the output
            # channels is then a 1x1 convolution.
            net = slim.separable_conv2d(net,
                                        None,
                                        square_kernel,
                                        padding='SAME',
                                        depth_multiplier=1,
                                        stride=1,
                                        rate=1,
                                        scope='MaskPredictor_depthwise')
            projection_kernel = [1, 1]
        else:
            projection_kernel = square_kernel
        mask_predictions = slim.conv2d(net,
                                       num_outputs,
                                       projection_kernel,
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       normalizer_params=None,
                                       scope='MaskPredictor')

        # Prefer the static batch size; fall back to the dynamic one.
        static_batch_size = features.get_shape().as_list()[0]
        if static_batch_size is None:
            batch_size = tf.shape(features)[0]
        else:
            batch_size = static_batch_size
        return tf.reshape(
            mask_predictions,
            [batch_size, -1, num_masks, self._mask_height, self._mask_width])
コード例 #11
0
ファイル: class_head.py プロジェクト: anhgithub16/my-project
    def predict(self, features, num_predictions_per_location):
        """Predicts class scores (including background) from image features.

        Args:
          features: A float tensor of shape [batch_size, height, width,
            channels] containing image features.
          num_predictions_per_location: Number of predictions to be made per
            spatial location.

        Returns:
          class_predictions_with_background: A float tensor reshaped to
            [batch_size, -1, num_class_slots].
        """
        num_outputs = num_predictions_per_location * self._num_class_slots

        net = features
        if self._use_dropout:
            net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

        if self._use_depthwise:
            # Depthwise spatial filtering followed by a 1x1 projection. The
            # projection keeps slim's default bias initializer.
            net = slim.separable_conv2d(
                net,
                None, [self._kernel_size, self._kernel_size],
                padding='SAME',
                depth_multiplier=1,
                stride=1,
                rate=1,
                scope=self._scope + '_depthwise')
            scores = slim.conv2d(net,
                                 num_outputs, [1, 1],
                                 activation_fn=None,
                                 normalizer_fn=None,
                                 normalizer_params=None,
                                 scope=self._scope)
        else:
            bias_init = tf.constant_initializer(
                self._class_prediction_bias_init)
            scores = slim.conv2d(net,
                                 num_outputs,
                                 [self._kernel_size, self._kernel_size],
                                 activation_fn=None,
                                 normalizer_fn=None,
                                 normalizer_params=None,
                                 scope=self._scope,
                                 biases_initializer=bias_init)

        if self._apply_sigmoid_to_scores:
            scores = tf.sigmoid(scores)

        # Prefer the static batch size; fall back to the dynamic one.
        static_batch_size = features.get_shape().as_list()[0]
        if static_batch_size is None:
            batch_size = tf.shape(features)[0]
        else:
            batch_size = static_batch_size
        return tf.reshape(scores, [batch_size, -1, self._num_class_slots])
コード例 #12
0
  def predict(self, features, num_predictions_per_location):
    """Predicts class scores (including background) from image features.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of predictions to be made per
        spatial location.

    Returns:
      class_predictions_with_background: A tensor of shape
        [batch_size, num_anchors, num_class_slots] when
        self._return_flat_predictions is True, otherwise a tensor of shape
        [batch, height, width, num_predictions_per_location *
        num_class_slots].
    """
    net = features
    if self._use_dropout:
      net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

    conv_op = (
        functools.partial(slim.separable_conv2d, depth_multiplier=1)
        if self._use_depthwise else slim.conv2d)
    num_outputs = num_predictions_per_location * self._num_class_slots
    scores = conv_op(
        net,
        num_outputs,
        [self._kernel_size, self._kernel_size],
        activation_fn=None,
        stride=1,
        padding='SAME',
        normalizer_fn=None,
        biases_initializer=tf.constant_initializer(
            self._class_prediction_bias_init),
        scope=self._scope)

    batch_size, height, width = (
        shape_utils.combined_static_and_dynamic_shape(features)[0:3])

    # Expose the class-slot axis so the score converter sees one slot vector
    # per prediction.
    scores = tf.reshape(scores, [
        batch_size, height, width, num_predictions_per_location,
        self._num_class_slots
    ])
    scores = self._score_converter_fn(scores)

    if self._return_flat_predictions:
      output_shape = [batch_size, -1, self._num_class_slots]
    else:
      output_shape = [batch_size, height, width, num_outputs]
    return tf.reshape(scores, output_shape)
コード例 #13
0
def conv_block(inputs, n_filters, filter_size=[3, 3], dropout_p=0.0):
    """
    Basic conv block for Encoder-Decoder.

    Applies, in order: convolution (no activation, no normalizer), fused
    batch normalization, ReLU, and dropout with keep probability
    1 - dropout_p (skipped when dropout_p is 0).
    """
    features = slim.conv2d(inputs, n_filters, filter_size,
                           activation_fn=None, normalizer_fn=None)
    normalized = slim.batch_norm(features, fused=True)
    activated = tf.nn.relu(normalized)
    if dropout_p == 0.0:
        return activated
    return slim.dropout(activated, keep_prob=(1.0 - dropout_p))
コード例 #14
0
def conv_transpose_block(inputs, n_filters, strides=2, filter_size=[3, 3], dropout_p=0.0):
    """
    Basic conv transpose block for Encoder-Decoder upsampling.

    Applies, in order: transposed convolution, fused batch normalization,
    ReLU, and dropout with keep probability 1 - dropout_p (skipped when
    dropout_p is 0).

    Args:
      inputs: Input tensor.
      n_filters: Number of output filters of the transposed convolution.
      strides: Stride used along both spatial dimensions.
      filter_size: Kernel size of the transposed convolution. It was
        previously ignored in favour of a hard-coded [3, 3]; the default
        keeps that behaviour.
      dropout_p: Dropout probability; 0 disables dropout.

    Returns:
      The output tensor of the block.
    """
    conv = slim.conv2d_transpose(inputs, n_filters, kernel_size=filter_size,
                                 stride=[strides, strides])
    out = tf.nn.relu(slim.batch_norm(conv, fused=True))
    if dropout_p != 0.0:
        out = slim.dropout(out, keep_prob=(1.0 - dropout_p))
    return out
コード例 #15
0
    def build_predictions(self, net, rois, is_training, initializer,
                          initializer_bbox):
        """Builds the classification and box regression heads on ROI features.

        Args:
          net: Feature tensor the ROIs are cropped from.
          rois: Regions of interest handed to `self._crop_pool_layer`.
          is_training: When truthy, dropout follows fc6 and fc7 and the two
            output layers are trainable.
          initializer: Weights initializer of the 'cls_score' layer.
          initializer_bbox: Weights initializer of the 'bbox_pred' layer.

        Returns:
          A `(cls_score, cls_prob, bbox_prediction)` tuple.
        """
        # Crop image ROIs and flatten them for the fully connected layers.
        cropped = self._crop_pool_layer(net, rois, "pool5")
        hidden = slim.flatten(cropped, scope='flatten')

        # Two 4096-unit layers, each followed by dropout only when training.
        for fc_scope, dropout_scope in (('fc6', 'dropout6'),
                                        ('fc7', 'dropout7')):
            hidden = slim.fully_connected(hidden, 4096, scope=fc_scope)
            if is_training:
                hidden = slim.dropout(hidden,
                                      keep_prob=0.5,
                                      is_training=True,
                                      scope=dropout_scope)

        # Scores and predictions.
        cls_score = slim.fully_connected(hidden,
                                         self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None,
                                         scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        # Four regression outputs per class.
        bbox_prediction = slim.fully_connected(
            hidden,
            self._num_classes * 4,
            weights_initializer=initializer_bbox,
            trainable=is_training,
            activation_fn=None,
            scope='bbox_pred')

        return cls_score, cls_prob, bbox_prediction
コード例 #16
0
def regressor(x,
              num_output=85,
              is_training=True,
              reuse=False,
              name="3D_module"):
    """Three-layer MLP regressor with dropout after the first two layers.

    Args:
      x: Input tensor.
      num_output: Number of units of the linear output layer.
      is_training: Forwarded to the dropout layers.
      reuse: Reuse flag of the variable scope.
      name: Name of the variable scope.

    Returns:
      The output of the final 'fc3' layer.
    """
    with tf.variable_scope(name, reuse=reuse):
        hidden = x
        for fc_scope, dropout_scope in (('fc1', 'dropout1'),
                                        ('fc2', 'dropout2')):
            hidden = slim.fully_connected(hidden, 1024, scope=fc_scope)
            hidden = slim.dropout(hidden,
                                  0.5,
                                  is_training=is_training,
                                  scope=dropout_scope)

        # Small-scale variance scaling init for the output layer.
        output_init = tf.keras.initializers.VarianceScaling(
            scale=.01, mode='fan_avg', distribution='uniform')
        prediction = slim.fully_connected(hidden,
                                          num_output,
                                          activation_fn=None,
                                          weights_initializer=output_init,
                                          scope='fc3')

    return prediction
コード例 #17
0
def Encoder_fc3_dropout(x,
                        num_output=85,
                        is_training=True,
                        reuse=False,
                        name="3D_module"):
    """
    3D inference module: three MLP layers, the last one being the output,
    with dropout after the first two.

    Input:
    - x: N x [|img_feat|, |3D_param|]
    - num_output: number of units of the output layer
    - is_training: forwarded to the dropout layers
    - reuse: bool, reuse flag of the variable scope
    - name: name of the variable scope

    Outputs:
    - 3D params: N x num_output
      if orthogonal:
           either 85: (3 + 24*3 + 10) or 109 (3 + 24*4 + 10) for factored
           axis-angle representation
      if perspective:
          86: (f, tx, ty, tz) + 24*3 + 10, or 110 for factored axis-angle.
    - variables: global tf variables looked up by the scope's name
    """
    if reuse:
        print('Reuse is on!')

    with tf.variable_scope(name, reuse=reuse) as module_scope:
        hidden = x
        for fc_scope, dropout_scope in (('fc1', 'dropout1'),
                                        ('fc2', 'dropout2')):
            hidden = slim.fully_connected(hidden, 1024, scope=fc_scope)
            hidden = slim.dropout(hidden,
                                  0.5,
                                  is_training=is_training,
                                  scope=dropout_scope)

        # Small-scale variance scaling init for the output layer.
        output_init = variance_scaling_initializer(factor=.01,
                                                   mode='FAN_AVG',
                                                   uniform=True)
        params = slim.fully_connected(hidden,
                                      num_output,
                                      activation_fn=None,
                                      weights_initializer=output_init,
                                      scope='fc3')

    module_variables = tf.global_variables(module_scope.name)
    return params, module_variables
コード例 #18
0
def conv_net(inputs, hparams):
  """Stacks conv / pool / dropout layers, then a fully connected layer.

  (The original note attributes the architecture to Kelz 2016.)

  Args:
    inputs: input tensor. After the conv stack it is indexed as rank 4;
      the original comment calls the first two dimensions batch and time.
    hparams: object providing the parallel per-layer sequences
      `temporal_sizes`, `freq_sizes`, `num_filters`, `pool_sizes` and
      `dropout_keep_amts`, plus `fc_size` and `fc_dropout_keep_amt`.

  Returns:
    Output of the final dropout layer ('dropout_end').
  """
  default_init = slim.variance_scaling_initializer(
      factor=2.0, mode='FAN_AVG', uniform=True)
  with slim.arg_scope(
      [slim.conv2d, slim.fully_connected],
      activation_fn=tf.nn.relu,
      weights_initializer=default_init):

    layer_specs = zip(
        hparams.temporal_sizes, hparams.freq_sizes, hparams.num_filters,
        hparams.pool_sizes, hparams.dropout_keep_amts)

    net = inputs
    for idx, (time_size, freq_size, filters, pool, keep) in enumerate(
        layer_specs):
      suffix = str(idx)
      net = slim.conv2d(
          net,
          filters, [time_size, freq_size],
          scope='conv' + suffix,
          normalizer_fn=slim.batch_norm)
      # Pool along the second spatial axis only, and only when requested.
      if pool > 1:
        net = slim.max_pool2d(
            net, [1, pool], stride=[1, pool], scope='pool' + suffix)
      # A keep amount of 1 or more means "no dropout for this layer".
      # NOTE(review): is_training is not passed here; presumably it is
      # supplied by an enclosing arg_scope -- confirm against the caller.
      if keep < 1:
        net = slim.dropout(net, keep, scope='dropout' + suffix)

    # Merge the last two dimensions; the first two are taken from the
    # dynamic shape so they may be unknown at graph-construction time.
    dynamic_shape = tf.shape(net)
    merged_width = net.shape[2] * net.shape[3]
    net = tf.reshape(
        net, (dynamic_shape[0], dynamic_shape[1], merged_width),
        'flatten_end')

    net = slim.fully_connected(net, hparams.fc_size, scope='fc_end')
    net = slim.dropout(net, hparams.fc_dropout_keep_amt, scope='dropout_end')

    return net
コード例 #19
0
def vgg_gel2(gel0_pre, gel0_post,
             gel1_pre, gel1_post,
             num_classes = 2,
             is_training = True,
             update_top_only = False,
             fc_conv_padding='VALID',
             dropout_keep_prob = 0.5,
             diff = True,
             reuse = False,
             scope = 'vgg_16'):
  """Combines features from two pre/post input pairs (gel0 and gel1).

  Two modes, selected by `diff`:

  * diff=True: for every pair whose `pre` input is not None, runs
    `vgg_dual_16(post - pre, post, ...)` with `num_classes=None`, keeps
    element [0] of the result, and concatenates the kept tensors along
    axis 1. The first call uses `reuse`; later calls force reuse=True.
    `num_classes`, `fc_conv_padding` and `dropout_keep_prob` are not used
    in this mode.
  * diff=False: runs `pair_vgg` on each (post, pre) pair (the second call
    with reuse=True), concatenates both outputs along axis 3 and applies
    the 'fc6_' / 'fc7_' / optional 'fc8_' convolution head, returning the
    values at spatial position [0, 0].

  Args:
    gel0_pre, gel0_post: first input pair.
    gel1_pre, gel1_post: second input pair.
    num_classes: output channels of 'fc8_'; the layer is skipped if None.
    is_training: forwarded to the sub-networks and dropout layers.
    update_top_only: forwarded to the sub-networks.
    fc_conv_padding: padding of the 7x7 'fc6_' convolution.
    dropout_keep_prob: keep probability of 'dropout6' and 'dropout7'.
    diff: selects the mode described above.
    reuse: variable-reuse flag (see the mode descriptions).
    scope: variable scope name shared by all sub-networks.

  Returns:
    The combined feature / logit tensor for the selected mode.
  """
  print('reuse =', reuse)

  if diff:
    branch_outputs = []
    share = reuse
    for pre, post in ((gel0_pre, gel0_post), (gel1_pre, gel1_post)):
      if pre is None:
        continue
      dual_out = vgg_dual_16(post - pre, post,
                             reuse = share,
                             is_training = is_training,
                             num_classes = None,
                             update_top_only = update_top_only,
                             scope = scope)
      branch_outputs.append(dual_out[0])
      # Every branch after the first one shares the same variables.
      share = True
    return tf.concat(branch_outputs, 1)

  feat0 = pair_vgg(gel0_post, gel0_pre,
                   is_training = is_training,
                   update_top_only = update_top_only,
                   scope = scope)
  feat1 = pair_vgg(gel1_post, gel1_pre,
                   reuse = True,
                   is_training = is_training,
                   update_top_only = update_top_only,
                   scope = scope)

  with tf.variable_scope(scope, scope):
    # NOTE(review): `reuse` is passed positionally here, whereas
    # vgg_dual_16 calls vgg_arg_scope(reuse = ...) by keyword -- confirm
    # that `reuse` really is the first parameter of vgg_arg_scope.
    with slim.arg_scope(vgg_arg_scope(reuse)):
      net = tf.concat([feat0, feat1], 3)
      net = slim.conv2d(net, 2048, [7, 7],
                        padding = fc_conv_padding, scope = 'fc6_')
      net = slim.dropout(net, dropout_keep_prob,
                         is_training = is_training, scope = 'dropout6')
      net = slim.conv2d(net, 2048, [1, 1], scope =  'fc7_')
      net = slim.dropout(net, dropout_keep_prob,
                         is_training = is_training, scope = 'dropout7')
      if num_classes is not None:
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn = None,
                          normalizer_fn = None,
                          scope = 'fc8_')
      # Keep only the first spatial position.
      net = net[:, 0, 0, :]
  return net
コード例 #20
0
    def build(self, inputs):
        """Builds the network on `inputs` and returns its end points.

        Runs `self._network_base` inside the conv arg scope derived from
        `self._conv_scope_params`, then adds a dropout + 1x1 convolution
        head under the 'Logits' variable scope and a softmax on top.

        Args:
          inputs: a map from input string names to tensors. Required:
            * IMAGES: fetched through `self._get_tensor` with
              expected_rank=4.

        Returns:
          The end-points dictionary produced by `self._network_base`,
          extended with:
            * PRE_LOGITS: dropout output that feeds the logits layer.
            * PROBABILITIES_TENSOR: softmax of the un-squeezed logits.
            * LOGITS / PROBABILITIES: logits and softmax output. When
              `self._logits_stride == 1`, axes 1 and 2 are squeezed
              away; otherwise the tensors are stored unchanged.
        """
        images = self._get_tensor(inputs, self.IMAGES, expected_rank=4)
        conv_scope = scope_utils.get_conv_scope(self._conv_scope_params,
                                                self._is_training)
        with slim.arg_scope(conv_scope):
            features, end_points = self._network_base(
                images,
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier)

            # Prediction head.
            with tf.variable_scope('Logits'):
                pre_logits = slim.dropout(
                    features,
                    keep_prob=self._prelogit_dropout_keep_prob,
                    is_training=self._is_training,
                    scope='Dropout_1b')
                end_points[self.PRE_LOGITS] = pre_logits
                logits = slim.conv2d(pre_logits,
                                     self._num_classes, [1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     stride=self._logits_stride,
                                     scope='Conv2d_1c_1x1')

            probabilities = tf.nn.softmax(logits)
            end_points[self.PROBABILITIES_TENSOR] = probabilities

            if self._logits_stride == 1:
                # Drop the height and width axes.
                out_logits = tf.squeeze(logits, [1, 2],
                                        name='SpatialSqueeze')
                out_probabilities = tf.squeeze(probabilities, [1, 2],
                                               name='SpatialSqueeze')
            else:
                out_logits = logits
                out_probabilities = probabilities
            end_points[self.LOGITS] = out_logits
            end_points[self.PROBABILITIES] = out_probabilities
        return end_points
コード例 #21
0
def preact_conv(inputs, n_filters, kernel_size=(3, 3), dropout_p=0.2):
    """
    Basic pre-activation layer for DenseNets.

    Applies, in order: batch normalization (fused), ReLU, a convolution
    without activation or normalizer, and finally dropout.

    Args:
      inputs: input tensor.
      n_filters: number of output filters of the convolution.
      kernel_size: convolution kernel size, forwarded to slim.conv2d.
        The default is an immutable tuple so that it cannot be shared and
        mutated across calls.
      dropout_p: drop probability. Dropout is applied with
        keep_prob = 1 - dropout_p whenever dropout_p is not exactly 0.0.

    Returns:
      The output tensor of the convolution (after dropout, if applied).
    """
    preact = tf.nn.relu(slim.batch_norm(inputs, fused=True))
    conv = slim.conv2d(preact,
                       n_filters,
                       kernel_size,
                       activation_fn=None,
                       normalizer_fn=None)
    if dropout_p != 0.0:
        # NOTE(review): is_training is not passed, so train/eval behavior
        # presumably comes from an enclosing slim.arg_scope -- confirm.
        conv = slim.dropout(conv, keep_prob=(1.0 - dropout_p))
    return conv
コード例 #22
0
    def predict(self, features, num_predictions_per_location=1):
        """Predicts box encodings from a batch of feature maps.

        The features are averaged over axes 1 and 2, flattened,
        optionally passed through dropout, and projected by a single
        fully connected layer ('BoxEncodingPredictor', AUTO_REUSE).

        Args:
          features: A float tensor of shape [batch_size, height, width,
            channels] containing features for a batch of images.
          num_predictions_per_location: Int containing number of
            predictions per location. Must be 1.

        Returns:
          box_encodings: the projection reshaped to
            [-1, 1, number_of_boxes, self._box_code_size], where
            number_of_boxes is 1 when boxes are shared across classes
            and self._num_classes otherwise.

        Raises:
          ValueError: If num_predictions_per_location is not 1.
        """
        if num_predictions_per_location != 1:
            raise ValueError(
                'Only num_predictions_per_location=1 is supported')

        pooled = tf.reduce_mean(features, [1, 2],
                                keep_dims=True,
                                name='AvgPool')
        flat_features = slim.flatten(pooled)
        if self._use_dropout:
            flat_features = slim.dropout(flat_features,
                                         keep_prob=self._dropout_keep_prob,
                                         is_training=self._is_training)

        if self._share_box_across_classes:
            number_of_boxes = 1
        else:
            number_of_boxes = self._num_classes

        with slim.arg_scope(self._fc_hyperparams_fn()):
            flat_encodings = slim.fully_connected(
                flat_features,
                number_of_boxes * self._box_code_size,
                reuse=tf.AUTO_REUSE,
                activation_fn=None,
                scope='BoxEncodingPredictor')

        return tf.reshape(
            flat_encodings,
            [-1, 1, number_of_boxes, self._box_code_size])
コード例 #23
0
ファイル: class_head.py プロジェクト: anhgithub16/my-project
    def predict(self, features, num_predictions_per_location=1):
        """Predicts class scores from a batch of feature maps.

        The features are averaged over axes 1 and 2, flattened,
        optionally passed through dropout, and projected by a single
        fully connected layer (scope `self._scope`, AUTO_REUSE).

        Args:
          features: A float tensor of shape [batch_size, height, width,
            channels] containing features for a batch of images.
          num_predictions_per_location: Int containing number of
            predictions per location. Must be 1.

        Returns:
          class_predictions_with_background: the projection reshaped to
            [-1, 1, self._num_class_slots].

        Raises:
          ValueError: If num_predictions_per_location is not 1.
        """
        if num_predictions_per_location != 1:
            raise ValueError(
                'Only num_predictions_per_location=1 is supported')

        pooled = tf.reduce_mean(features, [1, 2],
                                keep_dims=True,
                                name='AvgPool')
        flat_features = slim.flatten(pooled)
        if self._use_dropout:
            flat_features = slim.dropout(flat_features,
                                         keep_prob=self._dropout_keep_prob,
                                         is_training=self._is_training)

        with slim.arg_scope(self._fc_hyperparams_fn()):
            class_scores = slim.fully_connected(flat_features,
                                                self._num_class_slots,
                                                reuse=tf.AUTO_REUSE,
                                                activation_fn=None,
                                                scope=self._scope)

        return tf.reshape(class_scores, [-1, 1, self._num_class_slots])
コード例 #24
0
    def project_detection_features(self, detection_features):
        """Projects detection features to the embedding space.

        The adaptation method is read from
        `self._model_proto.detection_adaptation`:
          * MLP: a ReLU hidden layer ('detection/project') followed by
            dropout, then the linear output layer.
          * LINEAR: the linear output layer only.
        In both cases the output layer ('detection/adaptation') has
        `self._bert_config.hidden_size` units and no activation.

        Args:
          detection_features: Detection features.

        Returns:
          embeddings: Projected detection features.

        Raises:
          ValueError: If the configured adaptation method is neither MLP
            nor LINEAR.
        """
        is_training = self._is_training
        options = self._model_proto

        if options.detection_adaptation == model_pb2.MLP:
            hidden = slim.fully_connected(
                detection_features,
                options.detection_mlp_hidden_units,
                activation_fn=tf.nn.relu,
                scope='detection/project')
            projection_input = slim.dropout(
                hidden,
                keep_prob=options.dropout_keep_prob,
                is_training=is_training)
        elif options.detection_adaptation == model_pb2.LINEAR:
            projection_input = detection_features
        else:
            raise ValueError('Invalid detection adaptation method.')

        # Output projection shared by both adaptation methods.
        return slim.fully_connected(projection_input,
                                    self._bert_config.hidden_size,
                                    activation_fn=None,
                                    scope='detection/adaptation')
コード例 #25
0
ファイル: mask_head.py プロジェクト: Asharib90/OCR
    def predict(self, features, num_predictions_per_location):
        """Predicts masks with a single convolution over the features.

        Args:
          features: A float tensor of shape [batch_size, height, width,
            channels] containing image features.
          num_predictions_per_location: Number of predictions to be made
            per spatial location.

        Returns:
          mask_predictions: the 'MaskPredictor' convolution output
            reshaped to
            [batch_size, -1, num_masks, mask_height, mask_width], where
            num_masks is 1 for class-agnostic masks and
            self._num_classes otherwise.
        """
        net = features
        num_masks = (1 if self._masks_are_class_agnostic
                     else self._num_classes)
        channels_per_prediction = (
            num_masks * self._mask_height * self._mask_width)

        if self._use_dropout:
            # NOTE(review): is_training is not passed here; presumably an
            # enclosing arg_scope supplies it -- confirm with the caller.
            net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

        conv_out = slim.conv2d(
            net,
            num_predictions_per_location * channels_per_prediction,
            [self._kernel_size, self._kernel_size],
            activation_fn=None,
            stride=1,
            padding='SAME',
            normalizer_fn=None,
            scope='MaskPredictor')

        # Prefer the static batch size; fall back to the dynamic one.
        batch_size = features.get_shape().as_list()[0]
        if batch_size is None:
            batch_size = tf.shape(features)[0]

        return tf.reshape(
            conv_out,
            [batch_size, -1, num_masks, self._mask_height, self._mask_width])
コード例 #26
0
def attention_inception_v3(inputs,
                           num_classes=1000,
                           is_training=True,
                           dropout_keep_prob=0.8,
                           min_depth=16,
                           depth_multiplier=1.0,
                           prediction_fn=slim.softmax,
                           spatial_squeeze=True,
                           reuse=None,
                           create_aux_logits=True,
                           scope='InceptionV3',
                           global_pool=False,
                           attention_module='',
                           attention_position='all'):
    """Inception V3 classifier built on `attention_inception_v3_base`.

    Reference given by the original author:
    "Rethinking the Inception Architecture for Computer Vision",
    Szegedy et al., http://arxiv.org/abs/1512.00567.

    The base network is built first. An optional auxiliary head is
    attached to the 'Mixed_6e' end point, and the main head pools the
    final features, applies dropout and a 1x1 convolution to
    `num_classes` channels, and runs `prediction_fn` on the logits.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes. If 0 or None, the
        function returns right after the pooling step (before dropout
        and the logits layer); no auxiliary head is built either.
      is_training: set as the default `is_training` of slim.batch_norm
        and slim.dropout inside this network.
      dropout_keep_prob: keep probability of the 'Dropout_1b' layer.
      min_depth: lower bound on the channel count used by the auxiliary
        head; also forwarded to the base network.
      depth_multiplier: float multiplier for the channel counts; must be
        greater than zero.
      prediction_fn: called as prediction_fn(logits, scope='Predictions').
      spatial_squeeze: if True, axes 1 and 2 are squeezed out of the
        logits (and auxiliary logits).
      reuse: forwarded to the enclosing variable scope.
      create_aux_logits: whether to build the auxiliary head.
      scope: optional variable scope (default name 'InceptionV3').
      global_pool: if True, the final features are averaged over axes 1
        and 2 with keepdims; otherwise a fixed-size average pool is used,
        with its kernel size obtained from
        `_reduced_kernel_size_for_small_input(net, [8, 8])`.
      attention_module: forwarded to the base network (the original
        notes list '' and 'se_block' as accepted values).
      attention_position: forwarded to the base network (the original
        notes list 'head', 'extractor' and 'all').

    Returns:
      net: the logits if num_classes is a non-zero integer, or the pooled
        features feeding the logits layer if num_classes is 0 or None.
      end_points: a dictionary from component names to activations.

    Raises:
      ValueError: if 'depth_multiplier' is less than or equal to zero.
    """
    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')

    def scaled_depth(channels):
        # Scale the channel count, but never go below min_depth.
        return max(int(channels * depth_multiplier), min_depth)

    with tf.variable_scope(scope, 'InceptionV3', [inputs],
                           reuse=reuse) as var_scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = attention_inception_v3_base(
                inputs,
                scope=var_scope,
                min_depth=min_depth,
                depth_multiplier=depth_multiplier,
                attention_module=attention_module,
                attention_position=attention_position)

            # Auxiliary head, attached to the 'Mixed_6e' end point.
            if create_aux_logits and num_classes:
                with slim.arg_scope(
                        [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1,
                        padding='SAME'):
                    aux = end_points['Mixed_6e']
                    with tf.variable_scope('AuxLogits'):
                        aux = slim.avg_pool2d(aux, [5, 5],
                                              stride=3,
                                              padding='VALID',
                                              scope='AvgPool_1a_5x5')
                        aux = slim.conv2d(aux,
                                          scaled_depth(128), [1, 1],
                                          scope='Conv2d_1b_1x1')

                        # Kernel for the last spatial convolution, as
                        # returned by the small-input helper.
                        aux_kernel = _reduced_kernel_size_for_small_input(
                            aux, [5, 5])
                        aux = slim.conv2d(
                            aux,
                            scaled_depth(768),
                            aux_kernel,
                            weights_initializer=trunc_normal(0.01),
                            padding='VALID',
                            scope='Conv2d_2a_{}x{}'.format(*aux_kernel))
                        aux = slim.conv2d(
                            aux,
                            num_classes, [1, 1],
                            activation_fn=None,
                            normalizer_fn=None,
                            weights_initializer=trunc_normal(0.001),
                            scope='Conv2d_2b_1x1')
                        if spatial_squeeze:
                            aux = tf.squeeze(aux, [1, 2],
                                             name='SpatialSqueeze')
                        end_points['AuxLogits'] = aux

            # Main head: pooling, dropout and the logits layer.
            with tf.variable_scope('Logits'):
                if global_pool:
                    # Average over axes 1 and 2, keeping them as size 1.
                    net = tf.reduce_mean(input_tensor=net,
                                         axis=[1, 2],
                                         keepdims=True,
                                         name='GlobalPool')
                    end_points['global_pool'] = net
                else:
                    # Average pool with a fixed kernel size.
                    pool_kernel = _reduced_kernel_size_for_small_input(
                        net, [8, 8])
                    net = slim.avg_pool2d(
                        net,
                        pool_kernel,
                        padding='VALID',
                        scope='AvgPool_1a_{}x{}'.format(*pool_kernel))
                    end_points['AvgPool_1a'] = net

                # Feature-extractor mode: stop before dropout / logits.
                if not num_classes:
                    return net, end_points

                net = slim.dropout(net,
                                   keep_prob=dropout_keep_prob,
                                   scope='Dropout_1b')
                end_points['PreLogits'] = net
                logits = slim.conv2d(net,
                                     num_classes, [1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     scope='Conv2d_1c_1x1')
                if spatial_squeeze:
                    logits = tf.squeeze(logits, [1, 2],
                                        name='SpatialSqueeze')

            end_points['Logits'] = logits
            end_points['Predictions'] = prediction_fn(logits,
                                                      scope='Predictions')
    return logits, end_points
コード例 #27
0
def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a',
          fc_conv_padding='VALID',
          global_pool=False):
  """Oxford Net VGG 11-Layers version A Example.

  Five conv/max-pool stages are followed by 'fc6' and 'fc7', which are
  implemented as convolutions (7x7 and 1x1) rather than fully connected
  layers, and by an optional 'fc8' classification layer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, 'dropout7'
      and 'fc8' are skipped and the features feeding them are returned.
    is_training: forwarded to the dropout layers.
    dropout_keep_prob: keep probability of the dropout layers.
    spatial_squeeze: if True, axes 1 and 2 are squeezed out of the 'fc8'
      output.
    scope: optional scope for the variables (default name 'vgg_a').
    fc_conv_padding: padding used by the 7x7 'fc6' convolution.
    global_pool: if True, the 'fc7' output is averaged over axes 1 and 2
      (keeping them as size 1) and stored under 'global_pool'.

  Returns:
    net: the output of 'fc8' if num_classes is a non-zero integer,
      otherwise the input that would have been fed to it.
    end_points: a dict of tensors with intermediate activations.
  """
  # (repeat count, filters, conv scope, pool scope) for each stage.
  conv_stages = (
      (1, 64, 'conv1', 'pool1'),
      (1, 128, 'conv2', 'pool2'),
      (2, 256, 'conv3', 'pool3'),
      (2, 512, 'conv4', 'pool4'),
      (2, 512, 'conv5', 'pool5'),
  )

  with tf.compat.v1.variable_scope(scope, 'vgg_a', [inputs]) as var_scope:
    collection_name = var_scope.original_name_scope + '_end_points'
    # Gather the outputs of every conv2d and max_pool2d in one collection.
    with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                        outputs_collections=collection_name):
      net = inputs
      for repeats, filters, conv_scope, pool_scope in conv_stages:
        net = slim.repeat(net, repeats, slim.conv2d, filters, [3, 3],
                          scope=conv_scope)
        net = slim.max_pool2d(net, [2, 2], scope=pool_scope)

      # The "fully connected" layers are expressed as convolutions.
      net = slim.conv2d(net, 4096, [7, 7],
                        padding=fc_conv_padding,
                        scope='fc6')
      net = slim.dropout(net,
                         dropout_keep_prob,
                         is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')

      # Snapshot the collected outputs as a dict before adding the head.
      end_points = slim.utils.convert_collection_to_dict(collection_name)

      if global_pool:
        net = tf.reduce_mean(input_tensor=net,
                             axis=[1, 2],
                             keepdims=True,
                             name='global_pool')
        end_points['global_pool'] = net

      if num_classes:
        net = slim.dropout(net,
                           dropout_keep_prob,
                           is_training=is_training,
                           scope='dropout7')
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          scope='fc8')
        if spatial_squeeze:
          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[var_scope.name + '/fc8'] = net

      return net, end_points
コード例 #28
0
def mobilenet_v1(inputs,
                 num_classes=1000,
                 dropout_keep_prob=0.999,
                 is_training=True,
                 min_depth=8,
                 depth_multiplier=1.0,
                 conv_defs=None,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='MobilenetV1',
                 global_pool=False):
    """Mobilenet v1 model for classification.

    Builds `mobilenet_v1_base` and adds a head under the 'Logits' scope:
    pooling, dropout and a 1x1 convolution to `num_classes` channels,
    followed by an optional prediction function.

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels].
      num_classes: number of predicted classes. If 0 or None, the
        function returns right after the pooling step (before dropout
        and the logits layer).
      dropout_keep_prob: keep probability of the 'Dropout_1b' layer.
      is_training: set as the default `is_training` of slim.batch_norm
        and slim.dropout inside this network.
      min_depth: forwarded to the base network.
      depth_multiplier: forwarded to the base network.
      conv_defs: forwarded to the base network.
      prediction_fn: if truthy, called as
        prediction_fn(logits, scope='Predictions').
      spatial_squeeze: if True, axes 1 and 2 are squeezed out of the
        logits.
      reuse: forwarded to the enclosing variable scope.
      scope: optional variable scope (default name 'MobilenetV1').
      global_pool: if True, the final features are averaged over axes 1
        and 2 with keepdims; otherwise a fixed-size average pool is used,
        with its kernel size obtained from
        `_reduced_kernel_size_for_small_input(net, [7, 7])`.

    Returns:
      net: the logits if num_classes is a non-zero integer, or the pooled
        features feeding the logits layer if num_classes is 0 or None.
      end_points: a dictionary from component names to activations.

    Raises:
      ValueError: Input rank is invalid.
    """
    rank = len(inputs.get_shape().as_list())
    if rank != 4:
        raise ValueError('Invalid input tensor rank, expected 4, was: %d' %
                         rank)

    with tf.compat.v1.variable_scope(scope,
                                     'MobilenetV1', [inputs],
                                     reuse=reuse) as var_scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = mobilenet_v1_base(
                inputs,
                scope=var_scope,
                min_depth=min_depth,
                depth_multiplier=depth_multiplier,
                conv_defs=conv_defs)

            with tf.compat.v1.variable_scope('Logits'):
                if global_pool:
                    # Average over axes 1 and 2, keeping them as size 1.
                    net = tf.reduce_mean(input_tensor=net,
                                         axis=[1, 2],
                                         keepdims=True,
                                         name='global_pool')
                    end_points['global_pool'] = net
                else:
                    # Average pool with a fixed kernel size.
                    pool_kernel = _reduced_kernel_size_for_small_input(
                        net, [7, 7])
                    net = slim.avg_pool2d(net,
                                          pool_kernel,
                                          padding='VALID',
                                          scope='AvgPool_1a')
                    end_points['AvgPool_1a'] = net

                # Feature-extractor mode: stop before dropout / logits.
                if not num_classes:
                    return net, end_points

                net = slim.dropout(net,
                                   keep_prob=dropout_keep_prob,
                                   scope='Dropout_1b')
                logits = slim.conv2d(net,
                                     num_classes, [1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     scope='Conv2d_1c_1x1')
                if spatial_squeeze:
                    logits = tf.squeeze(logits, [1, 2],
                                        name='SpatialSqueeze')

            end_points['Logits'] = logits
            if prediction_fn:
                end_points['Predictions'] = prediction_fn(
                    logits, scope='Predictions')
    return logits, end_points
コード例 #29
0
def inception_resnet_v1(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Builds the Inception-ResNet-v1 graph up to a linear bottleneck layer.

    The network is a convolutional stem, three groups of residual blocks
    (`block35`, `block17`, `block8`) separated by the `reduction_a` /
    `reduction_b` blocks, an average pool over the full spatial extent,
    dropout, and a final fully connected 'Bottleneck' layer without an
    activation function. The block/reduction helpers are defined elsewhere.

    Args:
      inputs: tensor fed to the first 3x3 convolution (presumably a 4-D image
        batch [batch_size, height, width, channels] -- confirm with caller).
      is_training: forwarded to `slim.batch_norm` and `slim.dropout` through
        an arg_scope.
      dropout_keep_prob: keep probability handed to the 'Dropout' layer that
        precedes the bottleneck.
      bottleneck_layer_size: number of outputs of the final 'Bottleneck'
        fully connected layer.
      reuse: forwarded to the top-level `tf.variable_scope`.
      scope: name_or_scope for the top-level `tf.variable_scope`; the default
        name is 'InceptionResnetV1'.

    Returns:
      net: output of the 'Bottleneck' layer (not stored in `end_points`).
      end_points: dict of intermediate tensors keyed by 'Conv2d_1a_3x3',
        'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3', 'Conv2d_3b_1x1',
        'Conv2d_4a_3x3', 'Conv2d_4b_3x3', 'Mixed_6a', 'Mixed_7a', 'PrePool'
        and 'PreLogitsFlatten'.
    """
    end_points = {}
    norm_and_dropout = [slim.batch_norm, slim.dropout]
    spatial_ops = [slim.conv2d, slim.max_pool2d, slim.avg_pool2d]

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope(norm_and_dropout, is_training=is_training), \
                slim.arg_scope(spatial_ops, stride=1, padding='SAME'):
            # Stem, applied in order. Each row is
            # (layer, scope, positional args, keyword overrides); the output
            # of every row is recorded in end_points under its scope name.
            # Shape comments are the outputs for a 299 x 299 input.
            stem_layers = (
                # 149 x 149 x 32
                (slim.conv2d, 'Conv2d_1a_3x3', (32, 3),
                 {'stride': 2, 'padding': 'VALID'}),
                # 147 x 147 x 32
                (slim.conv2d, 'Conv2d_2a_3x3', (32, 3),
                 {'padding': 'VALID'}),
                # 147 x 147 x 64
                (slim.conv2d, 'Conv2d_2b_3x3', (64, 3), {}),
                # 73 x 73 x 64
                (slim.max_pool2d, 'MaxPool_3a_3x3', (3,),
                 {'stride': 2, 'padding': 'VALID'}),
                # 73 x 73 x 80
                (slim.conv2d, 'Conv2d_3b_1x1', (80, 1),
                 {'padding': 'VALID'}),
                # 71 x 71 x 192
                (slim.conv2d, 'Conv2d_4a_3x3', (192, 3),
                 {'padding': 'VALID'}),
                # 35 x 35 x 256
                (slim.conv2d, 'Conv2d_4b_3x3', (256, 3),
                 {'stride': 2, 'padding': 'VALID'}),
            )
            net = inputs
            for layer_fn, layer_scope, layer_args, overrides in stem_layers:
                net = layer_fn(net, *layer_args, scope=layer_scope,
                               **overrides)
                end_points[layer_scope] = net

            # 5 x Inception-resnet-A
            net = slim.repeat(net, 5, block35, scale=0.17)

            # Reduction-A
            with tf.variable_scope('Mixed_6a'):
                net = reduction_a(net, 192, 192, 256, 384)
            end_points['Mixed_6a'] = net

            # 10 x Inception-Resnet-B
            net = slim.repeat(net, 10, block17, scale=0.10)

            # Reduction-B
            with tf.variable_scope('Mixed_7a'):
                net = reduction_b(net)
            end_points['Mixed_7a'] = net

            # 5 x Inception-Resnet-C, then one more block8 called with
            # activation_fn=None.
            net = slim.repeat(net, 5, block8, scale=0.20)
            net = block8(net, activation_fn=None)

            with tf.variable_scope('Logits'):
                end_points['PrePool'] = net
                # The pooling window is the static height/width of the
                # feature map, so the pool covers the whole map.
                # pylint: disable=no-member
                pool_window = net.get_shape()[1:3]
                net = slim.avg_pool2d(net, pool_window, padding='VALID',
                                      scope='AvgPool_1a_8x8')
                net = slim.flatten(net)
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training, scope='Dropout')
                end_points['PreLogitsFlatten'] = net

            # NOTE(review): reuse=False is hard-coded here instead of
            # forwarding the `reuse` argument -- confirm this is intended.
            net = slim.fully_connected(net, bottleneck_layer_size,
                                       activation_fn=None,
                                       scope='Bottleneck', reuse=False)

    return net, end_points
コード例 #30
0
def inception_resnet_v2(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV2'):
    """Builds the Inception-ResNet-v2 graph up to a linear bottleneck layer.

    The network is a convolutional stem, the multi-branch blocks 'Mixed_5b',
    'Mixed_6a' and 'Mixed_7a' interleaved with repeated residual blocks
    (`block35`, `block17`, `block8`, defined elsewhere), a 1x1 convolution,
    an average pool over the full spatial extent, dropout, and a final fully
    connected 'Bottleneck' layer without an activation function.

    Args:
      inputs: tensor fed to the first 3x3 convolution (presumably a 4-D image
        batch [batch_size, height, width, channels] -- confirm with caller).
      is_training: forwarded to `slim.batch_norm` and `slim.dropout` through
        an arg_scope.
      dropout_keep_prob: keep probability handed to the 'Dropout' layer that
        precedes the bottleneck.
      bottleneck_layer_size: number of outputs of the final 'Bottleneck'
        fully connected layer.
      reuse: forwarded to the top-level `tf.variable_scope`.
      scope: name_or_scope for the top-level `tf.variable_scope`; the default
        name is 'InceptionResnetV2'.

    Returns:
      net: output of the 'Bottleneck' layer (not stored in `end_points`).
      end_points: dict of intermediate tensors keyed by 'Conv2d_1a_3x3',
        'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3', 'Conv2d_3b_1x1',
        'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_6a',
        'Mixed_7a', 'Conv2d_7b_1x1', 'PrePool' and 'PreLogitsFlatten'.
    """
    end_points = {}
    norm_and_dropout = [slim.batch_norm, slim.dropout]
    spatial_ops = [slim.conv2d, slim.max_pool2d, slim.avg_pool2d]

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
        with slim.arg_scope(norm_and_dropout, is_training=is_training), \
                slim.arg_scope(spatial_ops, stride=1, padding='SAME'):
            # Stem, applied in order. Each row is
            # (layer, scope, positional args, keyword overrides); the output
            # of every row is recorded in end_points under its scope name.
            # Shape comments are the outputs for a 299 x 299 input.
            stem_layers = (
                # 149 x 149 x 32
                (slim.conv2d, 'Conv2d_1a_3x3', (32, 3),
                 {'stride': 2, 'padding': 'VALID'}),
                # 147 x 147 x 32
                (slim.conv2d, 'Conv2d_2a_3x3', (32, 3),
                 {'padding': 'VALID'}),
                # 147 x 147 x 64
                (slim.conv2d, 'Conv2d_2b_3x3', (64, 3), {}),
                # 73 x 73 x 64
                (slim.max_pool2d, 'MaxPool_3a_3x3', (3,),
                 {'stride': 2, 'padding': 'VALID'}),
                # 73 x 73 x 80
                (slim.conv2d, 'Conv2d_3b_1x1', (80, 1),
                 {'padding': 'VALID'}),
                # 71 x 71 x 192
                (slim.conv2d, 'Conv2d_4a_3x3', (192, 3),
                 {'padding': 'VALID'}),
                # 35 x 35 x 192
                (slim.max_pool2d, 'MaxPool_5a_3x3', (3,),
                 {'stride': 2, 'padding': 'VALID'}),
            )
            net = inputs
            for layer_fn, layer_scope, layer_args, overrides in stem_layers:
                net = layer_fn(net, *layer_args, scope=layer_scope,
                               **overrides)
                end_points[layer_scope] = net

            # Mixed_5b: four parallel branches concatenated on axis 3,
            # giving 96 + 64 + 96 + 64 = 320 channels (35 x 35 x 320).
            with tf.variable_scope('Mixed_5b'):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 64, 5,
                                           scope='Conv2d_0b_5x5')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2, 96, 3,
                                           scope='Conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2, 96, 3,
                                           scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, 3, stride=1,
                                               padding='SAME',
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3, 64, 1,
                                           scope='Conv2d_0b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
            end_points['Mixed_5b'] = net

            net = slim.repeat(net, 10, block35, scale=0.17)

            # Mixed_6a: every branch ends in a stride-2 VALID 3x3 op.
            # NOTE(review): the original comment gave the output as
            # 17 x 17 x 1024; the channel count depends on block35's output
            # depth, which is not visible here -- TODO confirm.
            with tf.variable_scope('Mixed_6a'):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 384, 3, stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 256, 1,
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 256, 3,
                                           scope='Conv2d_0b_3x3')
                    branch_1 = slim.conv2d(branch_1, 384, 3, stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, 3, stride=2,
                                               padding='VALID',
                                               scope='MaxPool_1a_3x3')
                net = tf.concat([branch_0, branch_1, branch_2], 3)
            end_points['Mixed_6a'] = net

            net = slim.repeat(net, 20, block17, scale=0.10)

            # Mixed_7a: second reduction; again every branch ends in a
            # stride-2 VALID 3x3 op.
            with tf.variable_scope('Mixed_7a'):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 256, 1,
                                           scope='Conv2d_0a_1x1')
                    branch_0 = slim.conv2d(branch_0, 384, 3, stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 256, 1,
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 288, 3, stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net, 256, 1,
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2, 288, 3,
                                           scope='Conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2, 320, 3, stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.max_pool2d(net, 3, stride=2,
                                               padding='VALID',
                                               scope='MaxPool_1a_3x3')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
            end_points['Mixed_7a'] = net

            # 9 repeated block8 units, then one more block8 called with
            # activation_fn=None.
            net = slim.repeat(net, 9, block8, scale=0.20)
            net = block8(net, activation_fn=None)

            net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
            end_points['Conv2d_7b_1x1'] = net

            with tf.variable_scope('Logits'):
                end_points['PrePool'] = net
                # The pooling window is the static height/width of the
                # feature map, so the pool covers the whole map.
                # pylint: disable=no-member
                pool_window = net.get_shape()[1:3]
                net = slim.avg_pool2d(net, pool_window, padding='VALID',
                                      scope='AvgPool_1a_8x8')
                net = slim.flatten(net)
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training, scope='Dropout')
                end_points['PreLogitsFlatten'] = net

            # NOTE(review): reuse=False is hard-coded here instead of
            # forwarding the `reuse` argument -- confirm this is intended.
            net = slim.fully_connected(net, bottleneck_layer_size,
                                       activation_fn=None,
                                       scope='Bottleneck', reuse=False)

    return net, end_points