Beispiel #1
0
def conv_net(inputs):
    '''
    Assemble a small convolutional classifier with TF-Slim.

    The stack is conv -> max-pool -> conv -> max-pool -> flatten -> dense.

    Parameters
    ----------
    inputs : input tensor; ``inputs.shape[2]`` is used as the second
        dimension of the first convolution kernel

    Returns
    -------
    net : output of the final 2-unit fully connected layer, which uses
        ``tf.nn.softmax`` as its activation
    '''

    # Defaults shared by every conv2d / fully_connected layer created below;
    # individual layers may still override them (the last layer does).
    layer_defaults = dict(
        activation_fn = leaky_relu(0.005),
        weights_initializer = tf.truncated_normal_initializer(0.0, 0.01),
        weights_regularizer = slim.l2_regularizer(0.0005),
    )

    with slim.arg_scope([slim.conv2d, slim.fully_connected], **layer_defaults):
        # First kernel: (3, dimension_count)
        first_kernel = (3, inputs.shape[2])
        features = slim.conv2d(inputs, 512, first_kernel, 1,
                               padding = 'valid', scope = 'conv_1')
        features = slim.max_pool2d(features, (4, 1), 4,
                                   padding = 'valid', scope = 'pool_2')
        features = slim.conv2d(features, 512, (5, 1), 1, scope = 'conv_3')
        features = slim.max_pool2d(features, (4, 1), 4,
                                   padding = 'valid', scope = 'pool_4')
        flat = slim.flatten(features, scope = 'flatten_5')
        net = slim.fully_connected(flat, 2, scope = 'fc_6',
                                   activation_fn = tf.nn.softmax)

    return net
Beispiel #2
0
    def predict(self, preprocessed_inputs, true_image_shapes):
        """Build class and box prediction tensors from the input tensor.

        The inputs are flattened and fed to two independent fully connected
        layers: one with `self._num_classes` outputs and one with 4 outputs.

        Args:
          preprocessed_inputs: a [batch, 28, 28, channels] float32 tensor.
          true_image_shapes: int32 tensor of shape [batch, 3] where each row
            is of the form [height, width, channels] indicating the shapes
            of true images in the resized images, as resized images can be
            padded with zeros. Not read by this method.

        Returns:
          prediction_dict: a dictionary holding prediction tensors to be
            passed to the Loss or Postprocess functions. It has the keys
            'class_predictions_with_background' (reshaped to
            [-1, 1, num_classes]) and 'box_encodings' (reshaped to
            [-1, 1, 4]).
        """
        flat = slim.flatten(preprocessed_inputs)
        class_scores = slim.fully_connected(flat, self._num_classes)
        box_coords = slim.fully_connected(flat, 4)

        prediction_dict = {}
        prediction_dict['class_predictions_with_background'] = tf.reshape(
            class_scores, [-1, 1, self._num_classes])
        prediction_dict['box_encodings'] = tf.reshape(box_coords, [-1, 1, 4])
        return prediction_dict
def define_vggish_slim(training=False):
    """Builds the VGGish TensorFlow graph.

    All ops are created in the current default graph, under the scope
    'vggish/'.

    The input is a placeholder named 'vggish/input_features' of type float32
    and shape [batch_size, num_frames, num_bands], where batch_size is
    variable and num_frames / num_bands are the constants params.NUM_FRAMES
    and params.NUM_BANDS. Each [num_frames, num_bands] slice represents a
    log-mel-scale spectrogram patch covering num_bands frequency bands and
    num_frames time frames (where each frame step is usually 10ms). This is
    produced by computing the stabilized
    log(mel-spectrogram + params.LOG_OFFSET).

    The output is an op named 'vggish/embedding' which produces the
    activations of the params.EMBEDDING_SIZE-wide embedding layer, which is
    usually the penultimate layer when used as part of a full model with a
    final classifier layer.

    Args:
      training: If true, all parameters are marked trainable.

    Returns:
      The op 'vggish/embedding'.
    """
    # Layer defaults installed by the argument scopes below:
    # - weights ~ truncated normal with stddev INIT_STDDEV, biases = 0,
    #   ReLU activations, trainability controlled by `training`;
    # - convolutions are 3x3, stride 1, SAME padding;
    # - max-pools are 2x2, stride 2, SAME padding.
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            weights_initializer=tf.truncated_normal_initializer(
                stddev=params.INIT_STDDEV),
            biases_initializer=tf.zeros_initializer(),
            activation_fn=tf.nn.relu,
            trainable=training):
        with slim.arg_scope([slim.conv2d],
                            kernel_size=[3, 3], stride=1, padding='SAME'):
            with slim.arg_scope([slim.max_pool2d],
                                kernel_size=[2, 2], stride=2, padding='SAME'):
                with tf.variable_scope('vggish'):
                    # Batch of 2-D log-mel-spectrogram patches.
                    spectrograms = tf.placeholder(
                        tf.float32,
                        shape=(None, params.NUM_FRAMES, params.NUM_BANDS),
                        name='input_features')
                    # conv2d() needs a channel axis, so add one of size 1.
                    hidden = tf.reshape(
                        spectrograms,
                        [-1, params.NUM_FRAMES, params.NUM_BANDS, 1])

                    # VGG-style stack: convolutions interleaved with pools.
                    hidden = slim.conv2d(hidden, 64, scope='conv1')
                    hidden = slim.max_pool2d(hidden, scope='pool1')
                    hidden = slim.conv2d(hidden, 128, scope='conv2')
                    hidden = slim.max_pool2d(hidden, scope='pool2')
                    hidden = slim.repeat(
                        hidden, 2, slim.conv2d, 256, scope='conv3')
                    hidden = slim.max_pool2d(hidden, scope='pool3')
                    hidden = slim.repeat(
                        hidden, 2, slim.conv2d, 512, scope='conv4')
                    hidden = slim.max_pool2d(hidden, scope='pool4')

                    # Dense head: two 4096-unit layers, then the embedding.
                    hidden = slim.flatten(hidden)
                    hidden = slim.repeat(
                        hidden, 2, slim.fully_connected, 4096, scope='fc1')
                    embedding = slim.fully_connected(
                        hidden, params.EMBEDDING_SIZE, scope='fc2')
                    return tf.identity(embedding, name='embedding')
def model(
    inputs,
    is_training = True,
    dropout_keep_prob = 0.8,
    reuse = None,
    scope = 'InceptionV4',
    bottleneck_dim = 512,
):
    """Build an InceptionV4 trunk with a bottleneck layer and a 2-way head.

    Args:
      inputs: tensor passed to `inception_v4_base`.
      is_training: forwarded to `slim.batch_norm` and `slim.dropout` through
        an argument scope.
      dropout_keep_prob: keep probability handed to the dropout layer that
        precedes the flatten.
      reuse: forwarded to `tf.variable_scope`.
      scope: name (or scope) for the enclosing variable scope.
      bottleneck_dim: number of units in the 'bottleneck' dense layer.

    Returns:
      The output of the 2-unit 'Logits_vad' layer (no activation applied).
    """
    # inputs = tf.image.grayscale_to_rgb(inputs)
    with tf.variable_scope(
        scope, 'InceptionV4', [inputs], reuse = reuse
    ) as scope:
        with slim.arg_scope(
            [slim.batch_norm, slim.dropout], is_training = is_training
        ):
            net, end_points = inception_v4_base(inputs, scope = scope)
            print(net.shape)

            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                stride = 1,
                padding = 'SAME',
            ), tf.variable_scope('Logits'):
                # Spatial extent of the trunk output (8 x 8 x 1536 per the
                # original annotation).
                spatial_dims = net.get_shape()[1:3]
                print(spatial_dims)
                if not spatial_dims.is_fully_defined():
                    # Static size unknown: average over both spatial axes.
                    pooled = tf.reduce_mean(
                        input_tensor = net,
                        axis = [1, 2],
                        keepdims = True,
                        name = 'global_pool',
                    )
                else:
                    # Static size known: one pooling window covers the map.
                    pooled = slim.avg_pool2d(
                        net,
                        spatial_dims,
                        padding = 'VALID',
                        scope = 'AvgPool_1a',
                    )
                end_points['global_pool'] = pooled

                # 1 x 1 x 1536
                dropped = slim.dropout(
                    pooled, dropout_keep_prob, scope = 'Dropout_1b'
                )
                flat = slim.flatten(dropped, scope = 'PreLogitsFlatten')
                end_points['PreLogitsFlatten'] = flat

                bottleneck = slim.fully_connected(
                    flat, bottleneck_dim, scope = 'bottleneck'
                )
                return slim.fully_connected(
                    bottleneck,
                    2,
                    activation_fn = None,
                    scope = 'Logits_vad',
                )
Beispiel #5
0
def create_net(
    SPEC_HEIGHT,
    HWW_X,
    LEARN_LOG,
    NUM_FILTERS,
    WIGGLE_ROOM,
    CONV_FILTER_WIDTH,
    NUM_DENSE_UNITS,
    DO_BATCH_NORM,
):
    """Build a two-conv, three-dense network and return its layers by name.

    Every hidden layer is created without an activation and is then passed
    through a leaky ReLU with ``alpha=1 / 3``.

    Args:
      SPEC_HEIGHT: height of the input placeholder.
      HWW_X: the input placeholder is ``HWW_X * 2`` wide.
      LEARN_LOG: accepted but not used in this function.
      NUM_FILTERS: number of filters in both convolutions.
      WIGGLE_ROOM: the first kernel is ``SPEC_HEIGHT - WIGGLE_ROOM`` tall.
      CONV_FILTER_WIDTH: width of the first convolution kernel.
      NUM_DENSE_UNITS: units in the 'fc6' and 'fc7' layers.
      DO_BATCH_NORM: accepted but not used in this function.

    Returns:
      An OrderedDict with the keys 'input', 'conv1_1', 'conv1_2', 'pool2',
      'pool2_flat', 'fc6', 'fc7', 'fc8' and 'output' (softmax of 'fc8').
    """

    channels = 4
    leak = 1 / 3
    # Hidden layers are linear and bias-free; the leaky ReLU is applied
    # explicitly afterwards.
    linear_no_bias = dict(activation_fn=None, biases_initializer=None)

    spectrogram = tf.compat.v1.placeholder(
        tf.float32, (None, SPEC_HEIGHT, HWW_X * 2, channels), name="input"
    )

    first_kernel = (SPEC_HEIGHT - WIGGLE_ROOM, CONV_FILTER_WIDTH)
    conv1_1 = slim.conv2d(
        spectrogram, NUM_FILTERS, first_kernel, padding="valid", **linear_no_bias
    )
    conv1_1 = tf.nn.leaky_relu(conv1_1, alpha=leak)

    conv1_2 = slim.conv2d(
        conv1_1, NUM_FILTERS, (1, 3), padding="valid", **linear_no_bias
    )
    conv1_2 = tf.nn.leaky_relu(conv1_2, alpha=leak)

    # Max-pool across the full remaining width of the feature map.
    W = conv1_2.shape[2]
    pooled = slim.max_pool2d(conv1_2, kernel_size=(1, W), stride=(1, 1))
    pooled = tf.transpose(pooled, (0, 3, 2, 1))
    pooled_flat = slim.flatten(pooled)

    fc6 = slim.fully_connected(pooled_flat, NUM_DENSE_UNITS, **linear_no_bias)
    fc6 = tf.nn.leaky_relu(fc6, alpha=leak)

    fc7 = slim.fully_connected(fc6, NUM_DENSE_UNITS, **linear_no_bias)
    fc7 = tf.nn.leaky_relu(fc7, alpha=leak)

    fc8 = slim.fully_connected(fc7, 2, activation_fn=None)
    probabilities = tf.nn.softmax(fc8)

    return collections.OrderedDict(
        [
            ("input", spectrogram),
            ("conv1_1", conv1_1),
            ("conv1_2", conv1_2),
            ("pool2", pooled),
            ("pool2_flat", pooled_flat),
            ("fc6", fc6),
            ("fc7", fc7),
            ("fc8", fc8),
            ("output", probabilities),
        ]
    )
def model(
    inputs,
    is_training=True,
    dropout_keep_prob=0.8,
    reuse=None,
    scope='InceptionV4',
    create_aux_logits=True,
    num_classes=2,
):
    """Build an InceptionV4 trunk followed by a global-pool logits head.

    Args:
      inputs: tensor passed to `inception_v4_base`.
      is_training: forwarded to `slim.batch_norm` and `slim.dropout` through
        an argument scope.
      dropout_keep_prob: keep probability handed to the dropout layer that
        precedes the flatten.
      reuse: forwarded to `tf.variable_scope`.
      scope: name (or scope) for the enclosing variable scope.
      create_aux_logits: accepted but not used in this function.
      num_classes: number of units in the final 'Logits' layer.

    Returns:
      The output of the 'Logits' fully connected layer (no activation).
    """
    with tf.variable_scope(scope, 'InceptionV4', [inputs],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v4_base(inputs, scope=scope)
            print(net.shape)

            # Final pooling and prediction
            # TODO(sguada,arnoegw): Consider adding a parameter global_pool
            # which can be set to False to disable pooling here (as in
            # resnet_*()).
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                stride=1,
                padding='SAME',
            ), tf.variable_scope('Logits'):
                # Spatial extent of the trunk output (8 x 8 x 1536 per the
                # original annotation).
                spatial_dims = net.get_shape()[1:3]
                print(spatial_dims)
                if not spatial_dims.is_fully_defined():
                    # Static size unknown: average over both spatial axes.
                    pooled = tf.reduce_mean(
                        input_tensor=net,
                        axis=[1, 2],
                        keepdims=True,
                        name='global_pool',
                    )
                else:
                    # Static size known: one pooling window covers the map.
                    pooled = slim.avg_pool2d(
                        net,
                        spatial_dims,
                        padding='VALID',
                        scope='AvgPool_1a',
                    )
                end_points['global_pool'] = pooled

                # 1 x 1 x 1536
                dropped = slim.dropout(pooled,
                                       dropout_keep_prob,
                                       scope='Dropout_1b')
                flat = slim.flatten(dropped, scope='PreLogitsFlatten')
                end_points['PreLogitsFlatten'] = flat

                # 1536
                return slim.fully_connected(flat,
                                            num_classes,
                                            activation_fn=None,
                                            scope='Logits')
Beispiel #7
0
    def create_network(self,mH=128):
        """Build the dueling-DQN graph and attach its tensors to ``self``.

        The network takes a flat 49-element state, reshapes it to a 7x7x1
        image, runs four bias-free tanh convolutions, two ReLU dense layers,
        and then splits the features into an advantage stream and a value
        stream. The output ``self.Qout`` has one row of 4 action values for
        each of 7 components.

        Args:
          mH: number of filters of the last convolution and width of the two
            dense layers. It is split in half between the two streams, so
            the stream weight matrices have ``mH//2`` rows.

        Attributes created (among others):
          scalarInput: float32 placeholder of shape [None, 49].
          Qout: Q-values, reshaped/broadcast to [-1, 7, 4].
          predict: argmax of ``Qout`` over the action axis.
          targetQ / actions: placeholders of shape [None, 7] used for training.
          loss / updateModel: squared TD-error loss and its Adam update op.
        """
        # Placeholder : Inserts a placeholder for a tensor that will be always fed.
        self.scalarInput = tf.placeholder(shape=[None,49], dtype=tf.float32)
        self.imageIn = tf.reshape(self.scalarInput, shape=[-1,7,7,1])
        # SAME padding keeps the spatial size (before striding); VALID shrinks it.
        # Spatial sizes: 7x7 -> 7x7 -> 4x4 -> 2x2 -> 1x1, so conv4 yields mH features.
        self.conv1 = slim.conv2d(inputs=self.imageIn, num_outputs=4, kernel_size=[3,3], stride=[1,1],
                                 activation_fn=tf.nn.tanh, padding='SAME', biases_initializer=None)
        self.conv2 = slim.conv2d(inputs=self.conv1, num_outputs=16, kernel_size= [3,3], stride=[2,2],
                                 activation_fn=tf.nn.tanh,padding='SAME', biases_initializer=None)
        self.conv3 = slim.conv2d(inputs=self.conv2, num_outputs=32, kernel_size= [3,3], stride=[2,2],
                                 activation_fn=tf.nn.tanh,padding='SAME', biases_initializer=None)
        self.conv4 = slim.conv2d(inputs=self.conv3, num_outputs=mH, kernel_size= [2,2], stride=[1,1],
                                 activation_fn=tf.nn.tanh,padding='VALID', biases_initializer=None)

        # Dueling DQN: the head is split into advantage and value streams.
        # Flatten the conv output to one feature vector per example.
        self.layer4 = slim.flatten(self.conv4)
        # Xavier initializer for all hand-built weight matrices below.
        xavier_init = tf.contrib.layers.xavier_initializer()
        self.W1 = tf.Variable(xavier_init([mH,mH]))
        self.b1 = tf.Variable(tf.zeros([mH]))
        self.layer5 = tf.nn.relu(tf.matmul(self.layer4,self.W1)+self.b1)
        self.W2 = tf.Variable(xavier_init([mH,mH]))
        self.b2 = tf.Variable(tf.zeros([mH]))
        self.layer6 = tf.nn.relu(tf.matmul(self.layer5,self.W2)+self.b2)
        # Split the features in two equal halves along axis 1.
        self.streamA, self.streamV = tf.split(self.layer6,2,1)  # tf.split(data, number, axis)

        # 7 components x 4 actions for the advantage, one value per component.
        self.AW = tf.Variable(xavier_init([mH//2, 28]))  # AW [h_size//2, 7*4]
        self.Ab = tf.Variable(tf.zeros([28]))
        self.VW = tf.Variable(xavier_init([mH//2, 7]))  # value V(s)
        self.Vb = tf.Variable(tf.zeros([7]))
        self.Advantage = tf.matmul(self.streamA, self.AW)+self.Ab
        self.Value = tf.matmul(self.streamV, self.VW)+self.Vb
        # Advantage A(s,a): one row of 4 actions per component.
        self.Advantage = tf.reshape(self.Advantage, [-1, 7, 4])
        # Value V(s): trailing axis of size 1 so it broadcasts over actions.
        self.Value = tf.reshape(self.Value, [-1,7,1])

        # Q(s,a) = V(s) + (A(s,a) - mean_a A(s,a))
        self.Qout = self.Value+tf.subtract(self.Advantage, tf.reduce_mean(self.Advantage, axis=2, keep_dims=True))  # 1*7*4 --> Q(s,a)
        self.predict = tf.argmax(self.Qout, 2)  # the predicted actions for each component 1*7*1 --> actions
        # self.predict_a = tf.nn.softmax(self.Qout,2)

        self.targetQ = tf.placeholder(shape=[None,7], dtype = tf.float32)
        self.actions = tf.placeholder(shape=[None,7], dtype = tf.int32)
        self.actions_onehot = tf.one_hot(self.actions, 4, dtype = tf.float32)

        # Select the Q-value of the taken action. The one-hot mask zeroes the
        # other three entries, so the selection must be a SUM over the action
        # axis; a mean would return Q(s,a)/4 and mis-scale the TD error.
        self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot),axis=2)
        self.td_error = tf.reduce_mean(tf.square(self.targetQ-self.Q))
        self.loss = tf.reduce_mean(self.td_error)
        self.trainer = tf.train.AdamOptimizer(learning_rate = 0.001)  # training rules
        self.updateModel = self.trainer.minimize(self.loss)  # training target
Beispiel #8
0
def create_net(SPEC_HEIGHT, HWW_X, LEARN_LOG, NUM_FILTERS, WIGGLE_ROOM,
               CONV_FILTER_WIDTH, NUM_DENSE_UNITS, DO_BATCH_NORM):
    """Build a two-conv, three-dense network and return its layers by name.

    Eager execution is disabled process-wide before the graph is built,
    because the network is fed through a placeholder.

    Args:
      SPEC_HEIGHT: height of the input placeholder.
      HWW_X: the input placeholder is ``HWW_X * 2`` wide.
      LEARN_LOG: accepted but not used in this function.
      NUM_FILTERS: number of filters in both convolutions.
      WIGGLE_ROOM: the first kernel is ``SPEC_HEIGHT - WIGGLE_ROOM`` tall.
      CONV_FILTER_WIDTH: width of the first convolution kernel.
      NUM_DENSE_UNITS: units in the 'fc6' and 'fc7' layers.
      DO_BATCH_NORM: accepted but not used in this function.

    Returns:
      An OrderedDict with the keys 'input', 'conv1_1', 'conv1_2', 'pool2',
      'pool2_flat', 'fc6', 'fc7', 'fc8' and 'output' (softmax of 'fc8').
    """

    tf.compat.v1.disable_eager_execution()

    channels = 4
    net = collections.OrderedDict()

    # Use the compat.v1 endpoint: the bare `tf.placeholder` symbol does not
    # exist under TensorFlow 2, while `tf.compat.v1` is already required by
    # the call above.
    net['input'] = tf.compat.v1.placeholder(
        tf.float32,
        (None, SPEC_HEIGHT, HWW_X * 2, channels),
        name='input')
    # Hidden layers are created linear and bias-free; the leaky ReLU is
    # applied explicitly afterwards.
    net['conv1_1'] = slim.conv2d(
        net['input'],
        NUM_FILTERS, (SPEC_HEIGHT - WIGGLE_ROOM, CONV_FILTER_WIDTH),
        padding='valid',
        activation_fn=None,
        biases_initializer=None)
    net['conv1_1'] = tf.nn.leaky_relu(net['conv1_1'], alpha=1 / 3)

    net['conv1_2'] = slim.conv2d(net['conv1_1'],
                                 NUM_FILTERS, (1, 3),
                                 padding='valid',
                                 activation_fn=None,
                                 biases_initializer=None)
    net['conv1_2'] = tf.nn.leaky_relu(net['conv1_2'], alpha=1 / 3)

    # Max-pool across the full remaining width of the feature map.
    W = net['conv1_2'].shape[2]
    net['pool2'] = slim.max_pool2d(net['conv1_2'],
                                   kernel_size=(1, W),
                                   stride=(1, 1))

    net['pool2'] = tf.transpose(net['pool2'], (0, 3, 2, 1))
    net['pool2_flat'] = slim.flatten(net['pool2'])

    net['fc6'] = slim.fully_connected(net['pool2_flat'],
                                      NUM_DENSE_UNITS,
                                      activation_fn=None,
                                      biases_initializer=None)
    net['fc6'] = tf.nn.leaky_relu(net['fc6'], alpha=1 / 3)

    net['fc7'] = slim.fully_connected(net['fc6'],
                                      NUM_DENSE_UNITS,
                                      activation_fn=None,
                                      biases_initializer=None)
    net['fc7'] = tf.nn.leaky_relu(net['fc7'], alpha=1 / 3)

    net['fc8'] = slim.fully_connected(net['fc7'], 2, activation_fn=None)
    # net['fc8'] = tf.nn.leaky_relu(net['fc8'], alpha=1/3)
    net['output'] = tf.nn.softmax(net['fc8'])

    return net
def discriminator(images, num_classes, bottleneck_size=512, keep_prob=1.0, phase_train=True,
            weight_decay=0.0, reuse=None, scope='Discriminator'):
    """Build a five-stage convolutional discriminator with two heads.

    Args:
      images: input tensor fed to the first `conv` stage.
      num_classes: number of units in the global 'Logits' layer.
      bottleneck_size: number of units in the 'Bottleneck' layer.
      keep_prob: accepted but not used in this function.
      phase_train: forwarded as `is_training` to `slim.batch_norm` and
        `slim.dropout` through an argument scope.
      weight_decay: the L2 regularizer factor is ``0.5 * weight_decay``.
      reuse: forwarded to the variable scope.
      scope: name (or scope) of the enclosing variable scope.

    Returns:
      A tuple ``(patch_logits, logits)``: `patch_logits` is the output of a
      1x1, 3-channel convolution over the last feature map reshaped to
      [-1, 3]; `logits` is the output of the 'Logits' layer, computed from
      the L2-normalized bottleneck features.
    """
    print("discriminator input : ",images.shape)

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_regularizer=tf.keras.regularizers.l2(0.5 * (weight_decay)),
                        activation_fn=leaky_relu,
                        normalizer_fn=None,
                        normalizer_params=batch_norm_params):
        # `[images]` must be passed as `values`: the second positional
        # parameter of variable_scope is `default_name`, not `values`.
        with tf.compat.v1.variable_scope(scope, values=[images], reuse=reuse):
            with slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=phase_train):

                # `TensorShape.as_list()` yields the same list of ints/None
                # as reading `dim.value`, and also works when the shape's
                # dimensions are plain ints that have no `.value` attribute.
                print('{} input shape:'.format(scope), images.shape.as_list())

                net = conv(images, 32, kernel_size=4, stride=2, scope='conv1')
                print('module_1 shape:', net.shape.as_list())

                net = conv(net, 64, kernel_size=4, stride=2, scope='conv2')
                print('module_2 shape:', net.shape.as_list())

                net = conv(net, 128, kernel_size=4, stride=2, scope='conv3')
                print('module_3 shape:', net.shape.as_list())

                net = conv(net, 256, kernel_size=4, stride=2, scope='conv4')
                print('module_4 shape:', net.shape.as_list())

                net = conv(net, 512, kernel_size=4, stride=2, scope='conv5')
                print('module_5 shape:', net.shape.as_list())

                # Patch discriminator: one 3-way prediction per spatial
                # location of the last feature map.
                patch5_logits = slim.conv2d(net, 3, 1, activation_fn=None, normalizer_fn=None, scope='patch5_logits')
                patch_logits = tf.reshape(patch5_logits, [-1,3])

                # Global discriminator: flatten, project, L2-normalize.
                net = slim.flatten(net)
                prelogits = slim.fully_connected(net, bottleneck_size, scope='Bottleneck',
                                        weights_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"),
                                        activation_fn=None, normalizer_fn=None)
                prelogits = tf.nn.l2_normalize(prelogits, axis=1)
                print('latent shape:', prelogits.shape.as_list())

                logits = slim.fully_connected(prelogits, num_classes, scope='Logits',
                                    activation_fn=None, normalizer_fn=None)

                return patch_logits, logits
Beispiel #10
0
 def encoder(self, images, is_training):
     """Encode `images` into two latent-sized vectors.

     Four stride-2 4x4 convolutions (32, 64, 128, 256 filters) with
     batch normalization and leaky ReLU are followed by a flatten and two
     parallel linear layers of width `self.latent_variable_dim`.

     Args:
       images: input tensor for the first convolution.
       is_training: forwarded to `slim.batch_norm` via an argument scope.

     Returns:
       A tuple ``(fc1, fc2)`` holding the outputs of the 'Fc_1' and 'Fc_2'
       layers; neither has an activation or a normalizer.
     """
     activation_fn = leaky_relu  # tf.nn.relu
     weight_decay = 0.0
     # (number of filters, scope name) for each convolution, in build order.
     conv_plan = (
         (32, 'Conv2d_1'),
         (64, 'Conv2d_2'),
         (128, 'Conv2d_3'),
         (256, 'Conv2d_4'),
     )
     with tf.compat.v1.variable_scope('encoder'):
         with slim.arg_scope([slim.batch_norm], is_training=is_training):
             layer_defaults = dict(
                 weights_initializer=tf.compat.v1.truncated_normal_initializer(
                     stddev=0.1),
                 weights_regularizer=tf.keras.regularizers.l2(
                     0.5 * (weight_decay)),
                 normalizer_fn=slim.batch_norm,
                 normalizer_params=self.batch_norm_params,
             )
             with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                 **layer_defaults):
                 hidden = images
                 for num_filters, conv_scope in conv_plan:
                     hidden = slim.conv2d(hidden,
                                          num_filters, [4, 4],
                                          2,
                                          activation_fn=activation_fn,
                                          scope=conv_scope)
                 flat = slim.flatten(hidden)
                 # The two heads are plain linear projections, so the
                 # scope-wide batch-norm normalizer is switched off for them.
                 head_options = dict(activation_fn=None, normalizer_fn=None)
                 fc1 = slim.fully_connected(flat,
                                            self.latent_variable_dim,
                                            scope='Fc_1',
                                            **head_options)
                 fc2 = slim.fully_connected(flat,
                                            self.latent_variable_dim,
                                            scope='Fc_2',
                                            **head_options)
     return fc1, fc2
Beispiel #11
0
    def predict(self, features, num_predictions_per_location=1):
        """Predicts boxes.

        The features are averaged over axes 1 and 2, flattened, optionally
        passed through dropout, and fed to one fully connected layer.

        Args:
          features: A float tensor of shape [batch_size, height, width,
            channels] containing features for a batch of images.
          num_predictions_per_location: Int containing number of predictions
            per location.

        Returns:
          box_encodings: A float tensor reshaped to
            [-1, 1, number_of_boxes, code_size] representing the location of
            the objects, where number_of_boxes is 1 when boxes are shared
            across classes and `self._num_classes` otherwise.

        Raises:
          ValueError: If num_predictions_per_location is not 1.
        """
        if num_predictions_per_location != 1:
            raise ValueError(
                'Only num_predictions_per_location=1 is supported')

        pooled = tf.reduce_mean(features, [1, 2],
                                keep_dims=True,
                                name='AvgPool')
        roi_vector = slim.flatten(pooled)
        if self._use_dropout:
            roi_vector = slim.dropout(roi_vector,
                                      keep_prob=self._dropout_keep_prob,
                                      is_training=self._is_training)

        # One shared box, or one box per class.
        number_of_boxes = (
            1 if self._share_box_across_classes else self._num_classes)

        with slim.arg_scope(self._fc_hyperparams_fn()):
            raw_encodings = slim.fully_connected(
                roi_vector,
                number_of_boxes * self._box_code_size,
                reuse=tf.AUTO_REUSE,
                activation_fn=None,
                scope='BoxEncodingPredictor')
        return tf.reshape(
            raw_encodings, [-1, 1, number_of_boxes, self._box_code_size])
  def _network_template(self, state):
    """Builds the convolutional network used to compute the agent's Q-values.

    The three convolutions are created with `trainable=False`; only the
    512-unit dense layer and the Q-value layer use the default or explicit
    `trainable=True` setting.

    Args:
      state: tf.Placeholder, contains the agent's current state. It is cast
        to float32 and divided by 255 before the first convolution.

    Returns:
      A tuple `(net, linear_features)`: `net` is the `_network_type` object
      wrapping the Q-values and `linear_features` is the output of the
      512-unit fully connected layer.
    """
    hidden = tf.math.truediv(tf.cast(state, tf.float32), 255.)
    # (filters, kernel, stride) for each convolution, in build order.
    conv_stack = ((32, [8, 8], 4), (64, [4, 4], 2), (64, [3, 3], 1))
    for num_filters, kernel, step in conv_stack:
      hidden = tf_slim.conv2d(
          hidden, num_filters, kernel, stride=step, trainable=False)
    hidden = tf_slim.flatten(hidden)
    linear_features = tf_slim.fully_connected(hidden, 512, trainable=True)
    q_values = tf_slim.fully_connected(
        linear_features, self.num_actions, activation_fn=None)
    network_type = self._get_network_type()
    return network_type(q_values), linear_features
Beispiel #13
0
    def predict(self, features, num_predictions_per_location=1):
        """Predicts class scores.

        The features are averaged over axes 1 and 2, flattened, optionally
        passed through dropout, and fed to one fully connected layer.

        Args:
          features: A float tensor of shape [batch_size, height, width,
            channels] containing features for a batch of images.
          num_predictions_per_location: Int containing number of predictions
            per location.

        Returns:
          class_predictions_with_background: A float tensor reshaped to
            [-1, 1, num_class_slots] representing the class predictions for
            the proposals.

        Raises:
          ValueError: If num_predictions_per_location is not 1.
        """
        if num_predictions_per_location != 1:
            raise ValueError(
                'Only num_predictions_per_location=1 is supported')

        pooled = tf.reduce_mean(features, [1, 2],
                                keep_dims=True,
                                name='AvgPool')
        roi_vector = slim.flatten(pooled)
        if self._use_dropout:
            roi_vector = slim.dropout(roi_vector,
                                      keep_prob=self._dropout_keep_prob,
                                      is_training=self._is_training)

        with slim.arg_scope(self._fc_hyperparams_fn()):
            class_scores = slim.fully_connected(roi_vector,
                                                self._num_class_slots,
                                                reuse=tf.AUTO_REUSE,
                                                activation_fn=None,
                                                scope=self._scope)
        return tf.reshape(class_scores, [-1, 1, self._num_class_slots])
Beispiel #14
0
    def build_predictions(self, net, rois, is_training, initializer,
                          initializer_bbox):
        """Build the classification and box-regression heads for the ROIs.

        Args:
          net: feature tensor passed to `self._crop_pool_layer`.
          rois: regions of interest passed to `self._crop_pool_layer`.
          is_training: when truthy, dropout (keep_prob 0.5) follows each of
            the two 4096-unit layers; it is also used as the `trainable`
            flag of both output layers.
          initializer: weights initializer of the 'cls_score' layer.
          initializer_bbox: weights initializer of the 'bbox_pred' layer.

        Returns:
          A tuple ``(cls_score, cls_prob, bbox_prediction)``.
        """
        # Crop image ROIs and flatten them into one vector each.
        cropped = self._crop_pool_layer(net, rois, "pool5")
        hidden = slim.flatten(cropped, scope='flatten')

        # Two fully connected layers, each followed by dropout in training.
        for fc_scope, dropout_scope in (('fc6', 'dropout6'),
                                        ('fc7', 'dropout7')):
            hidden = slim.fully_connected(hidden, 4096, scope=fc_scope)
            if is_training:
                hidden = slim.dropout(hidden,
                                      keep_prob=0.5,
                                      is_training=True,
                                      scope=dropout_scope)

        # Scores and predictions
        head_options = dict(trainable=is_training, activation_fn=None)
        cls_score = slim.fully_connected(hidden,
                                         self._num_classes,
                                         weights_initializer=initializer,
                                         scope='cls_score',
                                         **head_options)
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_prediction = slim.fully_connected(
            hidden,
            self._num_classes * 4,
            weights_initializer=initializer_bbox,
            scope='bbox_pred',
            **head_options)

        return cls_score, cls_prob, bbox_prediction
Beispiel #15
0
    def create_network(self, input, trainable):
        """Build the Q-network: two conv/pool stages and two dense layers.

        Args:
          input: stack of frames with the stack on axis 1; it is transposed
            with the permutation [0, 2, 3, 1] so the stack becomes the last
            (channel) axis.
          trainable: forwarded to every layer; when truthy the weights are
            also L2-regularized with `self.regularization`, otherwise no
            regularizer is attached.

        Returns:
          The output of the final `self.dim_actions`-unit layer (no
          activation).
        """
        wr = slim.l2_regularizer(self.regularization) if trainable else None

        # Options common to all layers, and those common to both convolutions.
        shared = dict(weights_regularizer=wr, trainable=trainable)
        conv_options = dict(data_format="NHWC",
                            activation_fn=tf.nn.relu,
                            **shared)

        # the input is stack of black and white frames.
        # put the stack in the place of channel (last in tf)
        frames_last = tf.transpose(input, [0, 2, 3, 1])

        hidden = slim.conv2d(frames_last, 8, (7, 7), stride=3, **conv_options)
        hidden = slim.max_pool2d(hidden, 2, 2)
        hidden = slim.conv2d(hidden, 16, (3, 3), **conv_options)
        hidden = slim.max_pool2d(hidden, 2, 2)
        hidden = slim.flatten(hidden)
        hidden = slim.fully_connected(hidden,
                                      256,
                                      activation_fn=tf.nn.relu,
                                      **shared)
        return slim.fully_connected(hidden,
                                    self.dim_actions,
                                    activation_fn=None,
                                    **shared)
Beispiel #16
0
def _build_aux_head(net, end_points, num_classes, hparams, scope):
    """Auxiliary head used for all models across all datasets.

    Builds avg-pool -> 1x1 conv -> BN/ReLU -> full-map conv -> BN/ReLU ->
    flatten -> dense, and stores the result in ``end_points['AuxLogits']``.
    Nothing is returned.

    Args:
      net: feature tensor the head is attached to.
      end_points: dict that receives the 'AuxLogits' entry.
      num_classes: number of units in the final fully connected layer.
      hparams: object whose `data_format` attribute ('NHWC' or otherwise)
        selects which two axes are treated as spatial.
      scope: name of the enclosing variable scope.
    """
    with tf.compat.v1.variable_scope(scope):
        head = tf.identity(net)
        with tf.compat.v1.variable_scope('aux_logits'):
            head = slim.avg_pool2d(head, [5, 5], stride=3, padding='VALID')
            head = slim.conv2d(head, 128, [1, 1], scope='proj')
            head = tf.nn.relu(slim.batch_norm(head, scope='aux_bn0'))

            # Use the whole remaining feature map as the next kernel size.
            full_shape = head.shape
            spatial = (full_shape[1:3] if hparams.data_format == 'NHWC'
                       else full_shape[2:4])
            head = slim.conv2d(head, 768, spatial, padding='VALID')
            head = tf.nn.relu(slim.batch_norm(head, scope='aux_bn1'))

            head = slim.flatten(head)
            end_points['AuxLogits'] = slim.fully_connected(head, num_classes)
Beispiel #17
0
def inception_resnet_v2(inputs,
                        num_classes=1001,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        reuse=None,
                        scope='InceptionResnetV2',
                        create_aux_logits=True,
                        activation_fn=tf.nn.relu):
    """Creates the Inception Resnet V2 model.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
      Dimension batch_size may be undefined. If create_aux_logits is false,
      also height and width may be undefined.
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether is training or not.
    dropout_keep_prob: float, the fraction to keep before final layer.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    create_aux_logits: Whether to include the auxiliary logits. They are only
      built when num_classes is also truthy.
    activation_fn: Activation function for conv2d.

  Returns:
    net: the output of the logits layer (if num_classes is a non-zero integer),
      or the non-dropped-out input to the logits layer (if num_classes is 0 or
      None).
    end_points: the set of end_points from the inception model.
  """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):

            net, end_points = inception_resnet_v2_base(
                inputs, scope=scope, activation_fn=activation_fn)

            if create_aux_logits and num_classes:
                with tf.variable_scope('AuxLogits'):
                    aux = end_points['PreAuxLogits']
                    aux = slim.avg_pool2d(aux,
                                          5,
                                          stride=3,
                                          padding='VALID',
                                          scope='Conv2d_1a_3x3')
                    aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1')
                    # The kernel spans the whole remaining feature map, which
                    # is why the static height/width must be known here.
                    aux = slim.conv2d(aux,
                                      768,
                                      aux.get_shape()[1:3],
                                      padding='VALID',
                                      scope='Conv2d_2a_5x5')
                    aux = slim.flatten(aux)
                    aux = slim.fully_connected(aux,
                                               num_classes,
                                               activation_fn=None,
                                               scope='Logits')
                    end_points['AuxLogits'] = aux

            with tf.variable_scope('Logits'):
                # TODO(sguada,arnoegw): Consider adding a parameter global_pool which
                # can be set to False to disable pooling here (as in resnet_*()).
                kernel_size = net.get_shape()[1:3]
                if kernel_size.is_fully_defined():
                    net = slim.avg_pool2d(net,
                                          kernel_size,
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                else:
                    # Static spatial size unknown: fall back to a dynamic
                    # global average. `keepdims` replaces the deprecated
                    # `keep_dims` spelling of the same argument.
                    net = tf.reduce_mean(net, [1, 2],
                                         keepdims=True,
                                         name='global_pool')
                end_points['global_pool'] = net
                if not num_classes:
                    # Feature-extractor mode: return the pooled features.
                    return net, end_points
                net = slim.flatten(net)
                net = slim.dropout(net,
                                   dropout_keep_prob,
                                   is_training=is_training,
                                   scope='Dropout')
                end_points['PreLogitsFlatten'] = net
                logits = slim.fully_connected(net,
                                              num_classes,
                                              activation_fn=None,
                                              scope='Logits')
                end_points['Logits'] = logits
                end_points['Predictions'] = tf.nn.softmax(logits,
                                                          name='Predictions')

        return logits, end_points
Beispiel #18
0
    def create_net(self):
        """Build the network graph and return its tensors keyed by layer name.

        The configuration is read from ``self.opts["net"]`` (keys used:
        "channels", "spec_height", "hww_x", "num_filters", "wiggle_room",
        "conv_filter_width", "num_dense_units").

        Layout: input placeholder -> two bias-free convolutions with leaky
        ReLU -> max pool over the full remaining width -> transpose and
        flatten -> two bias-free dense layers (dropout, then leaky ReLU) ->
        2-unit dense layer -> softmax.

        Note that ``tf.nn.dropout`` is called unconditionally; this method has
        no training/inference switch.

        Returns:
            collections.OrderedDict with the entries "input", "conv1_1",
            "conv1_2", "pool2", "pool2_flat", "fc6", "fc7", "fc8" and
            "output", in that order.
        """
        cfg = self.opts["net"]
        n_channels = cfg["channels"]
        graph = collections.OrderedDict()
        l2_reg = slim.l2_regularizer(0.0005)

        # Every hidden layer is linear and bias-free with L2 weight decay;
        # the non-linearity is applied explicitly afterwards.
        hidden_kwargs = dict(
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=l2_reg,
        )
        leak = 1 / 3

        graph["input"] = tf.compat.v1.placeholder(
            tf.float32,
            (None, cfg["spec_height"], cfg["hww_x"] * 2, n_channels),
            name="input",
        )

        # First convolution: kernel height is the spectrogram height minus
        # the configured wiggle room.
        conv1_1 = slim.conv2d(
            graph["input"],
            cfg["num_filters"],
            (cfg["spec_height"] - cfg["wiggle_room"],
             cfg["conv_filter_width"]),
            padding="valid",
            **hidden_kwargs
        )
        graph["conv1_1"] = tf.nn.leaky_relu(conv1_1, alpha=leak)

        conv1_2 = slim.conv2d(
            graph["conv1_1"],
            cfg["num_filters"],
            (1, 3),
            padding="valid",
            **hidden_kwargs
        )
        graph["conv1_2"] = tf.nn.leaky_relu(conv1_2, alpha=leak)

        # Pool across the whole of axis 2 of the second convolution's output.
        full_width = graph["conv1_2"].shape[2]
        pooled = slim.max_pool2d(
            graph["conv1_2"],
            kernel_size=(1, full_width),
            stride=(1, 1),
        )
        graph["pool2"] = tf.transpose(pooled, (0, 3, 2, 1))
        graph["pool2_flat"] = slim.flatten(graph["pool2"])

        # fc6 and fc7 share the same recipe: dense -> dropout(0.5) -> leaky ReLU.
        source = "pool2_flat"
        for layer_name in ("fc6", "fc7"):
            dense = slim.fully_connected(
                graph[source],
                cfg["num_dense_units"],
                **hidden_kwargs
            )
            dense = tf.nn.dropout(dense, 0.5)
            graph[layer_name] = tf.nn.leaky_relu(dense, alpha=leak)
            source = layer_name

        graph["fc8"] = slim.fully_connected(graph["fc7"], 2, activation_fn=None)
        graph["output"] = tf.nn.softmax(graph["fc8"])
        return graph
def generator(z,
              progress,
              num_filters_fn,
              resolution_schedule,
              num_blocks=None,
              kernel_size=3,
              colors=3,
              to_rgb_activation=None,
              simple_arch=False,
              scope='progressive_gan_generator',
              reuse=None):
  """Generator network for the progressive GAN model.

  Args:
    z: A `Tensor` of latent vector. The first dimension must be batch size;
        it may be statically unknown.
    progress: A scalar float `Tensor` of training progress.
    num_filters_fn: A function that maps `block_id` to # of filters for the
        block.
    resolution_schedule: An object of `ResolutionSchedule`.
    num_blocks: An integer of number of blocks. None means maximum number of
        blocks, i.e. `resolution_schedule.num_resolutions`. Defaults to None.
    kernel_size: An integer of convolution kernel size.
    colors: Number of output color channels. Defaults to 3.
    to_rgb_activation: Activation function applied when output rgb. None
        (the default) means no activation, in both architecture variants.
    simple_arch: Architecture variants for lower memory usage and faster speed
    scope: A string or variable scope.
    reuse: Whether to reuse `scope`. Defaults to None which means to inherit
        the reuse option of the parent scope.
  Returns:
    A `Tensor` of model output and a dictionary of model end points. The end
    points hold 'latent_vector', 'upscaled_rgb_<i>' and 'alpha_<i>' for every
    block id, and 'predictions'.
  """
  if num_blocks is None:
    num_blocks = resolution_schedule.num_resolutions

  start_h, start_w = resolution_schedule.start_resolutions
  final_h, final_w = resolution_schedule.final_resolutions

  def _conv2d(scope, x, kernel_size, filters, padding='SAME'):
    # Convolution followed by leaky ReLU and pixel normalization.
    return layers.custom_conv2d(
        x=x,
        filters=filters,
        kernel_size=kernel_size,
        padding=padding,
        activation=lambda x: layers.pixel_norm(tf.nn.leaky_relu(x)),
        he_initializer_slope=0.0,
        scope=scope)

  def _to_rgb(x):
    # 1x1 convolution mapping features to `colors` output channels.
    return layers.custom_conv2d(
        x=x,
        filters=colors,
        kernel_size=1,
        padding='SAME',
        activation=to_rgb_activation,
        scope='to_rgb')

  he_init = tf_slim.variance_scaling_initializer()

  end_points = {}

  with tf.variable_scope(scope, reuse=reuse):
    with tf.name_scope('input'):
      x = tf_slim.flatten(z)
      end_points['latent_vector'] = x

    with tf.variable_scope(block_name(1)):
      if simple_arch:
        x_shape = tf.shape(x)
        x = tf.layers.dense(x, start_h*start_w*num_filters_fn(1),
                            kernel_initializer=he_init)
        x = tf.nn.relu(x)
        x = tf.reshape(x, [x_shape[0], start_h, start_w, num_filters_fn(1)])
      else:
        x = tf.expand_dims(tf.expand_dims(x, 1), 1)
        x = layers.pixel_norm(x)
        # Pad the 1 x 1 image to 2 * (start_h - 1) x 2 * (start_w - 1)
        # with zeros for the next conv.
        x = tf.pad(x, [[0] * 2, [start_h - 1] * 2, [start_w - 1] * 2, [0] * 2])
        # The output is start_h x start_w x num_filters_fn(1).
        x = _conv2d('conv0', x, (start_h, start_w), num_filters_fn(1), 'VALID')
        x = _conv2d('conv1', x, kernel_size, num_filters_fn(1))
      lods = [x]

    if resolution_schedule.scale_mode == 'H':
      strides = (resolution_schedule.scale_base, 1)
    else:
      strides = (resolution_schedule.scale_base,
                 resolution_schedule.scale_base)

    # Grow the feature map block by block, keeping every intermediate level
    # of detail so that each can later be rendered to RGB.
    for block_id in range(2, num_blocks + 1):
      with tf.variable_scope(block_name(block_id)):
        if simple_arch:
          x = tf.layers.conv2d_transpose(
              x,
              num_filters_fn(block_id),
              kernel_size=kernel_size,
              strides=strides,
              padding='SAME',
              kernel_initializer=he_init)
          x = tf.nn.relu(x)
        else:
          x = resolution_schedule.upscale(x, resolution_schedule.scale_base)
          x = _conv2d('conv0', x, kernel_size, num_filters_fn(block_id))
          x = _conv2d('conv1', x, kernel_size, num_filters_fn(block_id))
        lods.append(x)

    outputs = []
    for block_id in range(1, num_blocks + 1):
      with tf.variable_scope(block_name(block_id)):
        if simple_arch:
          lod = lods[block_id - 1]
          lod = tf.layers.conv2d(
              lod,
              colors,
              kernel_size=1,
              padding='SAME',
              name='to_rgb',
              kernel_initializer=he_init)
          # `to_rgb_activation` defaults to None; calling it unguarded would
          # raise TypeError, so None means "no activation" here as well.
          if to_rgb_activation is not None:
            lod = to_rgb_activation(lod)
        else:
          lod = _to_rgb(lods[block_id - 1])
        scale = resolution_schedule.scale_factor(block_id)
        lod = resolution_schedule.upscale(lod, scale)
        end_points['upscaled_rgb_{}'.format(block_id)] = lod

        # alpha_i is used to replace lod_select. Note sum(alpha_i) is
        # guaranteed to be 1.
        alpha = _generator_alpha(block_id, progress)
        end_points['alpha_{}'.format(block_id)] = alpha

        outputs.append(lod * alpha)

    predictions = tf.add_n(outputs)
    # Pass the static batch dimension through unchanged instead of forcing it
    # to int: an unknown batch size stays unknown rather than raising.
    batch_size = z.shape[0]
    predictions.set_shape([batch_size, final_h, final_w, colors])
    end_points['predictions'] = predictions

  return predictions, end_points
Beispiel #20
0
def lenet(images,
          num_classes=10,
          is_training=False,
          dropout_keep_prob=0.5,
          prediction_fn=slim.softmax,
          scope='LeNet'):
    """Builds a LeNet-style network: two conv/pool stages and two dense layers.

  The first returned value holds unnormalised logits. The result of
  `prediction_fn` applied to them is available as end_points['Predictions']:

        logits, end_points = lenet(images, is_training=False)
        probabilities = end_points['Predictions']
        predictions = tf.argmax(logits, 1)

  Args:
    images: A batch of `Tensors` of size [batch_size, height, width, channels].
    num_classes: the number of classes in the dataset. If 0 or None, the logits
      layer is omitted and the input features to the logits layer are returned
      instead.
    is_training: specifies whether or not we're currently training the model.
      It is forwarded to the dropout layer.
    dropout_keep_prob: the percentage of activation values that are retained.
    prediction_fn: a function to get predictions out of logits; it is called
      as prediction_fn(logits, scope='Predictions').
    scope: Optional variable_scope.

  Returns:
    net: a 2D Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped-out input to the logits layer
      (the 'fc3' output) if num_classes is 0 or None.
    end_points: a dictionary from components of the network to the corresponding
      activation.
  """
    end_points = {}

    with tf.variable_scope(scope, 'LeNet', [images]):
        conv1 = slim.conv2d(images, 32, [5, 5], scope='conv1')
        end_points['conv1'] = conv1

        pool1 = slim.max_pool2d(conv1, [2, 2], 2, scope='pool1')
        end_points['pool1'] = pool1

        conv2 = slim.conv2d(pool1, 64, [5, 5], scope='conv2')
        end_points['conv2'] = conv2

        pool2 = slim.max_pool2d(conv2, [2, 2], 2, scope='pool2')
        end_points['pool2'] = pool2

        flat = slim.flatten(pool2)
        end_points['Flatten'] = flat

        fc3 = slim.fully_connected(flat, 1024, scope='fc3')
        end_points['fc3'] = fc3

        # Feature-extractor mode: stop before dropout and the logits layer.
        if not num_classes:
            return fc3, end_points

        dropped = slim.dropout(fc3,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout3')
        end_points['dropout3'] = dropped

        logits = slim.fully_connected(dropped,
                                      num_classes,
                                      activation_fn=None,
                                      scope='fc4')
        end_points['Logits'] = logits

    end_points['Predictions'] = prediction_fn(logits, scope='Predictions')

    return logits, end_points
Beispiel #21
0
    def _run():
      """Forward pass through the network.

      Takes no arguments; it reads the following names, none of which is
      defined in this function (enclosing or module scope): `images`,
      `grasp_params`, `grasp_param_names`, `tile_batch`, `goal_spatial_fn`,
      `goal_vector_fn`, `num_classes`, `softmax`, `is_training`, `batch_norm`
      and `end_points`, plus attributes of `self`.

      Side effects: appends intermediate tensors to `self.activation_layers`
      and stores named tensors in `end_points`.

      Returns:
        A `(logits, end_points)` tuple.
      """
      with slim.arg_scope([slim.dropout], is_training=is_training):
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(self._l2_regularization),
            activation_fn=tf.nn.relu,
            trainable=is_training):
          with slim.arg_scope(
              [slim.conv2d, slim.max_pool2d], stride=1, padding='SAME'):
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm):
              # Only the second element of `images` is used here.
              _, grasp_image = images
              # First conv overrides the arg_scope defaults: no activation and
              # no normalizer, because batch norm + ReLU are applied
              # separately right below.
              net = slim.conv2d(
                  grasp_image,
                  64, [6, 6],
                  stride=2,
                  scope='conv1_1',
                  activation_fn=None,
                  normalizer_fn=None,
                  normalizer_params=None)
              # Old checkpoints (such as those used for tests) did not have
              # scaling on the separate batch norm operations (those not
              # associated with a conv operation), so only setting the scale
              # parameter in arg_scope would break the tests. We set scale=
              # False for these separate batch norm operations temporarily.
              # However, future users are encouraged to not set scale=False so
              # that batch_norm parameters are consistent through the whole
              # network.
              net = tf.nn.relu(slim.batch_norm(net, scale=False))
              net = slim.max_pool2d(net, [3, 3], stride=3, scope='pool1')
              self.activation_layers.append(net)
              # First conv group: self.num_convs[0] layers, scopes numbered
              # from 'conv2' upwards.
              for l in range(2, 2 + self.num_convs[0]):
                net = slim.conv2d(net, 64, [5, 5], scope='conv%d' % l)
                self.activation_layers.append(net)
              net = slim.max_pool2d(net, [3, 3], stride=3, scope='pool2')
              end_points['pool2'] = net
              self.activation_layers.append(net)
              logging.debug('pool2')
              logging.debug(net.get_shape())

              # Split the grasp parameters into named blocks, or treat them as
              # a single block called 'fcgrasp' when no names are given.
              if grasp_param_names is None:
                grasp_param_blocks = [grasp_params]
                grasp_param_block_names = ['fcgrasp']
              else:
                grasp_param_blocks = []
                grasp_param_block_names = []
                # Note: Creating variables must happen in a deterministic
                # order, otherwise some workers will look for variables on the
                # wrong parameter servers, so we sort the grasp_param_names
                # here.
                for block_name in sorted(grasp_param_names):
                  # Each entry maps a block name to (column offset, width).
                  offset, size = grasp_param_names[block_name]
                  grasp_param_blocks += [
                      tf.slice(grasp_params, [0, offset], [-1, size])
                  ]
                  grasp_param_block_names += [block_name]

              # Project every block to 256 units with a linear, un-normalized
              # layer; the projections are summed below.
              grasp_param_tensors = []
              for block, name in zip(grasp_param_blocks,
                                     grasp_param_block_names):
                grasp_param_tensors += [
                    slim.fully_connected(
                        block,
                        256,
                        scope=name,
                        activation_fn=None,
                        normalizer_fn=None,
                        normalizer_params=None)
                ]

              fcgrasp = tf.add_n(grasp_param_tensors)

              # Old checkpoints (such as those used for tests) did not have
              # scaling on the separate batch norm operations (those not
              # associated with a conv operation), so only setting the scale
              # parameter in arg_scope would break the tests. We set scale=
              # False for these separate batch norm operations temporarily.
              # However, future users are encouraged to not set scale=False so
              # that batch_norm parameters are consistent through the whole
              # network.
              fcgrasp = tf.nn.relu(slim.batch_norm(fcgrasp, scale=False))
              fcgrasp = slim.fully_connected(fcgrasp, 64, scope='fcgrasp2')
              # Reshaped to [-1, 1, 1, 64] so it can be added to the 64-channel
              # image features (presumably broadcasting over the two middle
              # axes -- confirm against the image feature layout).
              context = tf.reshape(fcgrasp, [-1, 1, 1, 64])
              end_points['fcgrasp'] = fcgrasp
              # Tile the image embedding action_batch_size times to align
              # with the expanded action dimension of action_batch_size.
              # Same image is used with all the actions in a action_batch.
              # net pre expansion should be [batch, *, *, *]
              # net post expansion should be [batch x action_batch, *, *, *]
              if tile_batch:
                net = contrib_seq2seq.tile_batch(net, self._action_batch_size)
              net = tf.add(net, context)
              logging.debug('net post add %s', net)
              end_points['vsum'] = net
              self.activation_layers.append(net)
              logging.debug('vsum')
              logging.debug(net.get_shape())
              # Second conv group: self.num_convs[1] layers, scope numbering
              # continues where the first group stopped.
              for l in range(2 + sum(self.num_convs[:1]),
                             2 + sum(self.num_convs[:2])):
                net = slim.conv2d(net, 64, [3, 3], scope='conv%d' % l)
                logging.debug('conv%d', l)
                self.activation_layers.append(net)
              logging.debug(net.get_shape())
              net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool3')
              logging.debug('pool3')
              logging.debug(net.get_shape())
              self.activation_layers.append(net)
              # Third conv group: self.num_convs[2] layers, using VALID padding
              # instead of the arg_scope default 'SAME'.
              for l in range(2 + sum(self.num_convs[:2]),
                             2 + sum(self.num_convs[:3])):
                net = slim.conv2d(
                    net, 64, [3, 3], scope='conv%d' % l, padding='VALID')
                self.activation_layers.append(net)
              logging.debug('final conv')
              logging.debug(net.get_shape())
              end_points['final_conv'] = net

              batch_size = tf.shape(net)[0]
              if goal_spatial_fn is not None:
                goal_spatial = goal_spatial_fn()
                # Tile goal to match net batch size (e.g. CEM).
                goal_batch_size = tf.shape(goal_spatial)[0]
                goal_spatial = tf.tile(
                    goal_spatial, [batch_size//goal_batch_size, 1, 1, 1])
                # Merging features in style of Fang 2017.
                net = tf.concat([net, goal_spatial], axis=3)
              net = slim.flatten(net, scope='flatten')

              # Optional goal vector, tiled the same way and concatenated to
              # the flattened features.
              if goal_vector_fn is not None:
                goal_vector = goal_vector_fn()
                goal_batch_size = tf.shape(goal_vector)[0]
                goal_vector = tf.tile(
                    goal_vector, [batch_size//goal_batch_size, 1])
                net = tf.concat([net, goal_vector], axis=1)

              for l in range(self.hid_layers):
                net = slim.fully_connected(net, 64, scope='fc%d' % l)

              # The logits scope name encodes the class count when there is
              # more than one class.
              name = 'logit'
              if num_classes > 1:
                name = 'logit_%d' % num_classes
              logits = slim.fully_connected(
                  net,
                  num_classes,
                  activation_fn=None,
                  scope=name,
                  normalizer_fn=None,
                  normalizer_params=None)
              end_points['logits'] = logits
              if softmax:
                predictions = tf.nn.softmax(logits)
              else:
                predictions = tf.nn.sigmoid(logits)
              if tile_batch:

                # Regroup the tiled batch so that the second axis has length
                # self._action_batch_size.
                if num_classes > 1:
                  predictions = tf.reshape(
                      predictions, [-1, self._action_batch_size, num_classes])
                else:
                  predictions = tf.reshape(predictions,
                                           [-1, self._action_batch_size])
              end_points['predictions'] = predictions
              return logits, end_points
def inception_resnet_v1(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Builds the Inception Resnet V1 network up to its bottleneck layer.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not; forwarded to batch norm and
        dropout.
      dropout_keep_prob: float, the fraction to keep before the final layer.
      bottleneck_layer_size: number of units of the final 'Bottleneck' fully
        connected layer.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional variable_scope.
    Returns:
      net: output of the 'Bottleneck' fully connected layer (no activation).
      end_points: dict of intermediate tensors collected while building.
    """
    end_points = {}

    # Stem description, one row per layer in execution order:
    # (layer function, positional args after the input, keyword overrides of
    # the arg_scope defaults, scope name == end_points key).
    # Trailing comments give the output sizes quoted in the original code.
    valid = {'padding': 'VALID'}
    valid_stride2 = {'stride': 2, 'padding': 'VALID'}
    stem = (
        (slim.conv2d, (32, 3), valid_stride2, 'Conv2d_1a_3x3'),    # 149 x 149 x 32
        (slim.conv2d, (32, 3), valid, 'Conv2d_2a_3x3'),            # 147 x 147 x 32
        (slim.conv2d, (64, 3), {}, 'Conv2d_2b_3x3'),               # 147 x 147 x 64
        (slim.max_pool2d, (3,), valid_stride2, 'MaxPool_3a_3x3'),  # 73 x 73 x 64
        (slim.conv2d, (80, 1), valid, 'Conv2d_3b_1x1'),            # 73 x 73 x 80
        (slim.conv2d, (192, 3), valid, 'Conv2d_4a_3x3'),           # 71 x 71 x 192
        (slim.conv2d, (256, 3), valid_stride2, 'Conv2d_4b_3x3'),   # 35 x 35 x 256
    )

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training), \
             slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='SAME'):
            features = inputs
            for layer_fn, layer_args, overrides, layer_name in stem:
                features = layer_fn(features, *layer_args,
                                    scope=layer_name, **overrides)
                end_points[layer_name] = features

            # 5 x Inception-resnet-A
            features = slim.repeat(features, 5, block35, scale=0.17)

            # Reduction-A
            with tf.variable_scope('Mixed_6a'):
                features = reduction_a(features, 192, 192, 256, 384)
            end_points['Mixed_6a'] = features

            # 10 x Inception-Resnet-B
            features = slim.repeat(features, 10, block17, scale=0.10)

            # Reduction-B
            with tf.variable_scope('Mixed_7a'):
                features = reduction_b(features)
            end_points['Mixed_7a'] = features

            # 5 x Inception-Resnet-C, then one more block without activation.
            features = slim.repeat(features, 5, block8, scale=0.20)
            features = block8(features, activation_fn=None)

            with tf.variable_scope('Logits'):
                end_points['PrePool'] = features
                # Average over the full static spatial extent.
                # pylint: disable=no-member
                spatial_extent = features.get_shape()[1:3]
                features = slim.avg_pool2d(features,
                                           spatial_extent,
                                           padding='VALID',
                                           scope='AvgPool_1a_8x8')
                features = slim.flatten(features)
                features = slim.dropout(features,
                                        dropout_keep_prob,
                                        is_training=is_training,
                                        scope='Dropout')
                end_points['PreLogitsFlatten'] = features

            # Linear projection to the bottleneck size; note reuse=False is
            # passed explicitly regardless of the `reuse` argument.
            bottleneck = slim.fully_connected(features,
                                              bottleneck_layer_size,
                                              activation_fn=None,
                                              scope='Bottleneck',
                                              reuse=False)

    return bottleneck, end_points
Beispiel #23
0
 def forward(self):
     """Flatten the input layer's output after reordering its axes.

     Axes of `self.inp.out` are permuted with [0, 3, 1, 2] (last axis moved
     to position 1) before flattening; the result is stored in `self.out`.
     """
     reordered = tf.transpose(self.inp.out, perm=[0, 3, 1, 2])
     self.out = slim.flatten(reordered, scope=self.scope)
def decoder(encoded, scales, styles, texture_only=False, style_size=8, image_size=(112,112),
        keep_prob=1.0, phase_train=True, weight_decay=0.0, reuse=None, scope='Decoder'):
    """Decode a feature map into a rendered image and, optionally, warp it.

    Three sub-networks are built under `scope`:
      * 'StyleController': maps `styles` to per-channel `gamma`/`beta` used by
        the adaptive instance normalization in the decoder.
      * 'Decoder': three residual blocks, two 2x upscaling stages and a final
        tanh, producing `images_rendered`.
      * 'WarpController' (skipped when `texture_only`): predicts 16 control
        points and their displacements from `encoded` and warps the rendered
        image with `sparse_image_warp`.

    Shapes are printed while the graph is built. They are read with
    `shape.as_list()`, which works whether the static dimensions are
    `Dimension` objects or plain ints.

    Args:
      encoded: feature map to decode (NOTE(review): the residual blocks add
        4*64-channel tensors to it, so it presumably has 256 channels --
        confirm against the encoder).
      scales: per-sample factor; reshaped to [-1, 1] and multiplied into the
        predicted displacements. Only used when `texture_only` is False.
      styles: style codes fed to the style controller, or None to sample
        `tf.random.normal((batch_size, style_size))`.
      texture_only: if True, return right after rendering, without warping.
      style_size: width of the randomly sampled style code.
      image_size: (height, width) used to centre the initial control points.
      keep_prob: not referenced in this function.
      phase_train: forwarded as `is_training` to dropout and batch norm.
      weight_decay: L2 coefficient; the regularizer uses 0.5 * weight_decay.
      reuse: forwarded to the outer variable scope.
      scope: name of the outer variable scope.

    Returns:
      `images_rendered` if `texture_only` is True, otherwise the tuple
      `(images_transformed, images_rendered, ldmark_pred, ldmark_diff)`.
    """
    with tf.compat.v1.variable_scope(scope, reuse=reuse):
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        # weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=2.0),
                        weights_regularizer=tf.keras.regularizers.l2(0.5 * (weight_decay))):
            with slim.arg_scope([slim.dropout, slim.batch_norm], is_training=phase_train):
                with slim.arg_scope([slim.fully_connected],
                    normalizer_fn=layer_norm, normalizer_params=None):
                    print('{} input shape:'.format(scope), encoded.shape.as_list())

                    batch_size = tf.shape(input=encoded)[0]
                    h, w = tuple(image_size)
                    k = 64

                    with tf.compat.v1.variable_scope('StyleController'):

                        if styles is None:
                            styles = tf.random.normal((batch_size, style_size))

                        net = tf.identity(styles, name='input_style')

                        net = slim.fully_connected(net, 128, scope='fc2')
                        print('module fc2 shape:', net.shape.as_list())

                        net = slim.fully_connected(net, 128, scope='fc3')
                        print('module fc3 shape:', net.shape.as_list())

                        # gamma/beta are linear, un-normalized projections,
                        # reshaped so they broadcast over the spatial axes.
                        gamma = slim.fully_connected(net, 4*k, activation_fn=None, normalizer_fn=None, scope='fc4')
                        gamma = tf.reshape(gamma, [-1, 1, 1, 4*k], name='gamma')
                        print('gamma shape:', gamma.shape.as_list())

                        beta = slim.fully_connected(net, 4*k, activation_fn=None, normalizer_fn=None, scope='fc5')
                        beta = tf.reshape(beta, [-1, 1, 1, 4*k], name='beta')
                        print('beta shape:', beta.shape.as_list())

                    with tf.compat.v1.variable_scope('Decoder'):
                        print('-- Decoder')
                        net = encoded

                        # Adaptive instance norm: normalize without learned
                        # affine terms, then apply the style-derived ones.
                        adain = lambda x : gamma * instance_norm(x, center=False, scale=False) + beta

                        with slim.arg_scope([slim.conv2d_transpose, slim.conv2d],
                                    normalizer_fn=adain, normalizer_params=None):
                            for i in range(3):
                                net_ = conv(net, 4*k, 3, scope='res{}_0'.format(i))
                                net += conv(net_, 4*k, 3, activation_fn=None, biases_initializer=None, scope='res{}_1'.format(i))
                                print('module res{} shape:'.format(i), net.shape.as_list())

                        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose, slim.fully_connected],
                                normalizer_fn=layer_norm, normalizer_params=None):
                            net = upscale2d(net, 2)
                            net = conv(net, 2*k, 5, pad=2, scope='deconv1_1')
                            print('module deconv1 shape:', net.shape.as_list())

                            net = upscale2d(net, 2)
                            net = conv(net, k, 5, pad=2, scope='deconv2_1')

                        # Final projection to 3 channels; its weights start at
                        # zero (constant initializer).
                        net = conv(net, 3, 7, pad=3, activation_fn=None, normalizer_fn=None,
                                    weights_initializer=tf.compat.v1.constant_initializer(0.0), scope='conv_image')
                        images_rendered = tf.nn.tanh(net, name='images_rendered')
                        print('images_rendered shape:', images_rendered.shape.as_list())

                    if texture_only:
                        return images_rendered

                    with tf.compat.v1.variable_scope('WarpController'):

                        print('-- WarpController')

                        net = encoded
                        warp_input = tf.identity(images_rendered, name='warp_input')

                        net = slim.flatten(net)

                        net = slim.fully_connected(net, 128, scope='fc1')
                        print('module fc1 shape:', net.shape.as_list())

                        num_ldmark = 16

                        # Predict the control points
                        # Initial positions: random offsets (std 50) around the
                        # image centre, stored in a tf.Variable.
                        ldmark_mean = (np.random.normal(0,50, (num_ldmark,2)) + np.array([[0.5*h,0.5*w]])).flatten()
                        ldmark_mean = tf.Variable(ldmark_mean.astype(np.float32), name='ldmark_mean')
                        print('ldmark_mean shape:', ldmark_mean.shape.as_list())

                        ldmark_pred = slim.fully_connected(net, num_ldmark*2,
                            weights_initializer=tf.compat.v1.truncated_normal_initializer(stddev=1.0),
                            normalizer_fn=None, activation_fn=None, biases_initializer=None, scope='fc_ldmark')
                        ldmark_pred = ldmark_pred + ldmark_mean
                        print('ldmark_pred shape:', ldmark_pred.shape.as_list())
                        ldmark_pred = tf.identity(ldmark_pred, name='ldmark_pred')

                        # Predict the displacements
                        ldmark_diff = slim.fully_connected(net, num_ldmark*2,
                            normalizer_fn=None,  activation_fn=None, scope='fc_diff')
                        print('ldmark_diff shape:', ldmark_diff.shape.as_list())
                        ldmark_diff = tf.identity(ldmark_diff, name='ldmark_diff')
                        ldmark_diff = tf.identity(tf.reshape(scales,[-1,1]) * ldmark_diff, name='ldmark_diff_scaled')

                        # Source points are the predicted landmarks; the
                        # destination points are those plus the displacement.
                        src_pts = tf.reshape(ldmark_pred, [-1, num_ldmark ,2])
                        dst_pts = tf.reshape(ldmark_pred + ldmark_diff, [-1, num_ldmark, 2])

                        # Only consumed by the commented-out summaries below;
                        # kept so the graph stays unchanged.
                        diff_norm = tf.reduce_mean(input_tensor=tf.norm(tensor=src_pts-dst_pts, axis=[1,2]))
                        # tf.summary.scalar('diff_norm', diff_norm)
                        # tf.summary.scalar('mark', ldmark_pred[0,0])

                        images_transformed, dense_flow = sparse_image_warp(warp_input, src_pts, dst_pts,
                                regularization_weight = 1e-6, num_boundary_points=0)
                        dense_flow = tf.identity(dense_flow, name='dense_flow')

                return images_transformed, images_rendered, ldmark_pred, ldmark_diff
def loss(self, net_out):
    """
    Build the weighted sum-of-squares detection loss on top of net_out.

    net_out is sliced as a 2-D tensor whose columns hold, in this order,
    the class probabilities (SS * C values), the box confidences (SS * B)
    and the box coordinates (SS * B * 4), where SS = side * side.

    Side effects (nothing is returned):
      * self.placeholders - dict of the ground-truth placeholders
      * self.fetch        - extended with the flattened targets / weights
      * self.loss         - scalar loss tensor (also written as a summary)
    """
    # hyper-parameters taken from the model meta
    meta = self.meta
    class_scale = float(meta['class_scale'])
    object_scale = float(meta['object_scale'])
    noobject_scale = float(meta['noobject_scale'])
    coord_scale = float(meta['coord_scale'])
    S = meta['side']
    B = meta['num']
    C = meta['classes']
    SS = S * S  # total number of grid cells
    model_name = meta['model']

    print('{} loss hyper-parameters:'.format(model_name))
    print('\tside    = {}'.format(S))
    print('\tbox     = {}'.format(B))
    print('\tclasses = {}'.format(C))
    print('\tscales  = {}'.format(
        [class_scale, object_scale, noobject_scale, coord_scale]))

    per_cell_classes = [None, SS, C]
    per_cell_boxes = [None, SS, B]

    def float_input(shape):
        # every ground-truth feed is a float32 placeholder
        return tf.placeholder(tf.float32, shape)

    # targets
    true_probs = float_input(per_cell_classes)
    true_confs = float_input(per_cell_boxes)
    true_coord = float_input(per_cell_boxes + [4])
    # per-class weights of the L2 term
    class_mask = float_input(per_cell_classes)
    # ground-truth box geometry needed for the IOU
    true_areas = float_input(per_cell_boxes)
    true_upleft = float_input(per_cell_boxes + [2])
    true_botright = float_input(per_cell_boxes + [2])

    self.placeholders = {
        'probs': true_probs,
        'confs': true_confs,
        'coord': true_coord,
        'proid': class_mask,
        'areas': true_areas,
        'upleft': true_upleft,
        'botright': true_botright
    }

    # predicted boxes: everything after the class / confidence columns
    pred_boxes = net_out[:, SS * (C + B):]
    pred_boxes = tf.reshape(pred_boxes, [-1, SS, B, 4])
    # the last two coordinates are squared and rescaled to grid-cell units
    pred_wh = tf.pow(pred_boxes[:, :, :, 2:4], 2) * S
    pred_area = pred_wh[:, :, :, 0] * pred_wh[:, :, :, 1]
    pred_centers = pred_boxes[:, :, :, 0:2]  # [batch, SS, B, 2]
    pred_upleft = pred_centers - (pred_wh * .5)  # [batch, SS, B, 2]
    pred_botright = pred_centers + (pred_wh * .5)  # [batch, SS, B, 2]

    # overlap rectangle between prediction and ground truth
    overlap_upleft = tf.maximum(pred_upleft, true_upleft)
    overlap_botright = tf.minimum(pred_botright, true_botright)
    overlap_wh = overlap_botright - overlap_upleft
    overlap_wh = tf.maximum(overlap_wh, 0.0)  # disjoint boxes -> zero area
    overlap = tf.multiply(overlap_wh[:, :, :, 0], overlap_wh[:, :, :, 1])

    # only the box with the highest IOU in each cell keeps its confidence
    union = true_areas + pred_area - overlap
    iou = tf.truediv(overlap, union)
    top_iou = tf.reduce_max(iou, [2], True)
    is_best = tf.equal(iou, top_iou)
    is_best = tf.to_float(is_best)
    confs = tf.multiply(is_best, true_confs)

    # per-element weights of the three loss terms
    conf_weight = noobject_scale * (1. - confs) + object_scale * confs
    confs_per_coord = tf.concat(4 * [tf.expand_dims(confs, -1)], 3)
    coord_weight = coord_scale * confs_per_coord
    prob_weight = class_scale * class_mask

    # bring targets and weights to the 2-D layout of net_out
    probs = slim.flatten(true_probs)
    prob_weight = slim.flatten(prob_weight)
    confs = slim.flatten(confs)
    conf_weight = slim.flatten(conf_weight)
    coord = slim.flatten(true_coord)
    coord_weight = slim.flatten(coord_weight)

    self.fetch += [probs, confs, conf_weight, coord_weight, prob_weight]
    target = tf.concat([probs, confs, coord], 1)
    weights = tf.concat([prob_weight, conf_weight, coord_weight], 1)
    print('Building {} loss'.format(model_name))
    squared_error = tf.pow(net_out - target, 2)
    weighted_error = tf.multiply(squared_error, weights)
    per_sample = tf.reduce_sum(weighted_error, 1)
    self.loss = .5 * tf.reduce_mean(per_sample)
    tf.summary.scalar('{} loss'.format(model_name), self.loss)
Beispiel #26
0
def inception_v4(inputs,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 reuse=None,
                 scope='InceptionV4'):
    """Builds an Inception V4 trunk followed by a 512-unit embedding head.

    The output of inception_v4_base is globally average-pooled, passed
    through dropout, flattened and projected by a fully connected layer.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not; forwarded to slim.batch_norm
        and slim.dropout through an arg_scope.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: a [batch_size, 512] tensor, the fully connected output squashed
        by (tanh(x) + 1) / 2.
    """
    with tf.variable_scope(scope, 'InceptionV4', [inputs],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            # the end points produced by the base network are not used here
            net, _ = inception_v4_base(inputs, scope=scope)

            with slim.arg_scope(
                    [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1, padding='SAME'), \
                    tf.variable_scope('Embeddings'):
                # Global pooling over the spatial axes. A pooling layer
                # needs a static kernel size, so fall back to a mean
                # reduction when height / width are unknown.
                spatial_dims = net.get_shape()[1:3]
                if not spatial_dims.is_fully_defined():
                    net = tf.reduce_mean(input_tensor=net, axis=[1, 2],
                                         keepdims=True, name='global_pool')
                else:
                    net = slim.avg_pool2d(net, spatial_dims, padding='VALID',
                                          scope='AvgPool_1a')
                net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')
                net = slim.flatten(net, scope='PreEmbeddingsFlatten')
                net = slim.fully_connected(net, 512, activation_fn=None,
                                           scope='Embeddings')
                # map the embedding from tanh's (-1, 1) range into (0, 1)
                net = (tf.math.tanh(net) + 1) / 2
        return net
Beispiel #27
0
def inception_resnet_v2(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV2'):
    """Creates the Inception Resnet V2 model with a linear bottleneck head.

    Spatial sizes quoted in the comments below hold for a 299 x 299 input.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: number of outputs of the final fully connected
        'Bottleneck' layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.
    Returns:
      net: the [batch_size, bottleneck_layer_size] output of the
        'Bottleneck' layer (no activation function).
      end_points: dict mapping layer names to the intermediate tensors
        recorded while building the model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse), \
            slim.arg_scope([slim.batch_norm, slim.dropout],
                           is_training=is_training), \
            slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                           stride=1, padding='SAME'):

        # ---- stem ----
        # 149 x 149 x 32
        net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                          scope='Conv2d_1a_3x3')
        end_points['Conv2d_1a_3x3'] = net
        # 147 x 147 x 32
        net = slim.conv2d(net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3')
        end_points['Conv2d_2a_3x3'] = net
        # 147 x 147 x 64
        net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
        end_points['Conv2d_2b_3x3'] = net
        # 73 x 73 x 64
        net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                              scope='MaxPool_3a_3x3')
        end_points['MaxPool_3a_3x3'] = net
        # 73 x 73 x 80
        net = slim.conv2d(net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1')
        end_points['Conv2d_3b_1x1'] = net
        # 71 x 71 x 192
        net = slim.conv2d(net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3')
        end_points['Conv2d_4a_3x3'] = net
        # 35 x 35 x 192
        net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                              scope='MaxPool_5a_3x3')
        end_points['MaxPool_5a_3x3'] = net

        # ---- Mixed_5b: four parallel branches, 35 x 35 x 320 ----
        with tf.variable_scope('Mixed_5b'):
            with tf.variable_scope('Branch_0'):
                branch_0 = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
            with tf.variable_scope('Branch_1'):
                branch_1 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
                branch_1 = slim.conv2d(branch_1, 64, 5, scope='Conv2d_0b_5x5')
            with tf.variable_scope('Branch_2'):
                branch_2 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
                branch_2 = slim.conv2d(branch_2, 96, 3, scope='Conv2d_0b_3x3')
                branch_2 = slim.conv2d(branch_2, 96, 3, scope='Conv2d_0c_3x3')
            with tf.variable_scope('Branch_3'):
                branch_3 = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
                                           scope='AvgPool_0a_3x3')
                branch_3 = slim.conv2d(branch_3, 64, 1, scope='Conv2d_0b_1x1')
            net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

        end_points['Mixed_5b'] = net
        net = slim.repeat(net, 10, block35, scale=0.17)

        # ---- Mixed_6a: stride-2 reduction, 35 x 35 -> 17 x 17 ----
        with tf.variable_scope('Mixed_6a'):
            with tf.variable_scope('Branch_0'):
                branch_0 = slim.conv2d(net, 384, 3, stride=2, padding='VALID',
                                       scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_1'):
                branch_1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                branch_1 = slim.conv2d(branch_1, 256, 3, scope='Conv2d_0b_3x3')
                branch_1 = slim.conv2d(branch_1, 384, 3, stride=2,
                                       padding='VALID', scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_2'):
                branch_2 = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                           scope='MaxPool_1a_3x3')
            net = tf.concat([branch_0, branch_1, branch_2], 3)

        end_points['Mixed_6a'] = net
        net = slim.repeat(net, 20, block17, scale=0.10)

        # ---- Mixed_7a: second stride-2 reduction ----
        with tf.variable_scope('Mixed_7a'):
            with tf.variable_scope('Branch_0'):
                branch_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                branch_0 = slim.conv2d(branch_0, 384, 3, stride=2,
                                       padding='VALID', scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_1'):
                branch_1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                branch_1 = slim.conv2d(branch_1, 288, 3, stride=2,
                                       padding='VALID', scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_2'):
                branch_2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                branch_2 = slim.conv2d(branch_2, 288, 3, scope='Conv2d_0b_3x3')
                branch_2 = slim.conv2d(branch_2, 320, 3, stride=2,
                                       padding='VALID', scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_3'):
                branch_3 = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                           scope='MaxPool_1a_3x3')
            net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

        end_points['Mixed_7a'] = net

        # nine scaled block8 repeats, then one more without activation
        net = slim.repeat(net, 9, block8, scale=0.20)
        net = block8(net, activation_fn=None)

        net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
        end_points['Conv2d_7b_1x1'] = net

        with tf.variable_scope('Logits'):
            end_points['PrePool'] = net
            # average over the whole remaining spatial extent
            #pylint: disable=no-member
            net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                  scope='AvgPool_1a_8x8')
            net = slim.flatten(net)

            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='Dropout')

            end_points['PreLogitsFlatten'] = net

        # linear projection to the requested embedding size
        net = slim.fully_connected(net, bottleneck_layer_size,
                                   activation_fn=None, scope='Bottleneck',
                                   reuse=False)

    return net, end_points
Beispiel #28
0
def inception_v3(images,
                 trainable=True,
                 is_training=True,
                 weight_decay=0.00004,
                 stddev=0.1,
                 dropout_keep_prob=0.8,
                 use_batch_norm=True,
                 batch_norm_params=None,
                 add_summaries=True,
                 scope="InceptionV3"):
    """Builds an Inception V3 subgraph for image embeddings.

    Args:
      images: A float32 Tensor of shape [batch, height, width, channels].
      trainable: Whether the inception submodel should be trainable or not.
      is_training: Boolean indicating training mode or not.
      weight_decay: Coefficient for weight regularization.
      stddev: The standard deviation of the truncated normal weight initializer.
      dropout_keep_prob: Dropout keep probability.
      use_batch_norm: Whether to use batch normalization. When False the
        convolutions are built without any normalizer and
        batch_norm_params is ignored.
      batch_norm_params: Parameters for batch normalization. See
        tf.contrib.layers.batch_norm for details. Defaults are used when
        this is empty or None.
      add_summaries: Whether to add activation summaries.
      scope: Optional Variable scope.

    Returns:
      net: The output of inception_v3_base after average pooling over the
        full spatial extent, dropout and flattening.
    """
    # Only consider the inception model to be in training mode if it's trainable.
    is_inception_model_training = trainable and is_training

    if use_batch_norm:
        normalizer_fn = slim.batch_norm
        # Default parameters for batch normalization.
        if not batch_norm_params:
            batch_norm_params = {
                "is_training": is_inception_model_training,
                "trainable": trainable,
                # Decay for the moving averages.
                "decay": 0.9997,
                # Epsilon to prevent 0s in variance.
                "epsilon": 0.001,
                # Collection containing the moving mean and moving variance.
                "variables_collections": {
                    "beta": None,
                    "gamma": None,
                    "moving_mean": ["moving_vars"],
                    "moving_variance": ["moving_vars"],
                }
            }
    else:
        # Clearing only the parameters is not enough: a conv layer that is
        # given normalizer_fn=slim.batch_norm with normalizer_params=None
        # still applies batch norm with its default settings. Drop the
        # normalizer itself so use_batch_norm=False really disables it.
        normalizer_fn = None
        batch_norm_params = None

    if trainable:
        weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    else:
        # frozen weights need no regularization term
        weights_regularizer = None

    with tf.compat.v1.variable_scope(scope, "InceptionV3", [images]) as scope:
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_regularizer=weights_regularizer,
                            trainable=trainable):
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=tf.compat.v1.truncated_normal_initializer(
                        stddev=stddev),
                    activation_fn=tf.nn.relu,
                    normalizer_fn=normalizer_fn,
                    normalizer_params=batch_norm_params):
                net, end_points = inception_v3_base(images, scope=scope)
                with tf.compat.v1.variable_scope("logits"):
                    # pool over the whole remaining spatial extent
                    shape = net.get_shape()
                    net = slim.avg_pool2d(net,
                                          shape[1:3],
                                          padding="VALID",
                                          scope="pool")
                    net = slim.dropout(net,
                                       keep_prob=dropout_keep_prob,
                                       is_training=is_inception_model_training,
                                       scope="dropout")
                    net = slim.flatten(net, scope="flatten")

    # Add summaries.
    if add_summaries:
        for v in end_points.values():
            slim.summarize_activation(v)

    return net
Beispiel #29
0
    def build_mnf_lenet(self, x, sample=True):
        """Apply the MNF LeNet (two conv + two dense layers) to x.

        On the first call (self.built is falsy) the four layers are created
        and stored in self.layers; later calls reuse the stored layers, so
        the same layer objects are applied to every input.

        x      : input tensor fed to the first convolutional layer
        sample : forwarded as the sample keyword to every layer call

        Returns the output of the last dense layer (self.nb_classes units).
        """
        first_build = not self.built
        if first_build:
            self.layers = []

        def shared_kwargs():
            # keyword arguments that every MNF layer receives unchanged
            return dict(N=self.N,
                        flows_q=self.flows_q,
                        flows_r=self.flows_r,
                        use_z=self.use_z,
                        logging=self.logging,
                        learn_p=self.learn_p,
                        prior_var=self.prior_var_w,
                        prior_var_b=self.prior_var_b,
                        thres_var=self.thres_var,
                        flow_dim_h=self.flow_dim_h)

        def pool_2x2(tensor):
            # 2x2 max pooling with stride 2
            return tf.nn.max_pool(tensor, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')

        with tf.variable_scope(self.opts):
            # first convolution (5x5) -> pooling -> activation
            if first_build:
                conv_a = Conv2DMNF(self.layer_dims[0], 5, 5,
                                   input_shape=self.input_shape,
                                   border_mode='VALID',
                                   **shared_kwargs())
                self.layers.append(conv_a)
            else:
                conv_a = self.layers[0]
            h1 = self.activation(pool_2x2(conv_a(x, sample=sample)))

            # second convolution (5x5) -> pooling -> activation -> flatten
            if first_build:
                # the batch axis stays unknown, the rest comes from h1
                shape = [None] + [s.value for s in h1.get_shape()[1:]]
                conv_b = Conv2DMNF(self.layer_dims[1], 5, 5,
                                   input_shape=shape,
                                   border_mode='VALID',
                                   **shared_kwargs())
                self.layers.append(conv_b)
            else:
                conv_b = self.layers[1]
            h2 = slim.flatten(
                self.activation(pool_2x2(conv_b(h1, sample=sample))))

            # hidden dense layer
            if first_build:
                dense_hidden = DenseMNF(self.layer_dims[2],
                                        input_dim=h2.get_shape()[1].value,
                                        **shared_kwargs())
                self.layers.append(dense_hidden)
            else:
                dense_hidden = self.layers[2]
            h3 = self.activation(dense_hidden(h2, sample=sample))

            # output dense layer; it is only applied after leaving the scope
            if first_build:
                dense_out = DenseMNF(self.nb_classes,
                                     input_dim=h3.get_shape()[1].value,
                                     **shared_kwargs())
                self.layers.append(dense_out)
            else:
                dense_out = self.layers[3]

        if first_build:
            self.built = True
        return dense_out(h3, sample=sample)
Beispiel #30
0
    def __init__(self, h_size):
        """Build the dueling Q-network graph, its loss and its train op.

        h_size is the number of channels of the last convolution; that
        output is split into two equal halves, so h_size has to be even.
        """
        # Width of the advantage head and depth of the one-hot action
        # encoding; both have to agree.
        num_actions = 2

        def valid_conv(inputs, num_outputs, kernel_size, stride):
            # all four convolutions use VALID padding and no bias term
            return slim.conv2d(inputs=inputs,
                               num_outputs=num_outputs,
                               kernel_size=kernel_size,
                               stride=stride,
                               padding='VALID',
                               biases_initializer=None)

        # A frame arrives flattened (84672 = 168 * 168 * 3 values) and is
        # reshaped back into an image before the convolutional stack.
        self.scalarInput = v1.placeholder(shape=[None, 84672],
                                          dtype=tf.float32)
        self.imageIn = tf.reshape(self.scalarInput, shape=[-1, 168, 168, 3])

        # spatial size per layer: 168 -> 27 -> 12 -> 7 -> 1
        self.conv1 = valid_conv(self.imageIn, 32, [12, 12], [6, 6])
        self.conv2 = valid_conv(self.conv1, 64, [5, 5], [2, 2])
        self.conv3 = valid_conv(self.conv2, 64, [6, 6], [1, 1])
        self.conv4 = valid_conv(self.conv3, h_size, [7, 7], [1, 1])

        # Split the channels of the last convolution into an advantage
        # stream and a value stream, each with h_size // 2 features.
        self.streamAC, self.streamVC = tf.split(self.conv4, 2, axis=3)
        self.streamA = slim.flatten(self.streamAC)
        self.streamV = slim.flatten(self.streamVC)

        # Glorot-uniform weights for the two linear heads
        glorot = tf.initializers.GlorotUniform()
        self.AW = tf.Variable(glorot([h_size // 2, num_actions]))
        self.VW = tf.Variable(glorot([h_size // 2, 1]))
        self.Advantage = tf.matmul(self.streamA, self.AW)
        self.Value = tf.matmul(self.streamV, self.VW)

        # Q = V + (A - mean(A)): the advantages are centred per sample
        # before being added to the state value.
        mean_advantage = tf.reduce_mean(self.Advantage, axis=1, keepdims=True)
        centred_advantage = tf.subtract(self.Advantage, mean_advantage)
        self.Qout = self.Value + centred_advantage
        self.predict = tf.argmax(self.Qout, 1)
        self.extract_value, self.extract_index = tf.nn.top_k(self.Qout, 1,
                                                             sorted=True)

        # Loss: squared difference between the target Q value and the
        # predicted Q value of the action that was actually taken.
        self.targetQ = v1.placeholder(shape=[None], dtype=tf.float32)
        self.actions = v1.placeholder(shape=[None], dtype=tf.int32)
        self.actions_onehot = tf.one_hot(self.actions, num_actions,
                                         dtype=tf.float32)

        # keep only the Q value of the chosen action
        chosen_q = tf.multiply(self.Qout, self.actions_onehot)
        self.Q = tf.reduce_sum(chosen_q, axis=1)

        self.td_error = tf.square(self.targetQ - self.Q)
        self.loss = tf.reduce_mean(self.td_error)
        self.trainer = v1.train.AdamOptimizer(learning_rate=0.001)
        self.updateModel = self.trainer.minimize(self.loss)