Exemple #1
0
def encoder(x):
    """Encode `x` with three strided convolutions and four dense layers.

    Every layer is built with ``activation_fn=tf.identity`` and
    ``normalizer_fn=slim.batch_norm`` and is followed by ``lrelu``.  Each
    resulting tensor is printed as the graph is built.

    Args:
        x: Input tensor handed to the first convolution.

    Returns:
        The ``lrelu`` output of the final 8-unit fully connected layer.
    """
    net = x

    # Three kernel-size-2, stride-2 convolutions: 32 -> 64 -> 128 outputs.
    for name, depth in (('conv1', 32), ('conv2', 64), ('conv3', 128)):
        net = slim.convolution(net, depth, 2, stride=2,
                               activation_fn=tf.identity,
                               normalizer_fn=slim.batch_norm,
                               scope='e_' + name)
        net = lrelu(net)
        # Single-argument print works as a statement (Python 2) and as a
        # function (Python 3) and emits the same text in both.
        print('%s:  %s' % (name, net))

    # Flatten everything but the batch axis; `batch_size` is a module-level
    # name defined outside this function.
    net = tf.reshape(net, [batch_size, -1])

    # Four fully connected layers: 256 -> 64 -> 32 -> 8 units.
    for name, units in (('fc1', 256), ('fc2', 64), ('fc3', 32), ('fc4', 8)):
        net = slim.fully_connected(net, units,
                                   normalizer_fn=slim.batch_norm,
                                   activation_fn=tf.identity,
                                   scope='e_' + name)
        net = lrelu(net)
        print('%s:  %s' % (name, net))

    return net
Exemple #2
0
  def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    """Creates a model that convolves over the leading frames of each video.

    Only the first 120 entries along axis 1 of `model_input` are kept. They
    pass through two 512-output 'VALID' convolutions in 'NWC' layout (kernel 9
    with stride 5, then kernel 5 with stride 3), are flattened, and feed a
    sigmoid fully connected layer with `vocab_size` outputs.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
                   input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
           frames for each video (before padding). Not read by this model.

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
    net = tf.slice(model_input, [0, 0, 0], [-1, 120, -1])

    # (kernel width, stride) for the two stacked convolutions.
    for kernel_width, step in ((9, 5), (5, 3)):
      net = slim.convolution(net, 512, [kernel_width], step,
                             "VALID", data_format="NWC")

    flat = slim.flatten(net)
    predictions = slim.fully_connected(
        flat, vocab_size, activation_fn=tf.nn.sigmoid,
        weights_regularizer=slim.l2_regularizer(1e-8))
    return {"predictions": predictions}
def resnet_v22(inputs,
               blocks,
               num_classes=None,
               is_training=True,
               reuse=None,
               scope=None):
    """Build the network: a three-conv stem with a shortcut, then `blocks`.

    Args:
        inputs: Input tensor for the first convolution.
        blocks: Passed unchanged to ``stack_blocks_dense``.
        num_classes: When not None, a flatten + fully connected head with this
            many outputs is appended and a softmax is stored in the end
            points under 'predictions'.
        is_training: Set as the ``is_training`` default for ``slim.dropout``.
        reuse: Forwarded to ``tf.variable_scope``.
        scope: Optional variable scope name; defaults to 'resnet_v22'.

    Returns:
        A ``(net, end_points)`` tuple: the final tensor and the dictionary
        built from the outputs collection of this scope.
    """
    with tf.variable_scope(scope, 'resnet_v22', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
                [slim.convolution, bottleneck, stack_blocks_dense],
                outputs_collections=end_points_collection), \
                slim.arg_scope([slim.dropout], is_training=is_training):
            stem = slim.convolution(inputs, 64, 16,
                                    stride=1, padding='SAME', scope='conv1')

            # The shortcut is subsampled by 2 to line up with the stride-2
            # 'conv2' on the main path.
            shortcut = subsample(stem, factor=2, scope='shortcut')
            main = conv2d_same(stem, 64, 16, stride=2, scope='conv2')
            main = slim.convolution(main, 64, 16,
                                    stride=1, padding='SAME', scope='conv3')

            net = stack_blocks_dense(main + shortcut, blocks)
            print('last:', net)

            with_head = num_classes is not None
            if with_head:
                net = slim.flatten(net, scope='flatten')
                net = slim.fully_connected(net, num_classes,
                                           activation_fn=tf.nn.relu,
                                           normalizer_fn=None,
                                           scope='fc')

            # Collect end points after the head is built and before the
            # softmax is added.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if with_head:
                end_points['predictions'] = slim.softmax(
                    net, scope='predictions')
            return net, end_points
Exemple #4
0
def vggm1234(x, TRAIN_COVN=True):
    """Apply four 'VALID' ReLU convolutions (conv1..conv4) and flatten.

    conv1 and conv2 are each followed by local response normalisation and a
    3x3 stride-2 max pool.  All convolutions are created with
    ``reuse=tf.AUTO_REUSE``.

    Args:
        x: Input tensor for 'conv1'.
        TRAIN_COVN: Passed as ``trainable`` to every convolution.

    Returns:
        ``U.flattenallbut0`` applied to the output of 'conv4'.
    """

    def conv_relu(inputs, depth, kernel, stride, name):
        # One VALID-padded ReLU convolution sharing the common settings.
        return slim.convolution(inputs,
                                depth, kernel,
                                stride,
                                padding='VALID',
                                scope=name,
                                activation_fn=tf.nn.relu,
                                reuse=tf.AUTO_REUSE,
                                trainable=TRAIN_COVN)

    def lrn_then_pool(inputs, name):
        # Local response normalisation followed by 3x3/2 max pooling.
        normed = tf.nn.lrn(inputs,
                           depth_radius=5, bias=2, alpha=1e-4 * 1, beta=0.75)
        return slim.pool(normed, [3, 3],
                         'MAX',
                         stride=2,
                         padding='VALID',
                         scope=name)

    net = conv_relu(x, 96, [7, 7], 2, 'conv1')
    net = lrn_then_pool(net, 'pool1')

    net = conv_relu(net, 256, [5, 5], 2, 'conv2')
    net = lrn_then_pool(net, 'pool2')

    net = conv_relu(net, 512, [3, 3], 1, 'conv3')
    net = conv_relu(net, 512, [3, 3], 1, 'conv4')

    return U.flattenallbut0(net)
def bottleneck2(inputs,
                depth,
                depth_bottleneck,
                stride,
                outputs_collections=None,
                scope=None):
    """Residual unit: two kernel-16 convolutions plus a shortcut.

    Args:
        inputs: Input tensor (rank of at least 3 is required by the
            ``last_dimension`` check).
        depth: Number of outputs of the unit.
        depth_bottleneck: Number of outputs of the first convolution.
        stride: Stride applied by the first convolution and the shortcut.
        outputs_collections: Collection the output is registered in.
        scope: Optional variable scope name; defaults to 'bottleneck_v2'.

    Returns:
        The result of ``utils.collect_named_outputs`` for shortcut + residual.
    """
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = utils.last_dimension(inputs.get_shape(), min_rank=3)

        if depth_in != depth:
            # Channel counts differ: project the input with a 1-wide
            # convolution that has no normaliser and no activation.
            shortcut = slim.convolution(inputs,
                                        depth,
                                        1,
                                        stride=stride,
                                        padding='SAME',
                                        normalizer_fn=None,
                                        activation_fn=None,
                                        scope='shortcut')
        else:
            shortcut = subsample(inputs, stride, 'shortcut')

        # No pre-activation is applied; the raw input feeds the residual path.
        branch = conv2d_same(inputs,
                             depth_bottleneck,
                             16,
                             stride,
                             scope='conv1')
        branch = slim.convolution(branch,
                                  depth,
                                  16,
                                  stride=1,
                                  padding='SAME',
                                  scope='conv2')

        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           shortcut + branch)
    def create_model(self,
                     model_input,
                     vocab_size,
                     labels,
                     scope='default',
                     is_training=True,
                     **unused_params):
        """Build the residual-CNN feature extractor and classify with MoeModel.

        The input is projected to ``FLAGS.residualcnn_x`` units, given two
        trailing axes, and passed through four convolution + residual-module
        stages with batch normalisation in between.  The pooled result is
        added back to the projection and handed to ``MoeModel``.

        Args:
            model_input: Input features for the first fully connected layer.
            vocab_size: Forwarded to ``MoeModel().create_model``.
            labels: Ground-truth labels, or None to skip the loss.
            scope: Variable scope name; variables inside are auto-reused.
            is_training: Forwarded to every ``slim.batch_norm`` call.
            **unused_params: Accepted and ignored.

        Returns:
            The mapping returned by ``MoeModel().create_model`` with an added
            'features' entry and, when `labels` is given, a 'loss' entry.
        """
        X = FLAGS.residualcnn_x
        # `reuse` must be passed by keyword: the second positional argument of
        # tf.variable_scope is `default_name`, so a positional AUTO_REUSE
        # never enabled variable reuse.
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            fc = slim.fully_connected(
                model_input,
                X,
                weights_regularizer=tf.contrib.layers.l2_regularizer(0.01))
            # Append two size-1 axes so the projection can be convolved.
            reshaped_input = tf.expand_dims(fc, -1)
            reshaped_input = tf.expand_dims(reshaped_input, -1)

            conv1 = slim.convolution(reshaped_input, 64, [49, 1])
            conv1_norm = slim.batch_norm(conv1, is_training=is_training)

            module1 = self.residual_module([128, 192, 64], conv1_norm,
                                           'module1')
            module1_norm = slim.batch_norm(module1, is_training=is_training)

            conv2 = slim.convolution(module1_norm, 128, 1)
            conv2_norm = slim.batch_norm(conv2, is_training=is_training)

            module2 = self.residual_module([256, 512, 128], conv2_norm,
                                           'module2')
            module2_norm = slim.batch_norm(module2, is_training=is_training)

            conv3 = slim.convolution(module2_norm, 256, 1)
            conv3_norm = slim.batch_norm(conv3, is_training=is_training)

            module3 = self.residual_module([512, 256], conv3_norm, 'module3')
            module3_norm = slim.batch_norm(module3, is_training=is_training)

            conv4 = slim.convolution(module3_norm, X, 1)
            conv4_norm = slim.batch_norm(conv4, is_training=is_training)

            module4 = self.residual_module([512, X], conv4_norm, 'module4')

            features = tf.squeeze(module4, [2])
            features = model_utils.FramePooling(features,
                                                FLAGS.residualcnn_pooling) + fc
            results = MoeModel().create_model(features, vocab_size)
            results['features'] = features
            # Identity check: `!=` on a tensor may be overloaded element-wise.
            if labels is not None:
                results['loss'] = losses.CrossEntropyLoss().calculate_loss(
                    results['predictions'], labels)
            return results
 def res_block(net, nb_filter, scope):
     """Apply 1x1, 3x3 and 1x1 stride-1 convolutions and add the input back.

     The three layers are scoped '<scope>_res_1' .. '<scope>_res_3' and all
     produce `nb_filter` outputs.
     """
     out = net
     for index, kernel in enumerate(([1, 1], [3, 3], [1, 1]), 1):
         out = slim.convolution(out,
                                nb_filter, kernel,
                                1,
                                scope='%s_res_%d' % (scope, index))
     return out + net
Exemple #8
0
    def build_predict_op(self, input_tensor, is_training=False):
        """
        Builds the graph from input tensor to model prediction: three 1-D convolutions, each followed by
        batch normalisation and a non-linearity (ReLU, ReLU, tanh).

        Args:
            input_tensor (tf tensor): input, with dimensions [batch_size, time, nr_channels=1].
            is_training (bool): whether in training mode (True) or evaluation mode (False); forwarded to every
                batch-norm layer.
        Returns:
            (tf operation): computes model predictions with dimensions [batch_size, mel_bands, time, nr_channels=1].
        """
        # (num_outputs, kernel_size, stride, scope, non-linearity) per stage.
        stages = (
            (512, [1024], [512], 'cnn_raw_1', tf.nn.relu),
            (256, [3], [1], 'cnn_raw_2', tf.nn.relu),
            (60, [3], [1], 'cnn_raw_3', tf.nn.tanh),
        )

        net = input_tensor
        with tf.variable_scope('MSTmodel'):
            for depth, kernel, stride, name, nonlinearity in stages:
                net = slim.convolution(net, depth, kernel, stride=stride, padding='SAME',
                                       activation_fn=None,
                                       weights_initializer=self.W_init, biases_initializer=self.b_init,
                                       weights_regularizer=self.W_reg, biases_regularizer=self.b_reg,
                                       scope=name)
                # The batch norm shares its scope name with the convolution it follows.
                net = slim.batch_norm(net, updates_collections=None, scope=name,
                                      is_training=is_training)
                net = nonlinearity(net)

        # swap the last two axes and append a channel dimension to match the shape of the label
        net = tf.transpose(net, [0, 2, 1])
        return tf.expand_dims(net, 3)
Exemple #9
0
def QNet(inputs, width, is_training=True, reuse=False, scope="QNet"):
    """Build a conv + dense network with a `width` x `width` output per sample.

    Two 5-wide 'VALID' ReLU convolutions (32 and 64 outputs) and two 512-unit
    ReLU dense layers are each followed by batch normalisation; a final linear
    layer of ``width**2`` units is reshaped to ``(-1, width, width)``.

    Args:
        inputs: Input tensor for the first convolution.
        width: Side length of the square output.
        is_training: Forwarded to every ``slim.batch_norm`` call.
        reuse: Forwarded to ``tf.variable_scope``.
        scope: Variable scope name.

    Returns:
        A tensor reshaped to ``(-1, width, width)``.
    """
    with tf.variable_scope(scope, reuse=reuse), slim.arg_scope(
            [slim.convolution, slim.fully_connected],
            weights_initializer=slim.initializers.xavier_initializer(),
            weights_regularizer=slim.l2_regularizer(0.01),
            biases_initializer=tf.zeros_initializer(),
            biases_regularizer=slim.l2_regularizer(0.01)):
        hidden = inputs

        for name, depth in (("conv1", 32), ("conv2", 64)):
            hidden = slim.convolution(inputs=hidden,
                                      num_outputs=depth,
                                      kernel_size=5,
                                      stride=1,
                                      activation_fn=tf.nn.relu,
                                      padding="VALID",
                                      scope=name)
            hidden = slim.batch_norm(hidden, is_training=is_training)

        hidden = slim.flatten(hidden)

        for name in ("fc1", "fc2"):
            hidden = slim.fully_connected(inputs=hidden,
                                          num_outputs=512,
                                          activation_fn=tf.nn.relu,
                                          scope=name)
            hidden = slim.batch_norm(hidden, is_training=is_training)

        # Linear output layer: one value per cell of the width x width grid.
        flat_out = slim.fully_connected(inputs=hidden,
                                        num_outputs=width**2,
                                        activation_fn=None,
                                        scope="fc3")
        return tf.reshape(flat_out, (-1, width, width))
def conv2d_same(inputs, num_outs, kernal_size, stride, scope=None):
    """Convolve with explicit padding when strided.

    With ``stride == 1`` this is a plain 'SAME' convolution.  Otherwise the
    second axis of `inputs` is padded by ``kernal_size - 1`` in total (split
    between start and end, extra on the end) and a 'VALID' convolution with
    the given stride is applied.  The padding spec covers three axes.
    """
    if stride == 1:
        conv_stride, conv_padding = 1, 'SAME'
    else:
        total = kernal_size - 1
        before = total // 2
        after = total - before
        inputs = tf.pad(inputs, [[0, 0], [before, after], [0, 0]])
        conv_stride, conv_padding = stride, 'VALID'

    return slim.convolution(inputs,
                            num_outs,
                            kernal_size,
                            stride=conv_stride,
                            padding=conv_padding,
                            scope=scope)
    def residual_module(self, params, inp, scope='default'):
        """Stack dilated (9, 1) convolutions and add the input back.

        Args:
            params: Sequence of output depths, one per convolution.  The i-th
                convolution uses dilation rate ``(2 * i + 1, 1)``.
            inp: Input tensor; it is added to the output of the last
                convolution, so their shapes must be compatible.
            scope: Variable scope name; variables inside are auto-reused.

        Returns:
            ``inp`` plus the output of the convolution stack.
        """
        # `reuse` must be passed by keyword: the second positional argument of
        # tf.variable_scope is `default_name`, so a positional AUTO_REUSE
        # never enabled variable reuse.
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            out = inp
            for i, num_outputs in enumerate(params):
                out = slim.convolution(out,
                                       num_outputs, (9, 1),
                                       rate=(2 * i + 1, 1))
            return inp + out
Exemple #12
0
def conv3d(*args, **kwargs):
    """Call ``slim.convolution`` and print the output and input shapes.

    All arguments are forwarded unchanged.  The printed line has the form
    ``<scope> -> <output shape> before: <input shape>``; the scope shows as
    ``None`` when no ``scope`` keyword was given.

    Returns:
        The tensor returned by ``slim.convolution``.
    """
    out = slim.convolution(*args, **kwargs)
    # The input may arrive positionally or as the `inputs` keyword.
    inputs = args[0] if args else kwargs.get('inputs')
    # Single-argument print works as a statement (Python 2) and as a function
    # (Python 3); `.get` avoids a KeyError after the layer is already built.
    print('%s -> %s before: %s' % (kwargs.get('scope'), shape(out),
                                   shape(inputs)))
    # A permanently disabled tf.Print of the output's mean and moments used to
    # live here; it was unreachable and has been removed.
    return out
    def build_architecture(self, inputs_dict):
        """Builds the convolutional + GRU text encoder.

        The embedding goes through two pairs of width-3 convolutions (128 then
        256 outputs), each pair followed by batch normalisation, then a
        256-unit GRU.  The last valid GRU output is projected by two fully
        connected layers (256 units, then 128 linear units).

        Args:
            inputs_dict: Mapping with 'caption_batch' (used to compute the
                sequence lengths) and 'embedding_batch' (the network input).

        Returns:
            A dict whose 'encoder_output' entry is the 128-unit projection.
        """
        caption_batch = inputs_dict['caption_batch']
        embedding = inputs_dict['embedding_batch']
        seq_length = compute_sequence_length(caption_batch)

        with slim.arg_scope([slim.convolution, slim.fully_connected],
                            activation_fn=tf.nn.relu,
                            weights_regularizer=slim.l2_regularizer(0.0005)):
            net = embedding

            # Two conv-conv-batchnorm groups; pooling is intentionally not
            # applied so the sequence length is not reduced by pooling.
            conv_groups = (('conv1', 'conv2', 128), ('conv3', 'conv4', 256))
            for first_scope, second_scope, depth in conv_groups:
                net = slim.convolution(net, depth, 3, scope=first_scope)
                net = slim.convolution(net, depth, 3, scope=second_scope)
                net = tf.layers.batch_normalization(
                    net, training=self.is_training)

            gru_cell = tf.contrib.rnn.GRUCell(num_units=256)
            rnn_outputs, _ = tf.nn.dynamic_rnn(cell=gru_cell,
                                               inputs=net,
                                               sequence_length=seq_length,
                                               dtype=tf.float32,
                                               scope='rnn')

            last_step = extract_last_output(rnn_outputs, seq_length)
            hidden = slim.fully_connected(last_step, 256, scope='fc5')
            encoded = slim.fully_connected(hidden, 128,
                                           activation_fn=None, scope='fc6')

        return {'encoder_output': encoded}
Exemple #14
0
    def G(self, training=True):
        """Build the generator under variable scope "G".

        Random normal noise passes through two fully connected layers, is
        reshaped to ``[-1, FEATURE_LEN, num_filt_2]`` and then through two
        batch-normalised convolutions; the last one has a single output and
        no activation.  Sizes are read from the module-level ``config_``.

        Args:
            training: Set as ``is_training`` for ``slim.batch_norm``.

        Returns:
            The output tensor of the "deconv4" layer.
        """

        def layer_options():
            # Fresh bias initialiser and L2 weight regulariser for each layer.
            return dict(biases_initializer=PointInitializer(.1),
                        weights_regularizer=slim.l2_regularizer(0.001))

        with slim.arg_scope([slim.batch_norm], is_training=training), \
                slim.arg_scope([slim.convolution],
                               normalizer_fn=slim.batch_norm), \
                tf.variable_scope("G"):
            noise = tf.random_normal(
                shape=[config_.BATCH_SIZE, config_.num_fc_1])

            hidden = slim.fully_connected(
                noise,
                num_outputs=config_.FEATURE_LEN,
                scope="fc1",
                **layer_options())
            hidden = slim.fully_connected(
                hidden,
                num_outputs=config_.FEATURE_LEN * config_.num_filt_2,
                scope="fc2",
                **layer_options())

            sequence = tf.reshape(
                hidden, [-1, config_.FEATURE_LEN, config_.num_filt_2])

            deconv3 = slim.convolution(
                sequence,
                num_outputs=config_.num_filt_1,
                kernel_size=4,
                scope="deconv3",
                **layer_options())
            deconv4 = slim.convolution(
                deconv3,
                num_outputs=1,
                kernel_size=5,
                activation_fn=None,
                scope="deconv4",
                **layer_options())

        return deconv4
 def atrousconv(self,
                x,
                num_out_layers,
                kernel_size,
                stride=1,
                rate=1,
                activation_fn=tf.nn.elu):
     """Thin wrapper around ``slim.convolution`` with an ELU default.

     Forwards `x`, `num_out_layers` and `kernel_size` positionally and
     `stride`, `rate` and `activation_fn` by keyword, and returns the result.
     """
     conv_options = {
         'stride': stride,
         'rate': rate,
         'activation_fn': activation_fn,
     }
     return slim.convolution(x, num_out_layers, kernel_size, **conv_options)
Exemple #16
0
	def conv2d(self, outChannels=20, kernel=3, pool=True, dropout=False, norm=True):
		"""Append a convolution layer, with optional pooling, dropout and LRN.

		The layer is built on ``self.last_layer`` inside name scope 'conv' and
		registered through ``self.add``.  Shapes are printed while building.

		Args:
			outChannels: Number of outputs of the convolution.
			kernel: Kernel size of the convolution.
			pool: Apply a 3x3 ``slim.max_pool2d`` after the convolution.
			dropout: Apply ``tf.nn.dropout`` with ``self.keep_prob``.
			norm: Apply local response normalisation.
		"""
		with tf.name_scope('conv'):
			print("input  shape ", self.last_shape)
			print("conv   outChannels ", outChannels)
			# The layer index is the number of layers registered so far.
			layer_index = str(len(self.layers))
			layer = slim.convolution(self.last_layer, outChannels, kernel, scope="conv_" + layer_index)
			if pool:
				layer = slim.max_pool2d(layer, [3, 3], scope='pool')
			if dropout:
				layer = tf.nn.dropout(layer, self.keep_prob)
			if norm:
				layer = tf.nn.lrn(layer, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
			if debug:
				tf.summary.histogram('norm_' + layer_index, layer)
			print("output shape ", layer.get_shape())
			self.add(layer)
Exemple #17
0
    def G(self, training=True):
        """Build the generator under variable scope "G".

        Random normal noise passes through two fully connected layers, is
        reshaped to ``[-1, FEATURE_LEN, num_filt_2]`` and then through two
        convolutions.  "deconv3" is batch-normalised (with scaling); "deconv4"
        has a single output, no normaliser and no activation.  Sizes are read
        from ``self.config``.

        Args:
            training: Set as ``is_training`` for ``slim.batch_norm``.

        Returns:
            The output tensor of the "deconv4" layer.
        """
        cfg = self.config
        with slim.arg_scope([slim.batch_norm], is_training=training, scale=True), \
                slim.arg_scope([slim.convolution], normalizer_fn=slim.batch_norm), \
                tf.variable_scope("G"):
            noise = tf.random_normal(shape=[cfg.BATCH_SIZE, cfg.num_fc_1])

            hidden = slim.fully_connected(
                noise,
                num_outputs=cfg.FEATURE_LEN,
                biases_initializer=PointInitializer(.1),
                scope="fc1")
            hidden = slim.fully_connected(
                hidden,
                num_outputs=cfg.FEATURE_LEN * cfg.num_filt_2,
                biases_initializer=PointInitializer(.1),
                scope="fc2")

            sequence = tf.reshape(hidden, [-1, cfg.FEATURE_LEN, cfg.num_filt_2])

            deconv3 = slim.convolution(
                sequence,
                num_outputs=cfg.num_filt_1,
                kernel_size=4,
                biases_initializer=PointInitializer(.1),
                scope="deconv3")

            # The output layer opts out of the batch-norm default set above.
            deconv4 = slim.convolution(
                deconv3,
                num_outputs=1,
                kernel_size=5,
                activation_fn=None,
                biases_initializer=PointInitializer(.1),
                scope="deconv4",
                normalizer_fn=None)
        return deconv4
Exemple #18
0
def build_conv_layer(is_training, in_feats_mod, param_dict):
    """Build the optional convolution stage and return flattened features.

    Reads 'conv_depth', 'data_batch_size', 'conv_actv_str', 'conv_gene_pair',
    'data_input_size', 'conv_batch_norm' and 'reg_do_keep_prob' from
    `param_dict`.

    Args:
        is_training: Forwarded to ``slim.dropout``.
        in_feats_mod: Input features; axis 1 is split into two halves.
        param_dict: Hyper-parameter mapping (keys listed above).

    Returns:
        The flattened convolution output (optionally batch-normalised and
        with dropout), or the flattened input when 'conv_depth' is not > 0.
    """
    if not param_dict['conv_depth'] > 0:
        # No convolution requested: flatten while keeping the batch axis.
        return tf.contrib.layers.flatten(in_feats_mod)

    batch_size = param_dict['data_batch_size']
    half_width = int(int(in_feats_mod.shape[1]) / 2)

    # [batch, 2 * half] -> [batch, 2, half] -> [batch, half, 2], the
    # channels-last layout given to the convolution below.
    paired = tf.reshape(in_feats_mod, [batch_size, 2, half_width])
    conv_inputs = tf.transpose(paired, [0, 2, 1])
    print("model conv reshape: " + str(conv_inputs))

    conv_actv_fn = get_act_fn(param_dict['conv_actv_str'])
    if param_dict['conv_gene_pair']:
        conv_dims = [
            int(param_dict['data_input_size'] / 2), 2,
            param_dict['conv_depth']
        ]
        conv_outputs = gene_pair_convolution(conv_inputs, batch_size,
                                             conv_dims, conv_actv_fn)
    else:
        conv_outputs = slim.convolution(
            inputs=conv_inputs,
            num_outputs=param_dict['conv_depth'],
            kernel_size=1,
            stride=1,
            data_format='NWC',
            activation_fn=conv_actv_fn)

    conv_outputs = tf.contrib.layers.flatten(conv_outputs)
    if param_dict['conv_batch_norm']:
        conv_outputs = batch_normalize(conv_outputs)

    keep_prob = param_dict['reg_do_keep_prob']
    if keep_prob < 1:
        conv_outputs = slim.dropout(conv_outputs,
                                    keep_prob=keep_prob,
                                    is_training=is_training)
    return conv_outputs
Exemple #19
0
    def create_model(self, model_input, vocab_size, num_frames,
                     **unused_params):
        """Creates a model which uses a stack of LSTMs to represent the video.

        The final (concatenated, non-tuple) LSTM state gets two extra axes at
        position 1, passes through a size-1 'SAME' convolution with
        ``FLAGS.num_filters`` outputs, and is classified by the video-level
        model named in ``FLAGS.video_level_classifier_model``.

        Args:
          model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
                       input features.
          vocab_size: The number of classes in the dataset.
          num_frames: A vector of length 'batch' which indicates the number of
               frames for each video (before padding).

        Returns:
          The value returned by the selected video-level model's
          ``create_model``; per the original documentation, a dictionary with
          the probability predictions under the 'predictions' key.
        """
        cell_units = FLAGS.lstm_cells
        layer_count = FLAGS.lstm_layers

        cells = [
            tf.contrib.rnn.BasicLSTMCell(cell_units, forget_bias=1.0)
            for _ in range(layer_count)
        ]
        stacked_cell = tf.contrib.rnn.MultiRNNCell(cells,
                                                   state_is_tuple=False)

        # Only the final state is used; the per-step outputs are discarded.
        _outputs, final_state = tf.nn.dynamic_rnn(stacked_cell,
                                                  model_input,
                                                  sequence_length=num_frames,
                                                  dtype=tf.float32)

        # Insert two size-1 axes after the batch axis before convolving.
        for _ in range(2):
            final_state = tf.expand_dims(final_state, axis=1)
        final_state = slim.convolution(final_state, FLAGS.num_filters,
                                       1, 1, "SAME")

        classifier_cls = getattr(video_level_models,
                                 FLAGS.video_level_classifier_model)
        return classifier_cls().create_model(model_input=final_state,
                                             vocab_size=vocab_size,
                                             **unused_params)
    def inception_module(self, inp, param, scope):
        """Build a four-branch inception-style module and concatenate on axis 3.

        Args:
            inp: Input tensor shared by all four branches.
            param: Sequence of six output depths:
                ``param[0]``: size-1 branch;
                ``param[1]``, ``param[2]``: size-1 reduce, then (9, 1) conv;
                ``param[3]``, ``param[4]``: size-1 reduce, then (25, 1) conv;
                ``param[5]``: size-1 conv after the (9, 1) max pool.
            scope: Variable scope name; variables inside are auto-reused.

        Returns:
            The four branch outputs concatenated along axis 3.
        """
        # `reuse` must be passed by keyword: the second positional argument of
        # tf.variable_scope is `default_name`, so a positional AUTO_REUSE
        # never enabled variable reuse.
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            # Branch 1: size-1 convolution.
            out1 = slim.convolution(inp, param[0], 1, 1)

            # Branch 2: size-1 reduction followed by a (9, 1) convolution.
            out2 = slim.convolution(inp, param[1], 1, 1)
            out2 = slim.convolution(out2, param[2], (9, 1), 1)

            # Branch 3: size-1 reduction followed by a (25, 1) convolution.
            out3 = slim.convolution(inp, param[3], 1, 1)
            out3 = slim.convolution(out3, param[4], (25, 1), 1)

            # Branch 4: (9, 1) stride-1 max pool followed by a size-1 conv.
            out4 = slim.max_pool2d(inp, (9, 1), 1, padding='SAME')
            out4 = slim.convolution(out4, param[5], 1, 1)

            output = tf.concat([out1, out2, out3, out4], 3)

            return output
  def create_model(self, model_input, vocab_size, num_frames,
                   is_training=True, **unused_params):
    """Creates a 1-D residual convolutional classifier.

    A width-8 stride-2 convolution is followed by thirteen residual blocks of
    two width-3 convolutions each (1024 outputs throughout).  Blocks 3, 7 and
    11 use stride 2 and average-pool their shortcut to match.  The result is
    average-pooled, flattened and classified by two 2048-unit fully connected
    layers with dropout and a sigmoid output layer.

    Args:
      model_input: Input tensor for the first convolution.
      vocab_size: Number of outputs of the final sigmoid layer.
      num_frames: Not read by this model.
      is_training: Forwarded to `slim.dropout`. Defaults to True, which
        matches the previous behaviour where dropout was always active.
      **unused_params: Accepted and ignored.

    Returns:
      A dictionary with the sigmoid outputs under the 'predictions' key.
    """
    hidden_size = 1024

    def residual_block(block_input, stride):
      # Two width-3 convolutions plus a shortcut; the first conv carries the
      # stride, and a strided block average-pools the shortcut to match.
      out = slim.convolution(block_input, hidden_size, [3],
                             stride=stride, padding='SAME')
      out = slim.convolution(out, hidden_size, [3],
                             stride=1, padding='SAME')
      shortcut = block_input
      if stride == 2:
        shortcut = slim.pool(block_input, [2], "AVG",
                             stride=2, padding="SAME")
      return out + shortcut

    output = slim.convolution(model_input, hidden_size, [8],
                              stride=2, padding='SAME')

    # Stride of each residual block, in order.
    for stride in (1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1):
      output = residual_block(output, stride)

    output = slim.pool(output, [2], "AVG", stride=2, padding="SAME")
    output = slim.flatten(output)

    # Dropout now follows `is_training`, so callers that pass
    # is_training=False get deterministic predictions.
    for _ in range(2):
      output = slim.fully_connected(output, 2048)
      output = slim.dropout(output, is_training=is_training)

    output = slim.fully_connected(output, vocab_size,
                                  activation_fn=tf.nn.sigmoid)

    return {"predictions": output}
Exemple #22
0
def resnetb_block(x, numFmOut, bottleneck_size, stride):
    """Builds one bottleneck residual block: 1x1 conv, strided 3x3 conv, 1x1 conv, plus shortcut
    Args:
      x: block input tensor with the feature maps on the last axis
      numFmOut: int, number of feature maps produced by the block
      bottleneck_size: int, number of feature maps of the first two convolutions
      stride: int, stride of the middle convolution; the shortcut is only
        average-pooled when this is exactly 2
    Returns:
      output: tensor, ReLU applied to (shortcut + convolution path)
    """
    in_shape = x.get_shape().as_list()
    numFmIn = in_shape[-1]
    # Rank 5 -> three pooled axes, rank 4 -> two, every other rank is treated as one
    num_spatial = {5: 3, 4: 2}.get(len(in_shape), 1)

    # Convolution path; the last convolution has no activation because the
    # ReLU is applied after the residual addition
    with tf.variable_scope('conv1x1x1_1'):
        branch = slim.convolution(x, bottleneck_size, 1, stride=1)
    with tf.variable_scope('conv3x3x3_2'):
        branch = slim.convolution(branch, bottleneck_size, 3, stride=stride)
    with tf.variable_scope('conv1x1x1_3'):
        branch = slim.convolution(branch,
                                  numFmOut,
                                  1,
                                  stride=1,
                                  activation_fn=None)

    # Shortcut path: shrink it like the strided convolution did
    shortcut = x
    if stride == 2:
        window = [2] * num_spatial
        shortcut = tf.nn.pool(shortcut,
                              window,
                              "AVG",
                              padding='SAME',
                              strides=window)

    # Shortcut path: zero-pad the feature map axis up to numFmOut
    if numFmOut > numFmIn:
        missing_fm = numFmOut - numFmIn
        lower_pad = math.ceil(missing_fm / 2)
        upper_pad = missing_fm - lower_pad
        no_pad = [[0, 0] for _ in range(num_spatial + 1)]
        shortcut = tf.pad(shortcut, no_pad + [[lower_pad, upper_pad]])

    # Residual connection + activation
    return tf.nn.relu(shortcut + branch)
Exemple #23
0
def Inception3D(x, mdlParams, placeholders=None):
    """Defines the Inception3D architecture from the paper "A Deep Learning Approach for Pose Estimation from Volumetric OCT Data"
    Args:
      x: 5D input tensor, usually a placeholder of shape [batchSize, width, height, depth, channel]
      mdlParams: dictionary, contains model configuration. Keys read here:
        'num_inception_blocks', 'inception_dims_reduction', 'inception_dims',
        'inception_block_scale', 'long_range_connection', 'module_scale'
        and 'tar_range'
      placeholders: dictionary; placeholders['train_state'] is passed to batch
        norm as 'is_training'. It is indexed unconditionally, so it has to be
        supplied despite the None default
    Returns:
      output: 2D tensor of shape [batchSize, numberOfOutputs]
    """
    with tf.variable_scope('Inception3D'):
        with slim.arg_scope(
            [slim.convolution],
                padding='SAME',
                activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01),
                normalizer_fn=slim.batch_norm,
                normalizer_params={
                    'is_training': placeholders['train_state'],
                    'epsilon': 0.0001,
                    'decay': 0.9,
                    'center': True,
                    'scale': True,
                    'activation_fn': None,
                    'updates_collections': tf.GraphKeys.UPDATE_OPS,
                    'fused': False
                }):
            # Initial part
            with tf.variable_scope('Initial'):
                layer = slim.convolution(x, 48, 3, stride=1, scope='conv1')
                layer = slim.convolution(layer, 64, 3, stride=2, scope='conv2')
                layer = slim.convolution(layer, 64, 3, stride=1, scope='conv3')
                layer = slim.convolution(layer, 64, 3, stride=1, scope='conv4')
                layer = slim.convolution(layer, 64, 3, stride=1, scope='conv5')
            # Inception modules
            with tf.variable_scope('Inception_Modules'):
                # Iterate through all modules
                for i, num_blocks in enumerate(
                        mdlParams['num_inception_blocks']):
                    with tf.variable_scope('Module_%d' % (i)):
                        # Save for long-range connections
                        module_input = layer
                        # Number of feature maps entering the module, needed
                        # to pad the input for dense long-range connections
                        input_size = module_input.get_shape().as_list()[-1]
                        # First, apply reduction block
                        with tf.variable_scope('Reduction_Block'):
                            layer = inception_block(
                                layer,
                                mdlParams['inception_dims_reduction'][i, :],
                                stride=2,
                                scale=mdlParams['inception_block_scale'])
                        # Feature maps leaving the reduction block: the sum of
                        # the three concatenated branch widths
                        reduction_dims = mdlParams['inception_dims_reduction']
                        red_fm_size = (reduction_dims[i, 0] +
                                       reduction_dims[i, 2] +
                                       reduction_dims[i, 4])
                        # Then, add normal inception blocks
                        for j in range(num_blocks):
                            with tf.variable_scope('Normal_Block_%d' % (j)):
                                layer = inception_block(
                                    layer,
                                    mdlParams['inception_dims'][i, :],
                                    stride=1,
                                    scale=mdlParams['inception_block_scale'],
                                    last=(j == num_blocks - 1))
                        # If desired, add long range connection from the input
                        connection_type = mdlParams['long_range_connection'][i]
                        if connection_type == 1:
                            # Long-range residual connection: a strided 1x1
                            # convolution brings the module input to the
                            # module output size
                            with tf.variable_scope('resize_module'):
                                adjusted_input = slim.convolution(
                                    module_input, red_fm_size, 1, stride=2)
                            # Add scaled residual connection
                            layer = mdlParams[
                                'module_scale'] * layer + adjusted_input
                        elif connection_type == 2:
                            # Long-range dense connection: pool the module
                            # input, zero-pad its feature maps, concatenate
                            pooled_input = slim.layers.avg_pool3d(
                                module_input, 2)
                            # The original code read an undefined name here;
                            # input_size is the value it was computed for
                            lower_pad = math.ceil(
                                (red_fm_size - input_size) / 2)
                            upper_pad = (red_fm_size -
                                         input_size) - lower_pad
                            # Pad
                            adjusted_input = tf.pad(
                                pooled_input,
                                [[0, 0], [0, 0], [0, 0], [0, 0],
                                 [lower_pad, upper_pad]])
                            # Concat and adjust size with conv
                            target_size = layer.get_shape().as_list()[-1]
                            layer = tf.concat([layer, adjusted_input], 4)
                            layer = slim.convolution(
                                layer,
                                target_size,
                                1,
                                scope='long_range_resize')
            # GAP for 1D,2D,3D: average over every axis between batch and
            # feature maps
            layer_rank = len(layer.get_shape().as_list())
            gap_axes = {5: [1, 2, 3], 4: [1, 2]}.get(layer_rank, [1])
            layer = math_ops.reduce_mean(layer,
                                         axis=gap_axes,
                                         keep_dims=False,
                                         name='global_pool')
            # Dense output layer, one linear unit per entry of 'tar_range'
            output = slim.layers.fully_connected(layer,
                                                 len(mdlParams['tar_range']),
                                                 activation_fn=None)
    return output
Exemple #24
0
def ResNeXt3D(x, mdlParams, placeholders=None):
    """Defines a 3D network made of an initial convolution stack followed by modules of ResNeXt blocks
    Args:
      x: 5D input tensor, usually a placeholder of shape [batchSize, width, height, depth, channel]
      mdlParams: dictionary, contains model configuration. Keys read here:
        'ResNeXt3D_FM', 'ResNeXt3D_Red_FM', 'ResNeXt3D_Size',
        'ResNeXt3D_Stride', 'cardinality' and 'tar_range'
      placeholders: dictionary; placeholders['train_state'] is passed to batch
        norm as 'is_training'. It is indexed unconditionally, so it has to be
        supplied despite the None default
    Returns:
      output: 2D tensor of shape [batchSize, numberOfOutputs]
    """
    # NOTE: the outer scope name is part of every variable name, keep it as is
    with tf.variable_scope('ResNetA3D'):
        batch_norm_settings = {
            'is_training': placeholders['train_state'],
            'epsilon': 0.0001,
            'decay': 0.9,
            'center': True,
            'scale': True,
            'activation_fn': None,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
            'fused': False
        }
        conv_defaults = slim.arg_scope(
            [slim.convolution],
            padding='SAME',
            activation_fn=tf.nn.relu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_settings)
        with conv_defaults:
            # Initial part: five 3x3x3 convolutions, the second one strided
            with tf.variable_scope('Initial'):
                layer = slim.convolution(x, 48, 3, stride=1, scope='conv1')
                layer = slim.convolution(layer, 64, 3, stride=2, scope='conv2')
                layer = slim.convolution(layer, 64, 3, stride=1, scope='conv3')
                layer = slim.convolution(layer, 64, 3, stride=1, scope='conv4')
                layer = slim.convolution(layer, 64, 3, stride=1, scope='conv5')
            # Resnet modules
            with tf.variable_scope('Resnet_modules'):
                # Running widths; both are doubled at the start of each module
                output_fm = mdlParams['ResNeXt3D_FM']
                reduced_fm = mdlParams['ResNeXt3D_Red_FM']
                for i, num_blocks in enumerate(mdlParams['ResNeXt3D_Size']):
                    with tf.variable_scope('Module_%d' % (i)):
                        for j in range(num_blocks):
                            with tf.variable_scope('Block_%d' % (j)):
                                # Only the first block of a module widens the
                                # feature maps and applies the module stride
                                first_block = (j == 0)
                                if first_block:
                                    output_fm = 2 * output_fm
                                    reduced_fm = 2 * reduced_fm
                                block_stride = (
                                    mdlParams['ResNeXt3D_Stride'][i]
                                    if first_block else 1)
                                layer = resnext_block(layer, output_fm,
                                                      reduced_fm, block_stride,
                                                      mdlParams['cardinality'])
            # GAP for 1D,2D,3D: average over every axis between batch and
            # feature maps
            layer_rank = len(layer.get_shape().as_list())
            gap_axes = {5: [1, 2, 3], 4: [1, 2]}.get(layer_rank, [1])
            layer = math_ops.reduce_mean(layer,
                                         axis=gap_axes,
                                         keep_dims=False,
                                         name='global_pool')
            # Dense output layer, one linear unit per entry of 'tar_range'
            output = slim.layers.fully_connected(layer,
                                                 len(mdlParams['tar_range']),
                                                 activation_fn=None)
    return output
Exemple #25
0
def inception_block(x, inception_dims, stride, scale, last=False):
    """Builds one three-branch inception block with a scaled residual connection
    Args:
      x: block input, 5D tensor
      inception_dims: 1D array, number of feature maps for the units in the
        block; entries 0 to 4 are read
      stride: int, stride of the last convolution of every branch; 1 selects
        the normal block, any other value the reduction block
      scale: factor applied to the block output before the input is added
      last: boolean, indicates whether this is the last block in a chain;
        it currently has no effect on the graph that is built
    Returns:
      output: 5D tensor, output of the block
    """
    # Branch 1: single 1x1
    with tf.variable_scope('conv1x1x1_1'):
        branch_a = slim.convolution(x, inception_dims[0], 1, stride=stride)
    # Branch 2: 1x1 followed by 3x3
    with tf.variable_scope('conv1x1x1_2'):
        branch_b = slim.convolution(x, inception_dims[1], 1)
    with tf.variable_scope('conv3x3x3_2'):
        branch_b = slim.convolution(branch_b,
                                    inception_dims[2],
                                    3,
                                    stride=stride)
    # Branch 3: 1x1 followed by two 3x3
    with tf.variable_scope('conv1x1x1_3'):
        branch_c = slim.convolution(x, inception_dims[3], 1)
    with tf.variable_scope('conv3x3x3_3_1'):
        branch_c = slim.convolution(branch_c, inception_dims[4], 3)
    with tf.variable_scope('conv3x3x3_3_2'):
        branch_c = slim.convolution(branch_c,
                                    inception_dims[4],
                                    3,
                                    stride=stride)
    # Join the branches along the feature map axis
    merged = tf.concat([branch_a, branch_b, branch_c], 4)
    input_fm = x.get_shape().as_list()[-1]

    if stride != 1:
        # Reduction block: bring the input to the output size instead, by
        # average pooling and zero-padding the feature map axis
        pooled_input = slim.layers.avg_pool3d(x, 2)
        missing_fm = merged.get_shape().as_list()[-1] - input_fm
        lower_pad = math.ceil(missing_fm / 2)
        upper_pad = missing_fm - lower_pad
        adjusted_input = tf.pad(
            pooled_input,
            [[0, 0], [0, 0], [0, 0], [0, 0], [lower_pad, upper_pad]])
        # Residual connection with scale, no activation afterwards
        return scale * merged + adjusted_input

    # Normal block: a linear 1x1 convolution maps the concatenation back to
    # the input width; addition and ReLU stay inside the same scope
    with tf.variable_scope('expand_output'):
        expanded_output = slim.convolution(merged,
                                           input_fm,
                                           1,
                                           activation_fn=None)
        residual = scale * expanded_output + x
        result = tf.nn.relu(residual)
    return result
Exemple #26
0
def resnext_block(x, numFmOut, bottleneck_size, stride, cardinality):
    """Defines a single resnext block, according to paper
    Args:
      x: block input tensor with the feature maps on the last axis
        (5D for volumes; 4D and 3D inputs are handled as well)
      numFmOut: int, number of feature maps to be outputted
      bottleneck_size: int, number of feature maps for every path
      stride: int, stride for the 3x3x3 convolutions; the shortcut is only
        average-pooled when this is exactly 2
      cardinality: int, number of paths
    Returns:
      output: tensor of the same rank as x, output of the block
    """
    in_shape = x.get_shape().as_list()
    rank = len(in_shape)
    # Number of input fms
    numFmIn = in_shape[-1]
    # Rank 5 -> three pooled axes, rank 4 -> two, every other rank is treated as one
    num_spatial = {5: 3, 4: 2}.get(rank, 1)
    # Split into paths
    all_paths = []
    for i in range(cardinality):
        # First, 1x1 to bring FMs down to bottleneck size
        with tf.variable_scope('conv1x1x1_%d' % (i)):
            layer = slim.convolution(x, bottleneck_size, 1, stride=1)
        # Then, 3x3, apply stride
        with tf.variable_scope('conv3x3x3_%d' % (i)):
            layer = slim.convolution(layer, bottleneck_size, 3, stride=stride)
        # Add to list of paths
        all_paths.append(layer)
    # Concat all paths along the feature map axis. The axis is derived from
    # the input rank (4 for 5D input, as before) so that the 1D/2D cases
    # handled below do not fail on a hard-coded axis
    layer = tf.concat(all_paths, axis=rank - 1, name='concat_paths')
    # Restore FM size from concatenated paths; no activation here because the
    # ReLU is applied after the residual addition
    with tf.variable_scope('conv1x1x1_restore'):
        layer = slim.convolution(layer,
                                 numFmOut,
                                 1,
                                 stride=1,
                                 activation_fn=None)
    # Shortcut: shrink it like the strided convolutions did
    adjusted_input = x
    if stride == 2:
        window = [2] * num_spatial
        adjusted_input = tf.nn.pool(adjusted_input,
                                    window,
                                    "AVG",
                                    padding='SAME',
                                    strides=window)
    # When the channels of the shortcut and the restored paths do not match,
    # add zero pads to increase the depth of the shortcut
    if numFmOut > numFmIn:
        lower_pad = math.ceil((numFmOut - numFmIn) / 2)
        upper_pad = (numFmOut - numFmIn) - lower_pad
        no_pad = [[0, 0] for _ in range(num_spatial + 1)]
        adjusted_input = tf.pad(adjusted_input,
                                no_pad + [[lower_pad, upper_pad]])
    # Residual connection + activation
    output = tf.nn.relu(adjusted_input + layer)
    return output
    def build(self):
        """Builds the network graph, the sigmoid output and the loss.

        Reads self.input_batch, self.is_training, self.mask_batch,
        self.unet_weight and (when unet_weight is truthy) self.weight_batch.
        Sets self.logit, self.output, self.loss and self.loss_opt.

        Returns:
          The single-channel logit tensor after the final resize to 224x224.
        """
        # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/layers.py#L429
        bn_settings = {
            'is_training': self.is_training,
            'center': True,
            'scale': False,
            'decay': 0.9,
            'epsilon': 0.001,
            'fused': True,
            'zero_debias_moving_mean': True
        }

        def conv(inputs, depth, kernel, scope, with_bn_relu6=True):
            # Stride-1 SAME convolution with the shared initializer; batch
            # norm and relu6 are dropped for the final logit layer
            return slim.convolution(
                inputs,
                depth,
                kernel,
                1,
                padding='SAME',
                scope=scope,
                weights_initializer=tf.truncated_normal_initializer(
                    mean=0.0, stddev=0.01),
                normalizer_fn=slim.batch_norm if with_bn_relu6 else None,
                normalizer_params=bn_settings if with_bn_relu6 else None,
                activation_fn=tf.nn.relu6 if with_bn_relu6 else None)

        net = self.input_batch
        # Feature maps collected at every scale for the later fusion
        features = []

        # Full-resolution stem; the 1x1 squeeze at the end is not collected
        for stem_idx in range(1):
            net = conv(net, int(128), [3, 3], 'preconv%d' % stem_idx)
            features.append(net)
            net = conv(net, int(256), [3, 3], 'preconv%d-2' % stem_idx)
            features.append(net)
            net = conv(net, int(32), [1, 1], 'preconv%d-b' % stem_idx)

        # Conv + stride-2 max-pool stages, width doubling from 64
        conv_pool_size = 4
        for stage in range(conv_pool_size):
            net = conv(net, int(64 * (2**stage)), [3, 3],
                       'conv%d' % (stage + 1))
            net = slim.max_pool2d(net, [3, 3],
                                  2,
                                  padding='SAME',
                                  scope='pool%d' % (stage + 1))
            features.append(net)

        net = conv(net, int(256), [3, 3], 'conv%d' % (conv_pool_size + 1))
        features.append(net)

        # upsample every collected map to a common size and fuse on channels
        features_up = []
        for feature in features:
            features_up.append(tf.image.resize_bilinear(feature, (112, 112)))
        net = tf.concat(axis=3, values=features_up, name='concat_features')

        net = conv(net, int(256), [1, 1], 'bottleneck')
        # Linear single-channel prediction
        net = conv(net, 1, [5, 5], 'conv_last', with_bn_relu6=False)
        net = tf.image.resize_bilinear(net, (224, 224))

        self.logit = net
        self.output = tf.nn.sigmoid(net, 'visualization')

        # Per-element weights are only used when unet_weight is set
        w = self.weight_batch if self.unet_weight else 1.0
        self.loss = tf.losses.sigmoid_cross_entropy(
            multi_class_labels=self.mask_batch,
            logits=self.logit,
            weights=w,
            reduction=Reduction.SUM_BY_NONZERO_WEIGHTS)
        self.loss_opt = self.loss
        return net
  def create_model(self, model_input, vocab_size, num_frames, is_training=True,
                   **unused_params):
    """Creates a stacked 1D-convolution classifier over frame-level features.

    Five stages of width-1024, kernel-3 convolutions, each followed by batch
    norm and a stride-2 max pool, feed two 4096-unit fully connected layers
    (batch norm + dropout) and a sigmoid output layer.

    Args:
      model_input: input feature tensor; it is fed to 1D convolutions, so it
        is presumably 'batch_size' x 'max_frames' x 'num_features' -
        TODO confirm against the caller.
      vocab_size: The number of classes; width of the sigmoid output layer.
      num_frames: Not used by this model.
      is_training: Whether the graph is built for training. Controls batch
        norm and dropout. Defaults to True, which is what this model
        previously hard-coded, so callers that do not pass it are unaffected.
      **unused_params: Ignored.

    Returns:
      A dictionary with the sigmoid outputs under key 'predictions'.
    """
    output = model_input

    # (number of stacked convolutions, max-pool window) for each stage
    stages = [(2, 2), (2, 2), (3, 3), (3, 3), (3, 2)]
    for num_convs, pool_window in stages:
      for _ in range(num_convs):
        output = slim.convolution(output, 1024, [3], stride=1, padding="SAME")
      output = tf.contrib.layers.batch_norm(
          output, center=True, scale=True, is_training=is_training, scope=None)
      output = slim.pool(output, [pool_window], "MAX", stride=2)

    output = slim.flatten(output)

    for _ in range(2):
      output = slim.fully_connected(output, 4096)
      output = tf.contrib.layers.batch_norm(
          output, center=True, scale=True, is_training=is_training, scope=None)
      # Dropout must be a no-op outside training; slim.dropout defaults to
      # is_training=True, so the flag has to be passed explicitly
      output = slim.dropout(output, is_training=is_training)

    output = slim.fully_connected(output, vocab_size, activation_fn=tf.nn.sigmoid)

    return {"predictions": output}
Exemple #29
0
    def visual_features(self):
        """Builds the convolutional visual feature extractor.

        Reads self.input_images, self._bsize, self._args.lookback_length,
        self._sqlen and self._keep_prob, and registers the raw input tensor in
        the "input_images" collection.

        Returns:
          Tensor reshaped to [batch size, sequence length, 128]: layer-normed
          ELU of the dense tower output plus four shortcut projections, with
          dropout applied.
        """
        input_images = self.input_images
        tf.add_to_collection("input_images", input_images)

        # Map pixel values from [0, 255] to [-1, 1]
        images = -1.0 + 2.0 * tf.cast(input_images, tf.float32) / 255.0

        bsize = self._bsize                 # Batch size
        lblen = self._args.lookback_length  # Lookback length
        sqlen = self._sqlen                 # Sequence length

        def drop(tensor):
            return tf.nn.dropout(x=tensor, keep_prob=self._keep_prob)

        def shortcut(tensor, flat_dim):
            # Linear 128-unit projection of the per-frame flattened features
            flat = tf.reshape(tensor, [bsize, sqlen, flat_dim])
            return slim.fully_connected(flat, 128, activation_fn=None)

        net = tf.reshape(images, shape=[bsize, lblen + sqlen,
                                        ds.HEIGHT, ds.WIDTH, ds.CHANNELS])

        # First three 3D convolutions; each one contributes a shortcut built
        # from its last `sqlen` time steps
        conv_specs = (
            ([3, 12, 12], [1, 6, 6]),
            ([2, 5, 5], [1, 2, 2]),
            ([2, 5, 5], [1, 1, 1]),
        )
        shortcuts = []
        for kernel, strides in conv_specs:
            net = slim.convolution(net,
                                   num_outputs=64,
                                   kernel_size=kernel,
                                   stride=strides,
                                   padding="VALID")
            net = drop(net)
            # Height x Width x Channel
            hwc = np.prod(net.get_shape().as_list()[2:])
            shortcuts.append(shortcut(net[:, -sqlen:, :, :, :], hwc))

        net = slim.convolution(net,
                               num_outputs=64,
                               kernel_size=[2, 5, 5],
                               stride=[1, 1, 1],
                               padding="VALID")
        net = drop(net)
        # At this point the tensor 'net' is expected to be of shape
        # batch_size x seq_len x Height x Width x Channel: the reshapes below
        # use it unsliced, which only works if the four VALID convolutions
        # (temporal kernels 3, 2, 2, 2) consumed exactly the lookback frames
        # NOTE(review): that implies lookback_length == 5 - confirm
        hwc = np.prod(net.get_shape().as_list()[2:])
        shortcuts.append(shortcut(net, hwc))

        # Dense tower: three ReLU layers with dropout, then a linear layer
        net = tf.reshape(net, [bsize, sqlen, hwc])
        for width in (1024, 512, 256):
            net = slim.fully_connected(net, width, activation_fn=tf.nn.relu)
            net = drop(net)
        net = slim.fully_connected(net, 128, activation_fn=None)

        # The collected projections are residual connections (shortcuts)
        combined = net
        for aux in shortcuts:
            combined = combined + aux
        visual_features = _layer_norm(tf.nn.elu(combined))

        num_outputs = visual_features.get_shape().as_list()[-1]
        visual_features = tf.reshape(visual_features,
                                     [bsize, sqlen, num_outputs])

        return drop(visual_features)
 def create_model(self, model_input, vocab_size, num_frames,
                  is_training=True, **unused_params):
   """Builds a stacked-LSTM front end followed by a deep 1-D conv classifier.

   The input is run through a multi-layer LSTM; the per-step LSTM outputs are
   then passed through nine conv/batch-norm/max-pool stages, flattened, and
   fed to two 4096-unit fully connected layers and a sigmoid output layer.

   Args:
     model_input: Tensor passed as `inputs` to `tf.nn.dynamic_rnn`.
       Presumably 'batch_size' x 'max_frames' x 'num_features' -- TODO
       confirm against the caller.
     vocab_size: Number of units in the final sigmoid layer.
     num_frames: Passed as `sequence_length` to `tf.nn.dynamic_rnn`.
     is_training: Whether batch norm and dropout run in training mode.
       Defaults to True, which reproduces the previous hard-coded behaviour.
       Pass False for evaluation/inference so dropout is disabled and batch
       norm uses its moving averages.
     **unused_params: Ignored.

   Returns:
     A dict with a single key "predictions" holding the output of the final
     sigmoid layer.
   """
   lstm_size = FLAGS.lstm_cells
   number_of_layers = FLAGS.lstm_layers

   stacked_lstm = tf.contrib.rnn.MultiRNNCell(
     [
       tf.contrib.rnn.BasicLSTMCell(
         lstm_size, forget_bias=1.0)
       for _ in range(number_of_layers)
     ])

   # Only the per-step outputs are consumed below; the final state is unused.
   output, _ = tf.nn.dynamic_rnn(stacked_lstm, model_input,
                                 sequence_length=num_frames,
                                 dtype=tf.float32)

   # One row per stage: (number of convs, conv filters, pool size, pool stride).
   # Layers are created in exactly this order so that the scope names slim
   # auto-generates for the variables stay the same as before.
   conv_stages = (
     (2, 1024, 2, 2),
     (2, 1024, 2, 2),
     (3, 1024, 3, 2),
     (3, 1024, 3, 2),
     (3, 1024, 2, 2),
     (3, 1024, 2, 1),
     (3, 1024, 2, 1),
     (3, 1024, 2, 1),
     (3, 512, 2, 1),
   )
   for num_convs, num_filters, pool_size, pool_stride in conv_stages:
     for _ in range(num_convs):
       output = slim.convolution(output, num_filters, [3], stride=1,
                                 padding="SAME")
     # NOTE(review): with is_training=False batch norm relies on its moving
     # averages, so the training loop must run the UPDATE_OPS collection --
     # confirm against the trainer.
     output = tf.contrib.layers.batch_norm(output, center=True, scale=True,
                                           is_training=is_training,
                                           scope=None)
     output = slim.pool(output, [pool_size], "MAX", stride=pool_stride)

   output = slim.flatten(output)

   # Two identical fully connected blocks: dense -> batch norm -> dropout.
   for _ in range(2):
     output = slim.fully_connected(output, 4096)
     output = tf.contrib.layers.batch_norm(output, center=True, scale=True,
                                           is_training=is_training,
                                           scope=None)
     output = slim.dropout(output, is_training=is_training)

   output = slim.fully_connected(output, vocab_size,
                                 activation_fn=tf.nn.sigmoid)

   return {"predictions": output}
                                         activation_fn=tf.identity,
                                         scope='g_conv6')
    conv6 = tf.nn.relu(conv6)

    conv7 = slim.convolution2d_transpose(conv6,
                                         3,
                                         3,
                                         stride=1,
                                         activation_fn=tf.identity,
                                         scope='g_conv7')
    G = tf.nn.tanh(conv7)

with tf.variable_scope("D_net"):
    conv1 = slim.convolution(x,
                             64,
                             5,
                             stride=2,
                             activation_fn=tf.identity,
                             scope='d_conv1')
    conv1 = leaky_relu(conv1)

    conv2 = slim.convolution(conv1,
                             128,
                             5,
                             stride=2,
                             normalizer_fn=slim.batch_norm,
                             activation_fn=tf.identity,
                             scope='d_conv2')
    conv2 = leaky_relu(conv2)

    conv3 = slim.convolution(conv2,
                             256,