def context_network(low_res):
    # conv1
    with tf.variable_scope('context/conv1') as scope:
        kernel = util._variable_with_weight_decay('weights', shape=[5, 5, 3, 64],
                                             stddev=1e-4, wd=0.0)
        conv = tf.nn.conv2d(low_res, kernel, [1, 2, 2, 1], padding='SAME')
        biases = tf.get_variable('biases', [64], initializer=tf.constant_initializer(0.0))
        bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())
        conv1 = tf.nn.relu(bias, name=scope.name)
        _activation_summary(conv1)

    # conv2
    with tf.variable_scope('context/conv2') as scope:
        kernel = util._variable_with_weight_decay('weights', shape=[5, 5, 64, 64],
                                             stddev=1e-4, wd=0.0)
        conv = tf.nn.conv2d(conv1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.get_variable('biases', [64], initializer=tf.constant_initializer(0.1))
        bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())
        conv2 = tf.nn.relu(bias, name=scope.name)
        _activation_summary(conv2)

    # conv3
    with tf.variable_scope('context/conv3') as scope:
        kernel = util._variable_with_weight_decay('weights', shape=[7, 7, 64, 2],
                                             stddev=1e-4, wd=0.0)
        conv = tf.nn.conv2d(conv2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.get_variable('biases', [2], initializer=tf.constant_initializer(0.1))
        bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())
        conv3 = tf.nn.relu(bias, name=scope.name)
        _activation_summary(conv3)

    #convert to 1-d for inputting into LSTM
    return tf.reshape(conv3, [FLAGS.batch_size, -1])
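# Hedged usage sketch (not part of the original example): the reshape above
# flattens the features "for inputting into LSTM", so the context network's
# output would typically drive a recurrent cell roughly like this.
# FLAGS.batch_size, FLAGS.lstm_size and the low-resolution input tensor are
# assumptions carried over from the code above.
def context_to_lstm_sketch(low_res):
    context_features = context_network(low_res)       # [batch_size, feat_dim]
    lstm_cell = tf.nn.rnn_cell.LSTMCell(FLAGS.lstm_size)
    initial_state = lstm_cell.zero_state(FLAGS.batch_size, tf.float32)
    # One recurrence step driven by the flattened context features.
    output, state = lstm_cell(context_features, initial_state)
    return output, state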
# Example 2
def conv_layer(l_input, kernel_shape, scope):
    '''
    Convolutional layer wrapper function.

    :l_input: input tensor of the conv layer
    :kernel_shape: shape of the filter
    :scope: variable scope of the layer

    :returns:
       :conv_drop: output tensor after ReLU and dropout
       :kernel: the kernel (filter) variable
    '''
    
    kernel = _variable_with_weight_decay(
        'weights',
        shape=kernel_shape,
        wd_value=None,
        use_fp16=FLAGS.use_fp16)

    conv = tf.nn.conv2d(l_input, kernel,
                        [1, FLAGS.temporal_stride, 1, 1],
                         padding='SAME')

    biases = _variable('biases', [FLAGS.num_filters],
                                tf.constant_initializer(-0.05),
                                FLAGS.use_fp16)
        
    bias = tf.nn.bias_add(conv, biases)
    conv = tf.nn.relu(bias, name=scope.name)
    _activation_summary(conv)

    # dropout
    conv_drop = tf.nn.dropout(conv, FLAGS.keep_prob)
    return conv_drop, kernel
    def conv_layer_with_bn(self,
                           inputT,
                           shape,
                           train_phase,
                           activation=True,
                           name=None):
        in_channel = shape[2]
        out_channel = shape[3]
        k_size = shape[0]
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE) as scope:
            kernel = util._variable_with_weight_decay(
                'ort_weights',
                shape=shape,
                initializer=orthogonal_initializer(),
                wd=None)
            conv = tf.nn.conv2d(inputT, kernel, [1, 1, 1, 1], padding='SAME')
            biases = util._variable('biases', [out_channel],
                                    tf.constant_initializer(0.0))
            bias = tf.nn.bias_add(conv, biases)
            if activation is True:
                conv_out = tf.nn.relu(
                    self.batch_norm_layer(bias, train_phase, scope.name))
            else:
                conv_out = self.batch_norm_layer(bias, train_phase, scope.name)
        return conv_out
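    # The batch_norm_layer helper called above is not shown in this excerpt.
    # A minimal sketch of what such a method might look like in TF 1.x,
    # assuming train_phase is a boolean scalar tensor; the momentum and
    # epsilon values are illustrative assumptions, not the original settings.
    def batch_norm_layer(self, inputT, train_phase, scope_name):
        with tf.variable_scope(scope_name + '_bn'):
            return tf.layers.batch_normalization(inputT,
                                                 training=train_phase,
                                                 momentum=0.9,
                                                 epsilon=1e-5)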
def emission_network(state):
    # outputs (x, y, stop)
    # (x, y) is the location tuple
    # stop is whether or not to stop recurring
    with tf.variable_scope('emission/fc1') as scope:
        W_fc1 = util._variable_with_weight_decay('weights', shape=[FLAGS.lstm_size, 3],
                                           stddev=1e-4, wd=0.0)
        b_fc1 = tf.get_variable('biases', [3], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(state, W_fc1) + b_fc1)
    return fc1
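# Hedged sketch (not in the original): per the comments above, fc1 packs an
# (x, y) location and a stop signal into its three outputs, so downstream
# code presumably slices them apart along the last axis, e.g.:
def split_emission_sketch(emission):
    location = emission[:, 0:2]               # (x, y) glimpse location
    stop_score = emission[:, 2]               # score for halting the recurrence
    return location, stop_score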
def classification_network(state):
    with tf.variable_scope('classification/fc1') as scope:
        W_fc1 = util._variable_with_weight_decay('weights', shape=[2*FLAGS.lstm_size, FLAGS.num_classes],
                                           stddev=1e-4, wd=0.0)
        b_fc1 = tf.get_variable('biases', [FLAGS.num_classes], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(state, W_fc1) + b_fc1)
        _activation_summary(fc1)
    with tf.variable_scope('classification/softmax') as scope:
        softmax = tf.nn.softmax(fc1)
        _activation_summary(softmax)
    return softmax
# Example 6
def conv_plane(inputs,
               num_output_channels,
               kernel_size,
               scope,
               pool,
               use_xavier=True,
               stddev=1e-3,
               weight_decay=0.0,
               activation_fn=tf.nn.sigmoid,
               bn=False,
               bn_decay=None,
               is_training=None):
    with tf.variable_scope(scope) as sc:

        kernel_h, kernel_w = kernel_size
        kernel_shape = [kernel_h, kernel_w,
                        num_output_channels]
        kernel = util._variable_with_weight_decay('weights',
                                             shape=kernel_shape,
                                             use_xavier=use_xavier,
                                             stddev=stddev,
                                             wd=weight_decay)


        num_output_channels = kernel.get_shape()[-1].value

        input_re = tf.expand_dims(inputs, 1)

        kernel_re = tf.transpose(kernel, (0, 2, 1))
        kernel_re = tf.expand_dims(kernel_re, 2)
        kernel_re = tf.expand_dims(kernel_re, 3)

        outputs = tf.reduce_sum(tf.multiply(input_re, kernel_re), -1)
        outputs = tf.transpose(outputs, (0, 2, 3, 1))
        d = util._variable_on_cpu('d', [num_output_channels],
                             tf.constant_initializer(0.0))
        outputs = tf.add(outputs, d)
        outputs = tf.abs(outputs)
        #outputs = tf.divide(outputs, tf.norm(kernel, axis=1, keep_dims=True)) #axis=????????/

        if pool == 'max':
            outputs = tf.reduce_max(outputs, 2, keep_dims=True)
        elif pool == 'sum':
            outputs = tf.reduce_sum(outputs, 2, keep_dims=True)
        elif pool == 'avg':
            outputs = tf.reduce_sum(outputs, 2, keep_dims=True)
            nsample = inputs.get_shape()[2].value
            outputs = tf.divide(outputs, nsample)
        elif pool == 'minmax':
            max_out = tf.reduce_max(outputs, 2, keep_dims=True)
            min_out = tf.reduce_min(outputs, 2, keep_dims=True)
            outputs = tf.subtract(max_out, min_out)

        outputs = tf.negative(outputs)


        if bn:
            outputs = util.batch_norm_for_conv2d(outputs, is_training,
                                          bn_decay=bn_decay, scope='bn-plane')


        if activation_fn is not None:
            outputs = activation_fn(outputs)

        return outputs
def glimpse_network(full_image, location):
    glimpse = _extract_glimpse_from_location(full_image, location)
    glimpse_vars = {}
    #glimpse of size (batch_size, glimpse_size, glimpse_size, 3)
    # conv1
    with tf.variable_scope('glimpse/image') as outer_scope:
        with tf.variable_scope('conv1') as scope:
            kernel1 = _xavier_variable('weights', shape=[5, 5, 3, 64], fan_in=5*5*3, fan_out=5*5*64)
            conv = tf.nn.conv2d(glimpse, kernel1, [1, 1, 1, 1], padding='SAME')
            biases1 = _xavier_variable('biases', [64], fan_in=1, fan_out=5*5*64)
            bias = tf.reshape(tf.nn.bias_add(conv, biases1), [FLAGS.batch_size, FLAGS.glimpse_size, FLAGS.glimpse_size, 64])
            conv1 = tf.nn.relu(bias, name=scope.name)
            dropped_conv1 = tf.nn.dropout(conv1, .8)
            _activation_summary(dropped_conv1)
            glimpse_vars['conv1/weights:0'] = kernel1
            glimpse_vars['conv1/biases:0'] = biases1

        # pool1
        pool1 = tf.nn.max_pool(dropped_conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool1')
        # norm1
        norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                            name='norm1')

        # conv2
        with tf.variable_scope('conv2') as scope:
            kernel2 = _xavier_variable('weights', shape=[5, 5, 64, 64], fan_in=5*5*64, fan_out=1)
            conv = tf.nn.conv2d(norm1, kernel2, [1, 1, 1, 1], padding='SAME')
            biases2 = _xavier_variable('biases', [64], fan_in=1, fan_out=5*5*64)
            bias = tf.reshape(tf.nn.bias_add(conv, biases2), conv.get_shape().as_list())
            conv2 = tf.nn.relu(bias, name=scope.name)
            dropped_conv2 = tf.nn.dropout(conv2, .8)
            _activation_summary(dropped_conv2)
            glimpse_vars['conv2/weights:0'] = kernel2
            glimpse_vars['conv2/biases:0'] = biases2

        # norm2
        norm2 = tf.nn.lrn(dropped_conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                          name='norm2')
        # pool2
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1], padding='SAME', name='pool2')

        # conv3
        with tf.variable_scope('conv3') as scope:
            kernel3 = _xavier_variable('weights', shape=[7, 7, 64, 64], fan_in=7*7*64, fan_out=1)
            conv = tf.nn.conv2d(pool2, kernel3, [1, 1, 1, 1], padding='VALID')
            biases3 = _xavier_variable('biases', [64], fan_in=1, fan_out=7*7*64)
            bias = tf.reshape(tf.nn.bias_add(conv, biases3), conv.get_shape().as_list())
            conv3 = tf.nn.relu(bias, name=scope.name)
            dropped_conv3 = tf.nn.dropout(conv3, .8)
            _activation_summary(dropped_conv3)
            glimpse_vars['conv3/weights:0'] = kernel3
            glimpse_vars['conv3/biases:0'] = biases3

        # norm3
        norm3 = tf.nn.lrn(dropped_conv3, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                          name='norm3')
        # pool3
        pool3 = tf.nn.max_pool(norm3, ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1], padding='SAME', name='pool3')

        # fc4
        with tf.variable_scope('fc4') as scope:
            # Move everything into depth so we can perform a single matrix multiply.
            dim = 1
            for d in pool3.get_shape()[1:].as_list():
              dim *= d
            reshape = tf.reshape(pool3, [FLAGS.batch_size, dim])

            weights4 = _xavier_variable('weights', shape=[dim,FLAGS.lstm_size], fan_in=dim,fan_out=1, wd=.004)
            biases4 = _xavier_variable('biases', [FLAGS.lstm_size], fan_in=1, fan_out=FLAGS.lstm_size)
            fc4 = tf.nn.relu(tf.nn.bias_add(tf.matmul(reshape, weights4), biases4), name=scope.name)
            dropped_fc4 = tf.nn.dropout(fc4, .8)
            _activation_summary(dropped_fc4)


    # fc1
    with tf.variable_scope('glimpse/location/fc1') as scope:
        W_fc1 = util._variable_with_weight_decay('weights', shape=[2, FLAGS.lstm_size],
                                           stddev=1e-4, wd=0.0)
        b_fc1 = tf.get_variable('biases', [FLAGS.lstm_size], initializer=tf.constant_initializer(0.1))

        location_flat = tf.reshape(location, [-1, 2])
        fc1 = tf.nn.relu(tf.matmul(location_flat, W_fc1) + b_fc1)
        dropped_fc1 = tf.nn.dropout(fc1, .8)
        _activation_summary(dropped_fc1)

    # output feature vector
    with tf.variable_scope('glimpse/output') as scope:
        output = tf.multiply(dropped_fc1, dropped_fc4)
        _activation_summary(output)
    return output, glimpse_vars
# Example 8
def inference(feats, seq_lens):
    '''
    Build the deepBrain model.

    :feats: ECoG features returned from inputs().
    :seq_lens: Input sequence length for each utterance.

    :returns: logits.
    '''
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32

    feat_len = feats.get_shape().as_list()[-1]

    # expand the dimension of feats from [batch_size, T, CH] to [batch_size, T, CH, 1]
    feats = tf.expand_dims(feats, axis=-1)
    
    # convolutional layers
    with tf.variable_scope('conv1') as scope:
        conv_drop, kernel = conv_layer(l_input=feats,
                                       kernel_shape=[11, feat_len, 1, FLAGS.num_filters],
                                       scope=scope)

    if FLAGS.num_conv_layers > 1:
        for layer in range(2, FLAGS.num_conv_layers + 1):
            with tf.variable_scope('conv' + str(layer)) as scope:
                conv_drop, _ = conv_layer(l_input=conv_drop,
                                          kernel_shape=[11, feat_len, FLAGS.num_filters, FLAGS.num_filters],
                                          scope=scope)


    # recurrent layer
    with tf.variable_scope('rnn') as scope:

        # Reshape conv output to fit rnn input
        rnn_input = tf.reshape(conv_drop, [FLAGS.batch_size, -1, feat_len*FLAGS.num_filters])
        
        # Permute into time major order for rnn
        rnn_input = tf.transpose(rnn_input, perm=[1, 0, 2])
        
        # Make one instance of cell on a fixed device,
        # and use copies of the weights on other devices.
        if FLAGS.cell_type == 'LSTM':
            cell = tf.nn.rnn_cell.LSTMCell(FLAGS.num_hidden, activation=tf.nn.relu6)
        elif FLAGS.cell_type == 'CustomRNN':
            cell = custom_RNN.LayerNormalizedLSTMCell(FLAGS.num_hidden, activation=tf.nn.relu6, use_fp16=FLAGS.use_fp16)
            
        drop_cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=FLAGS.keep_prob)
        multi_cell = tf.nn.rnn_cell.MultiRNNCell([drop_cell] * FLAGS.num_rnn_layers)

        seq_lens = tf.div(seq_lens, FLAGS.temporal_stride)
        if FLAGS.rnn_type == 'uni-dir':
            rnn_outputs, _ = tf.nn.dynamic_rnn(multi_cell, rnn_input,
                                               sequence_length=seq_lens,
                                               dtype=dtype, time_major=True, 
                                               scope='rnn')
        else:
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                multi_cell, multi_cell, rnn_input,
                sequence_length=seq_lens, dtype=dtype,
                time_major=True, scope='rnn')
            outputs_fw, outputs_bw = outputs
            rnn_outputs = outputs_fw + outputs_bw
        _activation_summary(rnn_outputs)

    # Linear layer(WX + b) - softmax is applied by CTC cost function.
    with tf.variable_scope('fully_connected') as scope:
        weights = _variable_with_weight_decay(
            'weights', [FLAGS.num_hidden, NUM_CLASSES],
            wd_value=None,
            use_fp16=FLAGS.use_fp16)
        biases = _variable('biases', [NUM_CLASSES],
                                  tf.constant_initializer(0.0),
                                  FLAGS.use_fp16)
        logit_inputs = tf.reshape(rnn_outputs, [-1, cell.output_size])
        logits = tf.add(tf.matmul(logit_inputs, weights),
                        biases, name=scope.name)
        logits = tf.reshape(logits, [-1, FLAGS.batch_size, NUM_CLASSES])
        _activation_summary(logits)

    return logits
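# Hedged sketch (not in the original): the fully-connected layer above notes
# that softmax is applied by the CTC cost function, so the time-major logits
# returned by inference() would normally feed tf.nn.ctc_loss directly.
# `sparse_labels` (a tf.SparseTensor of target label indices) is an assumption.
def ctc_loss_sketch(feats, seq_lens, sparse_labels):
    logits = inference(feats, seq_lens)        # [T, batch_size, NUM_CLASSES]
    strided_lens = tf.div(seq_lens, FLAGS.temporal_stride)
    losses = tf.nn.ctc_loss(labels=sparse_labels, inputs=logits,
                            sequence_length=strided_lens, time_major=True)
    return tf.reduce_mean(losses)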
    def add_prediction_op(self):
        # norm1
        norm1 = tf.nn.lrn(self.train_data_node,
                          depth_radius=5,
                          bias=1.0,
                          alpha=0.0001,
                          beta=0.75,
                          name='norm1')

        # conv1
        conv1 = self.conv_layer_with_bn(
            norm1,
            [7, 7, self.train_data_node.get_shape().as_list()[3], 64],
            self.phase_train,
            name="conv1")

        # pool1
        pool1, pool1_indices = tf.nn.max_pool_with_argmax(conv1,
                                                          ksize=[1, 2, 2, 1],
                                                          strides=[1, 2, 2, 1],
                                                          padding='SAME',
                                                          name='pool1')

        # conv2
        conv2 = self.conv_layer_with_bn(pool1, [7, 7, 64, 64],
                                        self.phase_train,
                                        name="conv2")

        # pool2
        pool2, pool2_indices = tf.nn.max_pool_with_argmax(conv2,
                                                          ksize=[1, 2, 2, 1],
                                                          strides=[1, 2, 2, 1],
                                                          padding='SAME',
                                                          name='pool2')
        # conv3
        conv3 = self.conv_layer_with_bn(pool2, [7, 7, 64, 64],
                                        self.phase_train,
                                        name="conv3")

        # pool3
        pool3, pool3_indices = tf.nn.max_pool_with_argmax(conv3,
                                                          ksize=[1, 2, 2, 1],
                                                          strides=[1, 2, 2, 1],
                                                          padding='SAME',
                                                          name='pool3')
        # conv4
        conv4 = self.conv_layer_with_bn(pool3, [7, 7, 64, 64],
                                        self.phase_train,
                                        name="conv4")
        """ End of encoder """
        """ start upsample """

        # pool4
        pool4, pool4_indices = tf.nn.max_pool_with_argmax(conv4,
                                                          ksize=[1, 2, 2, 1],
                                                          strides=[1, 2, 2, 1],
                                                          padding='SAME',
                                                          name='pool4')
        # upsample4
        # Need to change when using different dataset out_w, out_h
        # upsample4 = upsample_with_pool_indices(pool4, pool4_indices, pool4.get_shape(), out_w=45, out_h=60, scale=2, name='upsample4')
        upsample4 = self.deconv_layer(pool4, [2, 2, 64, 64],
                                      [self.config.BATCH_SIZE, 64, 64, 64], 2,
                                      "up4")
        # decode 4
        conv_decode4 = self.conv_layer_with_bn(upsample4, [7, 7, 64, 64],
                                               self.phase_train,
                                               False,
                                               name="conv_decode4")

        # upsample 3
        # upsample3 = upsample_with_pool_indices(conv_decode4, pool3_indices, conv_decode4.get_shape(), scale=2, name='upsample3')
        upsample3 = self.deconv_layer(conv_decode4, [2, 2, 64, 64],
                                      [self.config.BATCH_SIZE, 128, 128, 64],
                                      2, "up3")
        # decode 3
        conv_decode3 = self.conv_layer_with_bn(upsample3, [7, 7, 64, 64],
                                               self.phase_train,
                                               False,
                                               name="conv_decode3")

        # upsample2
        # upsample2 = upsample_with_pool_indices(conv_decode3, pool2_indices, conv_decode3.get_shape(), scale=2, name='upsample2')
        upsample2 = self.deconv_layer(conv_decode3, [2, 2, 64, 64],
                                      [self.config.BATCH_SIZE, 256, 256, 64],
                                      2, "up2")
        # decode 2
        conv_decode2 = self.conv_layer_with_bn(upsample2, [7, 7, 64, 64],
                                               self.phase_train,
                                               False,
                                               name="conv_decode2")

        # upsample1
        # upsample1 = upsample_with_pool_indices(conv_decode2, pool1_indices, conv_decode2.get_shape(), scale=2, name='upsample1')
        upsample1 = self.deconv_layer(conv_decode2, [2, 2, 64, 64],
                                      [self.config.BATCH_SIZE, 512, 512, 64],
                                      2, "up1")
        # decode 1
        conv_decode1 = self.conv_layer_with_bn(upsample1, [7, 7, 64, 64],
                                               self.phase_train,
                                               False,
                                               name="conv_decode1")
        """ Start Classify """
        # output predicted class scores (2 classes here)
        with tf.variable_scope('conv_classifier',
                               reuse=tf.AUTO_REUSE) as scope:
            kernel = util._variable_with_weight_decay(
                'weights',
                shape=[1, 1, 64, 2],
                initializer=customer_init.msra_initializer(1, 64),
                wd=0.0005)
            conv = tf.nn.conv2d(conv_decode1,
                                kernel, [1, 1, 1, 1],
                                padding='SAME')
            biases = util._variable('biases', [2],
                                    tf.constant_initializer(0.0))
            conv_classifier = tf.nn.bias_add(conv, biases, name=scope.name)

        logit = conv_classifier

        loss = self.cal_loss(conv_classifier, self.train_label_node)

        return loss, logit