def context_network(low_res):
    # conv1
    with tf.variable_scope('context/conv1') as scope:
        kernel = util._variable_with_weight_decay('weights', shape=[5, 5, 3, 64],
                                                  stddev=1e-4, wd=0.0)
        conv = tf.nn.conv2d(low_res, kernel, [1, 2, 2, 1], padding='SAME')
        biases = tf.get_variable('biases', [64], initializer=tf.constant_initializer(0.0))
        bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())
        conv1 = tf.nn.relu(bias, name=scope.name)
        _activation_summary(conv1)

    # conv2
    with tf.variable_scope('context/conv2') as scope:
        kernel = util._variable_with_weight_decay('weights', shape=[5, 5, 64, 64],
                                                  stddev=1e-4, wd=0.0)
        conv = tf.nn.conv2d(conv1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.get_variable('biases', [64], initializer=tf.constant_initializer(0.1))
        bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())
        conv2 = tf.nn.relu(bias, name=scope.name)
        _activation_summary(conv2)

    # conv3
    with tf.variable_scope('context/conv3') as scope:
        kernel = util._variable_with_weight_decay('weights', shape=[7, 7, 64, 2],
                                                  stddev=1e-4, wd=0.0)
        conv = tf.nn.conv2d(conv2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.get_variable('biases', [2], initializer=tf.constant_initializer(0.1))
        bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())
        conv3 = tf.nn.relu(bias, name=scope.name)
        _activation_summary(conv3)

    # Flatten to 1-D so the features can be fed into the LSTM.
    return tf.reshape(conv3, [FLAGS.batch_size, -1])
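# NOTE: util._variable_with_weight_decay is assumed to follow the helper from the
# TensorFlow CIFAR-10 tutorial; below is a minimal sketch consistent with the calls
# above (an assumption for illustration, not the project's actual implementation).
def _variable_with_weight_decay_sketch(name, shape, stddev, wd):
    """Create a truncated-normal variable and optionally add its L2 weight decay to 'losses'."""
    var = tf.get_variable(name, shape,
                          initializer=tf.truncated_normal_initializer(stddev=stddev))
    if wd is not None:
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var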
def conv_layer(l_input, kernel_shape, scope):
    '''
    Convolutional layer wrapper function.

    :l_input: input tensor of the conv layer
    :kernel_shape: shape of the filter
    :returns:
        :conv_drop: output tensor after ReLU and dropout
        :kernel: the filter variable
    '''
    kernel = _variable_with_weight_decay('weights',
                                         shape=kernel_shape,
                                         wd_value=None,
                                         use_fp16=FLAGS.use_fp16)
    conv = tf.nn.conv2d(l_input, kernel, [1, FLAGS.temporal_stride, 1, 1],
                        padding='SAME')
    biases = _variable('biases', [FLAGS.num_filters],
                       tf.constant_initializer(-0.05), FLAGS.use_fp16)
    bias = tf.nn.bias_add(conv, biases)
    conv = tf.nn.relu(bias, name=scope.name)
    _activation_summary(conv)
    # dropout
    conv_drop = tf.nn.dropout(conv, FLAGS.keep_prob)
    return conv_drop, kernel
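# NOTE: a minimal sketch of the `_variable` helper assumed above, which creates a
# variable of the requested precision. This is an assumption about the project's
# helper, shown only to make the example self-contained.
def _variable_sketch(name, shape, initializer, use_fp16):
    dtype = tf.float16 if use_fp16 else tf.float32
    return tf.get_variable(name, shape, initializer=initializer, dtype=dtype)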
def conv_layer_with_bn(self, inputT, shape, train_phase, activation=True, name=None):
    in_channel = shape[2]
    out_channel = shape[3]
    k_size = shape[0]
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE) as scope:
        kernel = util._variable_with_weight_decay('ort_weights',
                                                  shape=shape,
                                                  initializer=orthogonal_initializer(),
                                                  wd=None)
        conv = tf.nn.conv2d(inputT, kernel, [1, 1, 1, 1], padding='SAME')
        biases = util._variable('biases', [out_channel], tf.constant_initializer(0.0))
        bias = tf.nn.bias_add(conv, biases)
        if activation is True:
            conv_out = tf.nn.relu(self.batch_norm_layer(bias, train_phase, scope.name))
        else:
            conv_out = self.batch_norm_layer(bias, train_phase, scope.name)
    return conv_out
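# NOTE: batch_norm_layer is not shown here; the sketch below reflects what it is
# assumed to do (batch norm that switches behaviour with the train_phase flag), based
# on the common SegNet-style implementation with tf.contrib.layers.batch_norm. It is
# an assumption, not the project's actual method.
def batch_norm_layer_sketch(inputT, is_training, scope):
    return tf.cond(
        is_training,
        lambda: tf.contrib.layers.batch_norm(inputT, is_training=True, center=False,
                                             updates_collections=None,
                                             scope=scope + '_bn'),
        lambda: tf.contrib.layers.batch_norm(inputT, is_training=False, center=False,
                                             updates_collections=None,
                                             scope=scope + '_bn', reuse=True))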
def emission_network(state):
    # Outputs (x, y, stop):
    #   (x, y) is the location tuple
    #   stop is whether or not to stop recurring
    with tf.variable_scope('emission/fc1') as scope:
        W_fc1 = util._variable_with_weight_decay('weights',
                                                 shape=[FLAGS.lstm_size, 3],
                                                 stddev=1e-4, wd=0.0)
        b_fc1 = tf.get_variable('biases', [3],
                                initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(state, W_fc1) + b_fc1)
    return fc1
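# NOTE: hypothetical downstream usage (not part of the original code) showing how the
# (x, y, stop) emission could be split; `lstm_state` is an assumed LSTM output tensor.
def split_emission_sketch(lstm_state):
    emission = emission_network(lstm_state)   # shape: (batch_size, 3)
    location = emission[:, 0:2]               # (x, y) glimpse location
    stop = emission[:, 2]                     # stop-recurring score
    return location, stop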
def classification_network(state):
    with tf.variable_scope('classification/fc1') as scope:
        W_fc1 = util._variable_with_weight_decay('weights',
                                                 shape=[2 * FLAGS.lstm_size, FLAGS.num_classes],
                                                 stddev=1e-4, wd=0.0)
        b_fc1 = tf.get_variable('biases', [FLAGS.num_classes],
                                initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(state, W_fc1) + b_fc1)
        _activation_summary(fc1)

    with tf.variable_scope('classification/softmax') as scope:
        softmax = tf.nn.softmax(fc1)
        _activation_summary(softmax)

    return softmax
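# NOTE: _activation_summary is used throughout this file but not defined here; it is
# assumed to follow the TensorFlow CIFAR-10 tutorial helper. A minimal sketch of that
# assumption:
def _activation_summary_sketch(x):
    """Record a histogram of activations and their sparsity for TensorBoard."""
    tensor_name = x.op.name
    tf.summary.histogram(tensor_name + '/activations', x)
    tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))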
def conv_plane(inputs, num_output_channels, kernel_size, scope, pool,
               use_xavier=True, stddev=1e-3, weight_decay=0.0,
               activation_fn=tf.nn.sigmoid, bn=False, bn_decay=None,
               is_training=None):
    with tf.variable_scope(scope) as sc:
        kernel_h, kernel_w = kernel_size
        kernel_shape = [kernel_h, kernel_w, num_output_channels]
        kernel = util._variable_with_weight_decay('weights',
                                                  shape=kernel_shape,
                                                  use_xavier=use_xavier,
                                                  stddev=stddev,
                                                  wd=weight_decay)
        num_output_channels = kernel.get_shape()[-1].value

        # Broadcast the plane parameters against the input points.
        input_re = tf.expand_dims(inputs, 1)
        kernel_re = tf.transpose(kernel, (0, 2, 1))
        kernel_re = tf.expand_dims(kernel_re, 2)
        kernel_re = tf.expand_dims(kernel_re, 3)
        outputs = tf.reduce_sum(tf.multiply(input_re, kernel_re), -1)
        outputs = tf.transpose(outputs, (0, 2, 3, 1))

        d = util._variable_on_cpu('d', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.add(outputs, d)
        outputs = tf.abs(outputs)
        # outputs = tf.divide(outputs, tf.norm(kernel, axis=1, keep_dims=True))  # axis=?

        # Pool the per-point responses over the sample dimension.
        if pool == 'max':
            outputs = tf.reduce_max(outputs, 2, keep_dims=True)
        elif pool == 'sum':
            outputs = tf.reduce_sum(outputs, 2, keep_dims=True)
        elif pool == 'avg':
            outputs = tf.reduce_sum(outputs, 2, keep_dims=True)
            nsample = inputs.get_shape()[2].value
            outputs = tf.divide(outputs, nsample)
        elif pool == 'minmax':
            max_out = tf.reduce_max(outputs, 2, keep_dims=True)
            min_out = tf.reduce_min(outputs, 2, keep_dims=True)
            outputs = tf.subtract(max_out, min_out)
            outputs = tf.negative(outputs)

        if bn:
            outputs = util.batch_norm_for_conv2d(outputs, is_training,
                                                 bn_decay=bn_decay,
                                                 scope='bn-plane')
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
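# NOTE: util._variable_on_cpu, used above for the plane offset `d`, is assumed to be
# the standard "create the variable in CPU memory" helper from the TensorFlow
# tutorials; a minimal sketch of that assumption:
def _variable_on_cpu_sketch(name, shape, initializer):
    with tf.device('/cpu:0'):
        var = tf.get_variable(name, shape, initializer=initializer)
    return var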
def glimpse_network(full_image, location):
    glimpse = _extract_glimpse_from_location(full_image, location)
    glimpse_vars = {}
    # glimpse has shape (batch_size, glimpse_size, glimpse_size, 3)
    with tf.variable_scope('glimpse/image') as outer_scope:
        # conv1
        with tf.variable_scope('conv1') as scope:
            kernel1 = _xavier_variable('weights', shape=[5, 5, 3, 64],
                                       fan_in=5*5*3, fan_out=5*5*64)
            conv = tf.nn.conv2d(glimpse, kernel1, [1, 1, 1, 1], padding='SAME')
            biases1 = _xavier_variable('biases', [64], fan_in=1, fan_out=5*5*64)
            bias = tf.reshape(tf.nn.bias_add(conv, biases1),
                              [FLAGS.batch_size, FLAGS.glimpse_size, FLAGS.glimpse_size, 64])
            conv1 = tf.nn.relu(bias, name=scope.name)
            dropped_conv1 = tf.nn.dropout(conv1, .8)
            _activation_summary(dropped_conv1)
            glimpse_vars['conv1/weights:0'] = kernel1
            glimpse_vars['conv1/biases:0'] = biases1
        # pool1
        pool1 = tf.nn.max_pool(dropped_conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool1')
        # norm1
        norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
        # conv2
        with tf.variable_scope('conv2') as scope:
            kernel2 = _xavier_variable('weights', shape=[5, 5, 64, 64],
                                       fan_in=5*5*64, fan_out=1)
            conv = tf.nn.conv2d(norm1, kernel2, [1, 1, 1, 1], padding='SAME')
            biases2 = _xavier_variable('biases', [64], fan_in=1, fan_out=5*5*64)
            bias = tf.reshape(tf.nn.bias_add(conv, biases2), conv.get_shape().as_list())
            conv2 = tf.nn.relu(bias, name=scope.name)
            dropped_conv2 = tf.nn.dropout(conv2, .8)
            _activation_summary(dropped_conv2)
            glimpse_vars['conv2/weights:0'] = kernel2
            glimpse_vars['conv2/biases:0'] = biases2
        # norm2
        norm2 = tf.nn.lrn(dropped_conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
        # pool2
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool2')
        # conv3
        with tf.variable_scope('conv3') as scope:
            kernel3 = _xavier_variable('weights', shape=[7, 7, 64, 64],
                                       fan_in=7*7*64, fan_out=1)
            conv = tf.nn.conv2d(pool2, kernel3, [1, 1, 1, 1], padding='VALID')
            biases3 = _xavier_variable('biases', [64], fan_in=1, fan_out=7*7*64)
            bias = tf.reshape(tf.nn.bias_add(conv, biases3), conv.get_shape().as_list())
            conv3 = tf.nn.relu(bias, name=scope.name)
            dropped_conv3 = tf.nn.dropout(conv3, .8)
            _activation_summary(dropped_conv3)
            glimpse_vars['conv3/weights:0'] = kernel3
            glimpse_vars['conv3/biases:0'] = biases3
        # norm3
        norm3 = tf.nn.lrn(dropped_conv3, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm3')
        # pool3
        pool3 = tf.nn.max_pool(norm3, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool3')
        # fc4
        with tf.variable_scope('fc4') as scope:
            # Move everything into depth so we can perform a single matrix multiply.
            dim = 1
            for d in pool3.get_shape()[1:].as_list():
                dim *= d
            reshape = tf.reshape(pool3, [FLAGS.batch_size, dim])
            weights4 = _xavier_variable('weights', shape=[dim, FLAGS.lstm_size],
                                        fan_in=dim, fan_out=1, wd=.004)
            biases4 = _xavier_variable('biases', [FLAGS.lstm_size],
                                       fan_in=1, fan_out=FLAGS.lstm_size)
            fc4 = tf.nn.relu(tf.nn.bias_add(tf.matmul(reshape, weights4), biases4),
                             name=scope.name)
            dropped_fc4 = tf.nn.dropout(fc4, .8)
            _activation_summary(dropped_fc4)

    # fc1 over the glimpse location
    with tf.variable_scope('glimpse/location/fc1') as scope:
        W_fc1 = util._variable_with_weight_decay('weights',
                                                 shape=[2, FLAGS.lstm_size],
                                                 stddev=1e-4, wd=0.0)
        b_fc1 = tf.get_variable('biases', [FLAGS.lstm_size],
                                initializer=tf.constant_initializer(0.1))
        location_flat = tf.reshape(location, [-1, 2])
        fc1 = tf.nn.relu(tf.matmul(location_flat, W_fc1) + b_fc1)
        dropped_fc1 = tf.nn.dropout(fc1, .8)
        _activation_summary(dropped_fc1)

    # output feature vector
    with tf.variable_scope('glimpse/output') as scope:
        output = tf.multiply(dropped_fc1, dropped_fc4)
        _activation_summary(output)

    return output, glimpse_vars
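# NOTE: _extract_glimpse_from_location is not shown here; a minimal sketch of what it
# is assumed to do, using tf.image.extract_glimpse. The normalized/centered offset
# convention is an assumption, not necessarily the project's actual implementation.
def _extract_glimpse_from_location_sketch(full_image, location):
    # location is assumed to hold normalized (x, y) offsets of shape (batch_size, 2).
    offsets = tf.reshape(location, [FLAGS.batch_size, 2])
    return tf.image.extract_glimpse(full_image,
                                    size=[FLAGS.glimpse_size, FLAGS.glimpse_size],
                                    offsets=offsets,
                                    centered=True, normalized=True)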
def inference(feats, seq_lens):
    '''
    Build the deepBrain model.

    :feats: ECoG features returned from inputs().
    :seq_lens: Input sequence length for each utterance.
    :returns: logits.
    '''
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    feat_len = feats.get_shape().as_list()[-1]
    # Expand the dimension of feats from [batch_size, T, CH] to [batch_size, T, CH, 1].
    feats = tf.expand_dims(feats, axis=-1)

    # convolutional layers
    with tf.variable_scope('conv1') as scope:
        conv_drop, kernel = conv_layer(l_input=feats,
                                       kernel_shape=[11, feat_len, 1, FLAGS.num_filters],
                                       scope=scope)
    if FLAGS.num_conv_layers > 1:
        for layer in range(2, FLAGS.num_conv_layers + 1):
            with tf.variable_scope('conv' + str(layer)) as scope:
                conv_drop, _ = conv_layer(l_input=conv_drop,
                                          kernel_shape=[11, feat_len, FLAGS.num_filters,
                                                        FLAGS.num_filters],
                                          scope=scope)

    # recurrent layer
    with tf.variable_scope('rnn') as scope:
        # Reshape conv output to fit rnn input: [batch_size, T, feat_len * num_filters].
        rnn_input = tf.reshape(conv_drop,
                               [FLAGS.batch_size, -1, feat_len * FLAGS.num_filters])
        # Permute into time-major order for the rnn.
        rnn_input = tf.transpose(rnn_input, perm=[1, 0, 2])
        # Make one instance of cell on a fixed device,
        # and use copies of the weights on other devices.
        if FLAGS.cell_type == 'LSTM':
            cell = tf.nn.rnn_cell.LSTMCell(FLAGS.num_hidden, activation=tf.nn.relu6)
        elif FLAGS.cell_type == 'CustomRNN':
            cell = custom_RNN.LayerNormalizedLSTMCell(FLAGS.num_hidden,
                                                      activation=tf.nn.relu6,
                                                      use_fp16=FLAGS.use_fp16)
        drop_cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=FLAGS.keep_prob)
        multi_cell = tf.nn.rnn_cell.MultiRNNCell([drop_cell] * FLAGS.num_rnn_layers)

        # Account for the temporal striding of the conv layers.
        seq_lens = tf.div(seq_lens, FLAGS.temporal_stride)
        if FLAGS.rnn_type == 'uni-dir':
            rnn_outputs, _ = tf.nn.dynamic_rnn(multi_cell, rnn_input,
                                               sequence_length=seq_lens,
                                               dtype=dtype, time_major=True,
                                               scope='rnn')
        else:
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(multi_cell, multi_cell,
                                                         rnn_input,
                                                         sequence_length=seq_lens,
                                                         dtype=dtype, time_major=True,
                                                         scope='rnn')
            outputs_fw, outputs_bw = outputs
            rnn_outputs = outputs_fw + outputs_bw
        _activation_summary(rnn_outputs)

    # Linear layer (WX + b) - softmax is applied by the CTC cost function.
    with tf.variable_scope('fully_connected') as scope:
        weights = _variable_with_weight_decay('weights',
                                              [FLAGS.num_hidden, NUM_CLASSES],
                                              wd_value=None,
                                              use_fp16=FLAGS.use_fp16)
        biases = _variable('biases', [NUM_CLASSES],
                           tf.constant_initializer(0.0), FLAGS.use_fp16)
        logit_inputs = tf.reshape(rnn_outputs, [-1, cell.output_size])
        logits = tf.add(tf.matmul(logit_inputs, weights), biases, name=scope.name)
        logits = tf.reshape(logits, [-1, FLAGS.batch_size, NUM_CLASSES])
        _activation_summary(logits)

    return logits
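# NOTE: a hypothetical usage sketch, not part of the original code; `num_channels` and
# the placeholder shapes are assumptions made only to illustrate how inference() is
# wired up for CTC training.
def build_inference_graph_sketch():
    num_channels = 32  # assumed number of ECoG channels, for illustration only
    feats_ph = tf.placeholder(tf.float32, shape=[FLAGS.batch_size, None, num_channels])
    seq_lens_ph = tf.placeholder(tf.int32, shape=[FLAGS.batch_size])
    # Logits come back time-major, [T, batch_size, NUM_CLASSES], ready for tf.nn.ctc_loss.
    return inference(feats_ph, seq_lens_ph)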
def add_prediction_op(self):
    # norm1
    norm1 = tf.nn.lrn(self.train_data_node, depth_radius=5, bias=1.0,
                      alpha=0.0001, beta=0.75, name='norm1')
    # conv1
    conv1 = self.conv_layer_with_bn(
        norm1,
        [7, 7, self.train_data_node.get_shape().as_list()[3], 64],
        self.phase_train, name="conv1")
    # pool1
    pool1, pool1_indices = tf.nn.max_pool_with_argmax(conv1, ksize=[1, 2, 2, 1],
                                                      strides=[1, 2, 2, 1],
                                                      padding='SAME', name='pool1')
    # conv2
    conv2 = self.conv_layer_with_bn(pool1, [7, 7, 64, 64], self.phase_train, name="conv2")
    # pool2
    pool2, pool2_indices = tf.nn.max_pool_with_argmax(conv2, ksize=[1, 2, 2, 1],
                                                      strides=[1, 2, 2, 1],
                                                      padding='SAME', name='pool2')
    # conv3
    conv3 = self.conv_layer_with_bn(pool2, [7, 7, 64, 64], self.phase_train, name="conv3")
    # pool3
    pool3, pool3_indices = tf.nn.max_pool_with_argmax(conv3, ksize=[1, 2, 2, 1],
                                                      strides=[1, 2, 2, 1],
                                                      padding='SAME', name='pool3')
    # conv4
    conv4 = self.conv_layer_with_bn(pool3, [7, 7, 64, 64], self.phase_train, name="conv4")
    """ End of encoder """
    """ Start upsample """
    # pool4
    pool4, pool4_indices = tf.nn.max_pool_with_argmax(conv4, ksize=[1, 2, 2, 1],
                                                      strides=[1, 2, 2, 1],
                                                      padding='SAME', name='pool4')
    # upsample4
    # Need to change out_w, out_h when using a different dataset.
    # upsample4 = upsample_with_pool_indices(pool4, pool4_indices, pool4.get_shape(),
    #                                        out_w=45, out_h=60, scale=2, name='upsample4')
    upsample4 = self.deconv_layer(pool4, [2, 2, 64, 64],
                                  [self.config.BATCH_SIZE, 64, 64, 64], 2, "up4")
    # decode 4
    conv_decode4 = self.conv_layer_with_bn(upsample4, [7, 7, 64, 64],
                                           self.phase_train, False, name="conv_decode4")
    # upsample 3
    # upsample3 = upsample_with_pool_indices(conv_decode4, pool3_indices,
    #                                        conv_decode4.get_shape(), scale=2, name='upsample3')
    upsample3 = self.deconv_layer(conv_decode4, [2, 2, 64, 64],
                                  [self.config.BATCH_SIZE, 128, 128, 64], 2, "up3")
    # decode 3
    conv_decode3 = self.conv_layer_with_bn(upsample3, [7, 7, 64, 64],
                                           self.phase_train, False, name="conv_decode3")
    # upsample 2
    # upsample2 = upsample_with_pool_indices(conv_decode3, pool2_indices,
    #                                        conv_decode3.get_shape(), scale=2, name='upsample2')
    upsample2 = self.deconv_layer(conv_decode3, [2, 2, 64, 64],
                                  [self.config.BATCH_SIZE, 256, 256, 64], 2, "up2")
    # decode 2
    conv_decode2 = self.conv_layer_with_bn(upsample2, [7, 7, 64, 64],
                                           self.phase_train, False, name="conv_decode2")
    # upsample 1
    # upsample1 = upsample_with_pool_indices(conv_decode2, pool1_indices,
    #                                        conv_decode2.get_shape(), scale=2, name='upsample1')
    upsample1 = self.deconv_layer(conv_decode2, [2, 2, 64, 64],
                                  [self.config.BATCH_SIZE, 512, 512, 64], 2, "up1")
    # decode 1
    conv_decode1 = self.conv_layer_with_bn(upsample1, [7, 7, 64, 64],
                                           self.phase_train, False, name="conv_decode1")
    """ Start Classify """
    # Output the per-pixel class scores (2 classes).
    with tf.variable_scope('conv_classifier', reuse=tf.AUTO_REUSE) as scope:
        kernel = util._variable_with_weight_decay(
            'weights', shape=[1, 1, 64, 2],
            initializer=customer_init.msra_initializer(1, 64),
            wd=0.0005)
        conv = tf.nn.conv2d(conv_decode1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = util._variable('biases', [2], tf.constant_initializer(0.0))
        conv_classifier = tf.nn.bias_add(conv, biases, name=scope.name)
    logit = conv_classifier
    loss = self.cal_loss(conv_classifier, self.train_label_node)
    return loss, logit
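# NOTE: deconv_layer is not shown here; the sketch below reflects what it is assumed
# to do (learned upsampling via transposed convolution), written as a free function
# for brevity. The truncated-normal initializer is an assumption; SegNet-style code
# often initializes this filter for bilinear upsampling instead.
def deconv_layer_sketch(inputT, f_shape, output_shape, stride=2, name=None):
    strides = [1, stride, stride, 1]
    with tf.variable_scope(name):
        # f_shape is [height, width, out_channels, in_channels] for conv2d_transpose.
        weights = tf.get_variable(
            'up_filter', shape=f_shape,
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        deconv = tf.nn.conv2d_transpose(inputT, weights, output_shape,
                                        strides=strides, padding='SAME')
    return deconv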