def _convLSTM(self, input_hidden_state, scope_name='convLSTM', initial_state=None, trainable=True, scope_reuse=False):
    """Run a ConvLSTM over ``input_hidden_state`` inside its own variable scope.

    Args:
        input_hidden_state: Input sequence tensor passed to ``tf.nn.dynamic_rnn``.
        scope_name: Name of the variable scope wrapping the cell and the RNN.
        initial_state: Optional initial RNN state. When ``None`` the cell's
            zero state for a batch of one is used.
        trainable: Not used by this method; kept for interface compatibility.
        scope_reuse: When true, variables already created in ``scope_name``
            are reused instead of being created again.

    Returns:
        A tuple ``(outputs, state, weights)``: ``outputs`` and ``state`` are
        the results of ``tf.nn.dynamic_rnn`` and ``weights`` is whatever
        ``cell.return_weight()`` returns.
    """
    with tf.variable_scope(scope_name) as scope:
        if scope_reuse:
            scope.reuse_variables()

        # %-formatting prints the same text under Python 2 and Python 3.
        print("%s %s" % (scope_name, input_hidden_state.get_shape()))

        # Floor division keeps the spatial shape integral under Python 3 as well.
        cell = ConvLSTMCell(
            [self.IMAGE_HEIGHT // 8, self.IMAGE_WIDTH // 8],
            self.lstm_channel,
            self.conLSTM_kernel)

        if initial_state is None:
            # Default: zero state for a single-sequence batch.
            initial_state = cell.zero_state(1, dtype=tf.float32)

        outputs, state = tf.nn.dynamic_rnn(
            cell,
            input_hidden_state,
            initial_state=initial_state,
            dtype=input_hidden_state.dtype)

        print("%s %s" % (scope_name, outputs.get_shape()))
        return outputs, state, cell.return_weight()
def conv_lstm_encoder(self, H, W, filter_size, kernel, encoder_input):
    """Encode a sequence with one ConvLSTM layer and return its final state.

    The layer lives in the ``enc_lstm_model`` variable scope. After the call,
    ``self.reuse_conv_lstm_encoder`` is set to ``True`` so that later calls
    open the scope with ``reuse=True``.

    Args:
        H: First entry of the spatial shape given to the cell.
        W: Second entry of the spatial shape given to the cell.
        filter_size: Passed to ``ConvLSTMCell`` as its filters argument.
        kernel: Passed to ``ConvLSTMCell`` as its kernel argument.
        encoder_input: Input sequence tensor for ``tf.nn.dynamic_rnn``.

    Returns:
        The final state returned by ``tf.nn.dynamic_rnn``.
    """
    scope_ctx = tf.variable_scope('enc_lstm_model',
                                  reuse=self.reuse_conv_lstm_encoder)
    with scope_ctx:
        reuse_flag = tf.get_variable_scope().reuse
        lstm = ConvLSTMCell([H, W], filter_size, kernel, reuse=reuse_flag)
        start_state = lstm.zero_state(self.batch_size, dtype=tf.float32)
        rnn_result = tf.nn.dynamic_rnn(cell=lstm,
                                       inputs=encoder_input,
                                       initial_state=start_state)
    # From now on the encoder variables exist and must be reused.
    self.reuse_conv_lstm_encoder = True
    _unused_outputs, final_state = rnn_result
    return final_state
def biconvlstm_B(x, R_lstmout, height, width, filter_num, conv_filter_num, kernel, batch_size, timestep, istraining, reuse=False, scope='B_LSTM_net'):
    """Two stacked bidirectional ConvLSTM layers followed by three 3x3 convolutions.

    Args:
        x: Input tensor handed to ``dense_unit_4_layer``.
        R_lstmout: Tensor blended into the ConvLSTM output through the
            learned scalar ``r``.
        height: Spatial height used for reshapes and the ConvLSTM cells.
        width: Spatial width used for reshapes and the ConvLSTM cells.
        filter_num: Number of filters of every ConvLSTM cell.
        conv_filter_num: Output channels of the first trailing convolution;
            the second one uses ``conv_filter_num // 2``.
        kernel: Kernel argument of every ConvLSTM cell.
        batch_size: Not used by this function; kept for interface
            compatibility.
        timestep: Length of the time axis after the reshape.
        istraining: Training flag forwarded to ``dense_unit_4_layer``.
        reuse: ``reuse`` argument of the enclosing variable scope.
        scope: Name of the enclosing variable scope.

    Returns:
        ``(final_out, r)``: the single-channel output of the last convolution
        and the blending variable ``r``.
    """
    with tf.variable_scope(scope, reuse=reuse):
        print("shape: %s %s" % (height, width))

        # Fix: the original body read the undefined name ``is_training``
        # instead of the ``istraining`` parameter.
        h_conv_post_1 = dense_unit_4_layer(x, istraining)
        # NOTE(review): the channel count 12 is hard-coded; presumably it is
        # the output depth of dense_unit_4_layer - confirm.
        h_conv_post_1 = tf.reshape(h_conv_post_1, [-1, timestep, height, width, 12])

        # First bidirectional ConvLSTM layer.
        convlstm_layer_qx = ConvLSTMCell(shape=[height, width], filters=filter_num, kernel=kernel)
        convlstm_layer_hx = ConvLSTMCell(shape=[height, width], filters=filter_num, kernel=kernel)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            convlstm_layer_qx,
            convlstm_layer_hx,
            h_conv_post_1,
            dtype=h_conv_post_1.dtype,
            scope='B_bi_dynamic_rnn')
        # Join forward and backward outputs on the channel axis.
        outputs = tf.concat(outputs, 4)

        # Second bidirectional ConvLSTM layer.
        convlstm_layer_qx_1 = ConvLSTMCell(shape=[height, width], filters=filter_num, kernel=kernel)
        convlstm_layer_hx_1 = ConvLSTMCell(shape=[height, width], filters=filter_num, kernel=kernel)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            convlstm_layer_qx_1,
            convlstm_layer_hx_1,
            outputs,
            dtype=outputs.dtype,
            scope='B_bi_dynamic_rnn_1')
        outputs = tf.concat(outputs, 4)

        # Keep only the middle time step.
        outputs = tf.reshape(outputs[:, (timestep - 1) // 2, :, :, :],
                             [-1, height, width, 2 * filter_num])

        r = bias_variable([1])  # weight parameter: r
        outputs = outputs * (R_lstmout * (1 - r) + 1 * r)

        c11_w = tf.get_variable("B_c11_w",
                                shape=[3, 3, 2 * filter_num, conv_filter_num],
                                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
        c11_b = tf.get_variable("B_c11_b",
                                shape=[conv_filter_num],
                                initializer=tf.constant_initializer(0.0))
        c12_w = tf.get_variable("B_c12_w",
                                shape=[3, 3, conv_filter_num, (conv_filter_num // 2)],
                                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
        c12_b = tf.get_variable("B_c12_b",
                                shape=[(conv_filter_num // 2)],
                                initializer=tf.constant_initializer(0.0))
        c13_w = tf.get_variable("B_c13_w",
                                shape=[3, 3, (conv_filter_num // 2), 1],
                                initializer=tf.contrib.layers.xavier_initializer(uniform=True))
        c13_b = tf.get_variable("B_c13_b",
                                shape=[1],
                                initializer=tf.constant_initializer(0.0))

        c11 = tf.nn.conv2d(outputs, c11_w, strides=[1, 1, 1, 1], padding='SAME')
        c11 = tf.nn.bias_add(c11, c11_b)
        c11 = tflearn.activations.prelu(c11)

        c12 = tf.nn.conv2d(c11, c12_w, strides=[1, 1, 1, 1], padding='SAME')
        c12 = tf.nn.bias_add(c12, c12_b)
        c12 = tflearn.activations.prelu(c12)

        # The last convolution has no activation.
        c13 = tf.nn.conv2d(c12, c13_w, strides=[1, 1, 1, 1], padding='SAME')
        c13 = tf.nn.bias_add(c13, c13_b)

        final_out = c13
        return final_out, r
def layer_lstm_multi_get(self, data, filters, kernel, name="convlstm", get_last=True):
    """Run two stacked ConvLSTM cells over ``data``.

    Args:
        data: Input sequence tensor for ``tf.nn.dynamic_rnn``.
        filters: Filters argument of both cells.
        kernel: Kernel argument of both cells.
        name: Cell name; only used when ``convlstm_mode`` is 1.
        get_last: When truthy, return only the last step along axis 1 of the
            RNN output; otherwise return the whole output sequence.

    Returns:
        The last-step slice of the RNN output, or the full RNN output.
    """
    # Manual switch: 1 selects tf.contrib's ConvLSTMCell, any other value the
    # ConvLSTMCell already in scope in this file.
    convlstm_mode = 2

    def _make_cell():
        if convlstm_mode == 1:
            return tf.contrib.rnn.ConvLSTMCell(
                2, self.shape + [self.channels], filters, kernel, name=name)
        return ConvLSTMCell(self.shape, filters, kernel)

    stacked_rnn_cell = MultiRNNCell([_make_cell(), _make_cell()])
    val, _ = tf.nn.dynamic_rnn(stacked_rnn_cell, data, dtype=tf.float32)
    if self.debug:
        deb.prints(val.get_shape())

    if not get_last:
        return val

    if self.debug:
        deb.prints(val.get_shape())
    # The static size of axis 1 is required here; int() fails if it is unknown.
    last = tf.gather(val, int(val.get_shape()[1]) - 1, axis=1)
    if self.debug:
        deb.prints(last.get_shape())
    return last
def cell2(input_, name="cell2"):
    """Split the channel axis into 8 steps and run a ConvLSTM over them.

    ``input_`` is reshaped so that its last axis becomes
    ``(channels / 8, 8)``; the axis of size 8 is then moved to position 1 and
    used as the sequence axis of ``tf.nn.dynamic_rnn``.

    Args:
        input_: Input tensor; its static shape entries 1, 2 and 3 are read.
        name: Name of the variable scope.

    Returns:
        Element 1 of the final RNN state (marked ``o(t)`` by the original
        author).
    """
    with tf.variable_scope(name, reuse=None):
        dims = input_.get_shape().as_list()
        rows, cols = dims[1], dims[2]
        per_step_channels = int(dims[3] / 8)
        # 8 is the number of time steps; keep it small (the original author
        # warns that large values overflow).
        split = tf.reshape(input_, [-1, rows, cols, per_step_channels, 8])
        # Move the step axis next to the batch axis.
        sequence = tf.transpose(split, perm=[0, 4, 1, 2, 3])
        lstm_cell = ConvLSTMCell([rows, cols], 72, [4, 4])
        rnn_result = tf.nn.dynamic_rnn(lstm_cell, sequence, dtype=tf.float32)
        _step_outputs, final_state = rnn_result
        return final_state[1]  # o(t)
def create_model(self):
    """Build a stacked ConvLSTM and store its output in ``self.model_output``.

    One ``ConvLSTMCell`` is created per entry of ``self.filters``; the cells
    are wrapped in a ``MultiRNNCell`` and unrolled over ``self.inputs`` inside
    the ``conv_lstm_model`` variable scope. The final state is discarded.
    """
    with tf.variable_scope('conv_lstm_model'):
        layers = [
            ConvLSTMCell(self.shape, n_filters, self.kernel)
            for n_filters in self.filters
        ]
        stacked = tf.nn.rnn_cell.MultiRNNCell(layers, state_is_tuple=True)
        rnn_result = tf.nn.dynamic_rnn(stacked,
                                       self.inputs,
                                       dtype=self.inputs.dtype)
        # Only the per-step outputs are kept; the final state is not used.
        per_step_outputs, _final_state = rnn_result
        self.model_output = per_step_outputs
def __init__(self, input_size, input_dim, hidden_dim, kernel_size, num_layers, batch_first=True, bias=True, return_all_layers=False):
    """Build ``num_layers`` ConvLSTM cells followed by a 3x3, one-channel Conv2d.

    Args:
        input_size: Pair unpacked into ``self.height`` and ``self.width``.
        input_dim: Input dimension of the first cell.
        hidden_dim: Hidden dimension(s); extended to one entry per layer by
            ``_extend_for_multilayer``.
        kernel_size: Kernel size(s); checked by
            ``_check_kernel_size_consistency`` and extended to one entry per
            layer.
        num_layers: Number of ConvLSTM cells.
        batch_first: Stored on the instance.
        bias: Stored on the instance and passed to every layer.
        return_all_layers: Stored on the instance.

    Raises:
        ValueError: If, after extension, ``kernel_size`` and ``hidden_dim``
            do not both have exactly ``num_layers`` entries.
    """
    super(ConvLSTM, self).__init__()

    self._check_kernel_size_consistency(kernel_size)

    # Make sure that both `kernel_size` and `hidden_dim` are lists having len == num_layers
    kernel_size = self._extend_for_multilayer(kernel_size, num_layers)
    hidden_dim = self._extend_for_multilayer(hidden_dim, num_layers)
    if not len(kernel_size) == len(hidden_dim) == num_layers:
        raise ValueError('Inconsistent list length.')

    self.height, self.width = input_size
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.kernel_size = kernel_size
    self.num_layers = num_layers
    self.batch_first = batch_first
    self.bias = bias
    self.return_all_layers = return_all_layers
    self.decay_func = "linear"  # might be changed to exp or negative sigmoid

    cell_list = []
    for i in range(self.num_layers):
        # The first cell sees the raw input; each later cell sees the hidden
        # output of the cell before it.
        cur_input_dim = self.input_dim if i == 0 else self.hidden_dim[i - 1]
        cell_list.append(
            ConvLSTMCell(input_size=(self.height, self.width),
                         input_dim=cur_input_dim,
                         hidden_dim=self.hidden_dim[i],
                         kernel_size=self.kernel_size[i],
                         bias=self.bias))

    # Last conv layer: maps the final hidden channels to a single channel.
    # (The original also computed an unused `padding_size`; it was removed.)
    cell_list.append(
        nn.Conv2d(in_channels=hidden_dim[-1],
                  out_channels=1,  # precipitation value
                  kernel_size=(3, 3),
                  padding=1,
                  bias=self.bias))

    # nn.ModuleList registers the layers as sub-modules but defines no
    # forward pass of its own; this class must chain the layers itself.
    self.cell_list = nn.ModuleList(cell_list)
    self._hidden = self._init_hidden(1)
def __init__(self):
    """Declare hyper-parameters and layers of the model.

    Layers, in registration order: a 3x3 convolution (1 -> 8 channels), a
    3x3 max-pool, two ConvLSTM cells with ``self.hidden_size`` hidden
    channels, and a transposed convolution back to one channel. The spatial
    size fed to the ConvLSTM cells is derived from the module-level
    ``args.img_size``.
    """
    super(Model, self).__init__()

    def _out_size(in_size, pad, kernel_size, stride):
        # Output-size formula shared by the convolution and pooling layers.
        return int((in_size + 2 * pad - (kernel_size - 1) - 1) / stride + 1)

    # Parameters that might be used elsewhere.
    self.conv_pad, self.conv_kernel_size, self.conv_stride = 0, 3, 1
    self.pool_pad, self.pool_kernel_size, self.pool_stride = 0, 3, 3
    self.hidden_size = 64

    # Spatial size after the convolution, then after the pooling.
    self.size = _out_size(args.img_size, self.conv_pad,
                          self.conv_kernel_size, self.conv_stride)
    self.size1 = _out_size(self.size, self.pool_pad,
                           self.pool_kernel_size, self.pool_stride)

    # Layers.
    self.conv = nn.Conv2d(in_channels=1,
                          out_channels=8,
                          kernel_size=3,
                          stride=1,
                          padding=0)
    self.pool = nn.MaxPool2d(kernel_size=3)
    self.convlstm1 = ConvLSTMCell(shape=[self.size1, self.size1],
                                  input_channel=8,
                                  filter_size=3,
                                  hidden_size=self.hidden_size)
    self.convlstm2 = ConvLSTMCell(shape=[self.size1, self.size1],
                                  input_channel=self.hidden_size,
                                  filter_size=3,
                                  hidden_size=self.hidden_size)
    self.deconv = nn.ConvTranspose2d(in_channels=self.hidden_size,
                                     out_channels=1,
                                     kernel_size=6,
                                     stride=3,
                                     padding=0,
                                     output_padding=1)
    self.relu = func.relu
def lstm_layer(self, H, W):
    """Return a ``MultiRNNCell`` with one ConvLSTM cell per entry of ``self.filters``.

    The cells are created inside the ``lstm_model`` variable scope and
    inherit that scope's ``reuse`` flag.

    Args:
        H: First entry of the spatial shape given to every cell.
        W: Second entry of the spatial shape given to every cell.

    Returns:
        The stacked ``tf.nn.rnn_cell.MultiRNNCell``.
    """
    with tf.variable_scope('lstm_model'):
        layers = [
            ConvLSTMCell([H, W],
                         n_filters,
                         self.kernel,
                         reuse=tf.get_variable_scope().reuse)
            for n_filters in self.filters
        ]
        stacked = tf.nn.rnn_cell.MultiRNNCell(layers, state_is_tuple=True)
    return stacked
def t1():
    """Smoke test: unroll one ConvLSTM cell over random input and print the results.

    Relies on the module-level names ``shape``, ``filters``, ``kernel``,
    ``inputs``, ``batch_size``, ``timesteps``, ``height``, ``width`` and
    ``channels``.
    """
    # Add the ConvLSTM step.
    lstm_cell = ConvLSTMCell(shape, filters, kernel)
    rnn_outputs, rnn_state = tf.nn.dynamic_rnn(lstm_cell,
                                               inputs,
                                               dtype=inputs.dtype)

    with tf.Session() as sess:
        feed_dims = (batch_size, timesteps, height, width, channels)
        feed = np.random.normal(size=feed_dims)
        sess.run(tf.global_variables_initializer())
        fetched = sess.run([rnn_outputs, rnn_state], feed_dict={inputs: feed})
        output, cell_and_hidden_state = fetched
        # Presumably [batch_size, timesteps, height, width, filters], since
        # dynamic_rnn is batch-major by default - confirm.
        print("output shape:", output.shape)
        print("cell_and_hidden_state:", cell_and_hidden_state)
def conv_lstm_decoder(self, H, W, filter_size, kernel, decoder_input, enc_final_state):
    """Decode a sequence with one ConvLSTM layer started from the encoder state.

    The layer lives in the ``dec_lstm_model`` variable scope. After the call,
    ``self.reuse_conv_lstm_decoder`` is set to ``True`` so that later calls
    open the scope with ``reuse=True``.

    Args:
        H: First entry of the spatial shape given to the cell.
        W: Second entry of the spatial shape given to the cell.
        filter_size: Passed to ``ConvLSTMCell`` as its filters argument.
        kernel: Passed to ``ConvLSTMCell`` as its kernel argument.
        decoder_input: Input sequence tensor for ``tf.nn.dynamic_rnn``.
        enc_final_state: Initial state of the decoder RNN.

    Returns:
        The per-step outputs returned by ``tf.nn.dynamic_rnn``.
    """
    scope_ctx = tf.variable_scope('dec_lstm_model',
                                  reuse=self.reuse_conv_lstm_decoder)
    with scope_ctx:
        reuse_flag = tf.get_variable_scope().reuse
        lstm = ConvLSTMCell([H, W], filter_size, kernel, reuse=reuse_flag)
        rnn_result = tf.nn.dynamic_rnn(cell=lstm,
                                       inputs=decoder_input,
                                       initial_state=enc_final_state)
    # From now on the decoder variables exist and must be reused.
    self.reuse_conv_lstm_decoder = True
    step_outputs, _final_state = rnn_result
    return step_outputs
def net_bi_wcell(x, f, u, step, Height, Width, filter_num, kernel, relu, CNNlayer, peephole, scale):
    """CNN -> bidirectional ConvLSTM -> CNN pipeline.

    ``x`` goes through a first ``CNN`` block, is concatenated with ``f`` and
    ``u`` on the last axis, and is fed to a bidirectional ConvLSTM in which
    the same cell object serves both directions. The two direction outputs
    are concatenated on axis 4 and reduced to one channel by a second ``CNN``
    block.

    Args:
        x: Input tensor of the first CNN block.
        f: Tensor concatenated with the CNN features on the last axis.
        u: Tensor concatenated with the CNN features on the last axis.
        step, relu, CNNlayer: Forwarded unchanged to both ``CNN`` calls.
        Height, Width: Spatial shape of the ConvLSTM cell.
        filter_num: Filter count of the first CNN block and of the cell.
        kernel: Kernel argument of the CNN blocks and of the cell.
        peephole: ``peephole`` argument of the cell.
        scale: ``scale`` argument of the first CNN block only.

    Returns:
        Output of the second ``CNN`` block.
    """
    features = CNN(x, step, filter_num, filter_num, kernel, relu, CNNlayer,
                   scale=scale, name="1")
    rnn_in = tf.concat([features, f, u], axis=-1)

    shared_cell = ConvLSTMCell(shape=[Height, Width],
                               filters=filter_num,
                               kernel=kernel,
                               peephole=peephole)
    direction_outputs, _final_states = tf.nn.bidirectional_dynamic_rnn(
        shared_cell, shared_cell, rnn_in, dtype=rnn_in.dtype)

    forward_out = direction_outputs[0]
    backward_out = direction_outputs[1]
    merged = tf.concat([forward_out, backward_out], axis=4)

    return CNN(merged, step, filter_num, 1, kernel, relu, CNNlayer,
               scale=False, name="2")
def t3():
    """Smoke test: bidirectional ConvLSTM over input with one spatial dimension.

    Relies on the module-level names ``batch_size``, ``timesteps``,
    ``channels`` and ``filters``. The same cell object is used for both
    directions.
    """
    # Inputs with a different number of spatial dimensions are possible too;
    # here a single spatial dimension of size 100 is used.
    shape = [100]
    kernel = [3]
    input_dims = [batch_size, timesteps] + shape + [channels]
    inputs = tf.placeholder(tf.float32, input_dims)

    cell = ConvLSTMCell(shape, filters, kernel)
    outputs, state = tf.nn.bidirectional_dynamic_rnn(cell,
                                                     cell,
                                                     inputs,
                                                     dtype=inputs.dtype)

    with tf.Session() as sess:
        # Fix: the feed must match the placeholder. The original built it
        # from the global (height, width), which does not match shape=[100].
        inp = np.random.normal(size=input_dims)
        sess.run(tf.global_variables_initializer())
        # Fix: `outputs` is a (forward, backward) pair, so each direction is
        # unpacked separately instead of calling `.shape` on the pair.
        (output_fw, output_bw), cell_and_hidden_state = sess.run(
            [outputs, state], feed_dict={inputs: inp})
        print("output shape:", output_fw.shape, output_bw.shape)
        print("cell_and_hidden_state:", cell_and_hidden_state)
def __init__(self, batch_size, timesteps, shape, channels, kernel, filters, learning_rate):
    """Build the graph of a single-layer ConvLSTM regressor and start a session.

    Creates three placeholders (``self.children``, ``self.inputs``,
    ``self.label``) of shape ``[batch_size, timesteps] + shape + [channels]``,
    unrolls one ``ConvLSTMCell`` over ``self.inputs``, applies ReLU, and
    minimises the mean squared error against ``self.label`` with Adam.
    Afterwards ``self.Persistent()`` and ``self.Persistent_optimizer()`` are
    called and all global variables are initialised in a new session.

    Args:
        batch_size: Leading dimension of every placeholder.
        timesteps: Second dimension of every placeholder.
        shape: List of spatial dimensions (concatenated with other lists).
        channels: Last dimension of every placeholder.
        kernel: Kernel argument of the ConvLSTM cell.
        filters: Filters argument of the ConvLSTM cell.
        learning_rate: Learning rate of the Adam optimizer.
    """
    self.batch_size = batch_size
    self.timesteps = timesteps
    self.shape = shape
    self.channels = channels
    self.kernel = kernel
    self.filters = filters
    self.learning_rate = learning_rate

    # All three placeholders share the same dimensions.
    frame_dims = [self.batch_size, self.timesteps] + self.shape + [self.channels]

    self.children = tf.placeholder(np.float32, shape=(frame_dims))

    # Placeholder for the input videos.
    self.inputs = tf.placeholder(tf.float32, frame_dims)

    # ConvLSTM step.
    lstm_cell = ConvLSTMCell(self.shape, self.filters, self.kernel)
    rnn_result = tf.nn.dynamic_rnn(lstm_cell,
                                   self.inputs,
                                   dtype=self.inputs.dtype)
    self.outputs, self.state = rnn_result
    self.result = tf.nn.relu(self.outputs)

    # NOTE(review): the label ends in `channels` while the RNN output
    # presumably ends in `filters`; the loss needs them to agree - confirm.
    self.label = tf.placeholder(np.float32, frame_dims)
    self.cost = tf.losses.mean_squared_error(self.label, self.result)

    # `self.optimizer` holds the op returned by minimize(), not the optimizer object.
    adam = tf.train.AdamOptimizer(self.learning_rate)
    self.optimizer = adam.minimize(self.cost)

    self.Persistent()
    self.Persistent_optimizer()

    init_op = tf.global_variables_initializer()
    self.sess = tf.Session()
    self.sess.run(init_op)
def layer_lstm_get(self, data, filters, kernel, name="convlstm", get_last=True):
    """Run a single ConvLSTM cell over ``data`` and log a weight histogram.

    Args:
        data: Input sequence tensor for ``tf.nn.dynamic_rnn``.
        filters: Filters argument of the cell.
        kernel: Kernel argument of the cell.
        name: Cell name; only used when ``convlstm_mode`` is 1.
        get_last: When truthy, return only the last step along axis 1 of the
            RNN output; otherwise return the whole output sequence.

    Returns:
        The last-step slice of the RNN output, or the full RNN output.
    """
    # Manual switch: 1 selects tf.contrib's ConvLSTMCell, any other value the
    # ConvLSTMCell already in scope in this file.
    convlstm_mode = 2
    if convlstm_mode == 1:
        cell = tf.contrib.rnn.ConvLSTMCell(
            2, self.shape + [self.channels], filters, kernel, name=name)
    else:
        cell = ConvLSTMCell(self.shape, filters, kernel)

    val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)

    if self.debug:
        deb.prints(val.get_shape())
        # This gather exists only for the debug print, so it is built only
        # when debugging (the original always added it to the graph).
        data_last = tf.gather(data, int(data.get_shape()[1]) - 1, axis=1)
        deb.prints(data_last.get_shape())

    # Histogram of the first variable owned by the cell. A separate local is
    # used so the `kernel` argument is not rebound, and so both modes index
    # the variable list (the original mode-1 branch would have sliced the
    # kernel tensor itself).
    cell_variables = cell.variables
    tf.summary.histogram('convlstm', cell_variables[0])

    if not get_last:
        return val

    if self.debug:
        deb.prints(val.get_shape())
    # The static size of axis 1 is required here; int() fails if it is unknown.
    last = tf.gather(val, int(val.get_shape()[1]) - 1, axis=1)
    if self.debug:
        deb.prints(last.get_shape())
    return last
# cell = ConvLSTMCell([512, 512], 1, [3, 3]) # outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype, scope='rnn') # rnn_output = tf.squeeze(outputs, axis=0) # # inputs2 = tf.expand_dims(tf.concat([up_fc8, up_fc8_r2], axis=3), 0) # cell2 = ConvLSTMCell([512, 512], 1, [3, 3]) # outputs2, state2 = tf.nn.dynamic_rnn(cell2, inputs2, dtype=inputs.dtype, scope='rnn2') # rnn_output2 = tf.squeeze(outputs2, axis=0) inputs = tf.expand_dims( tf.concat([ scale1_score, scale2_score, scale3_score, scale4_score, scale5_score, scale6_score ], axis=3), 0) cell = ConvLSTMCell([512, 512], 6, [3, 3]) # conv3D_w = tf.Variable(tf.truncated_normal(shape=[3, 3, 3, 4, 1]), # dtype=tf.float32, name='3D_conv_w') # conv3D_b = tf.Variable(tf.truncated_normal(shape=[1, 1, 1, 1, 1]), dtype=tf.float32, name='3D_conv_b') # C3D_outputs = tf.nn.conv3d(inputs, conv3D_w, strides=[1, 1, 1, 1, 1], padding='SAME', name='C3D') + conv3D_b # outputs_static, outputs_dynamic = rnn_cell(inputs, 'rnn') outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype, scope='rnn') rnn_output = tf.squeeze(outputs, axis=0) # C3D_output = tf.squeeze(C3D_outputs, axis=0)
def build_ST_RNN(self):
    """Build the saliency graph: conv backbone, multi-scale side outputs, ConvLSTM fusion.

    Creates the placeholders ``self.X`` (4 channels, named
    ``rgb_prior_image``) and ``self.Y`` (1 channel, named ``gt``), five
    convolution/pooling stages followed by the ``fc6``/``fc7``/``fc8``
    convolutions, one side branch per pooling stage, six fused score maps
    resized to ``size x size``, and a ConvLSTM that fuses the six maps.

    Sets ``self.final_saliency``, ``self.saver``, ``self.loss`` and
    ``self.train_op``; returns nothing.

    NOTE(review): ``size`` and the ConvLSTM shape are hard-coded to 512 while
    the placeholders use ``self.crop_size``; presumably ``crop_size`` must be
    512 for the loss shapes to agree - confirm.
    NOTE(review): every ``tf.nn.dropout(..., 0.5)`` is built unconditionally,
    so it looks like dropout is also active at inference - confirm.
    """
    ############### Input ###############
    self.X = tf.placeholder(
        tf.float32, [self.batch_size, self.crop_size, self.crop_size, 4],
        name='rgb_prior_image')
    self.Y = tf.placeholder(
        tf.float32, [self.batch_size, self.crop_size, self.crop_size, 1],
        name='gt')
    # Side length every score map is resized to before fusion.
    size = 512

    # Stage 1: two 3x3 convolutions, then a stride-2 max-pool.
    conv1_1_r2 = tf.nn.relu(
        self.conv2d(self.X, [3, 3, 4, 64], 'conv1_1_r2'))
    conv1_2_r2 = tf.nn.relu(
        self.conv2d(conv1_1_r2, [3, 3, 64, 64], 'conv1_2_r2'))
    pool1_r2 = tf.nn.max_pool(conv1_2_r2,
                              ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name='pool1_r2')

    # Stage 2.
    conv2_1_r2 = tf.nn.relu(
        self.conv2d(pool1_r2, [3, 3, 64, 128], 'conv2_1_r2'))
    conv2_2_r2 = tf.nn.relu(
        self.conv2d(conv2_1_r2, [3, 3, 128, 128], 'conv2_2_r2'))
    pool2_r2 = tf.nn.max_pool(conv2_2_r2,
                              ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name='pool2_r2')

    # Stage 3.
    conv3_1_r2 = tf.nn.relu(
        self.conv2d(pool2_r2, [3, 3, 128, 256], 'conv3_1_r2'))
    conv3_2_r2 = tf.nn.relu(
        self.conv2d(conv3_1_r2, [3, 3, 256, 256], 'conv3_2_r2'))
    conv3_3_r2 = tf.nn.relu(
        self.conv2d(conv3_2_r2, [3, 3, 256, 256], 'conv3_3_r2'))
    pool3_r2 = tf.nn.max_pool(conv3_3_r2,
                              ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name='pool3_r2')

    # Stage 4: the pool uses stride 1, so resolution is kept from here on.
    conv4_1_r2 = tf.nn.relu(
        self.conv2d(pool3_r2, [3, 3, 256, 512], 'conv4_1_r2'))
    conv4_2_r2 = tf.nn.relu(
        self.conv2d(conv4_1_r2, [3, 3, 512, 512], 'conv4_2_r2'))
    conv4_3_r2 = tf.nn.relu(
        self.conv2d(conv4_2_r2, [3, 3, 512, 512], 'conv4_3_r2'))
    pool4_r2 = tf.nn.max_pool(conv4_3_r2,
                              ksize=[1, 3, 3, 1],
                              strides=[1, 1, 1, 1],
                              padding='SAME',
                              name='pool4_r2')

    # Stage 5: self.astro_conv2d with hole=2 (presumably atrous/dilated
    # convolution - confirm against its definition).
    conv5_1_r2 = tf.nn.relu(
        self.astro_conv2d(pool4_r2, [3, 3, 512, 512],
                          hole=2,
                          name='conv5_1_r2'))
    conv5_2_r2 = tf.nn.relu(
        self.astro_conv2d(conv5_1_r2, [3, 3, 512, 512],
                          hole=2,
                          name='conv5_2_r2'))
    conv5_3_r2 = tf.nn.relu(
        self.astro_conv2d(conv5_2_r2, [3, 3, 512, 512],
                          hole=2,
                          name='conv5_3_r2'))
    # Unlike the other pools, this one is created without an explicit name.
    pool5_r2 = tf.nn.max_pool(conv5_3_r2,
                              ksize=[1, 3, 3, 1],
                              strides=[1, 1, 1, 1],
                              padding='SAME')

    # fc6 / fc7 / fc8 expressed as convolutions; fc8 emits one channel.
    fc6_r2 = tf.nn.relu(
        self.astro_conv2d(pool5_r2, [4, 4, 512, 4096],
                          hole=4,
                          name='fc6_r2'))
    fc6_dropout_r2 = tf.nn.dropout(fc6_r2, 0.5)
    fc7_r2 = tf.nn.relu(
        self.astro_conv2d(fc6_dropout_r2, [1, 1, 4096, 4096],
                          hole=4,
                          name='fc7_r2'))
    fc7_dropout_r2 = tf.nn.dropout(fc7_r2, 0.5)
    fc8_r2 = self.conv2d(fc7_dropout_r2, [1, 1, 4096, 1], 'fc8_r2')

    # Side branches: for each pooling stage a 3x3 conv and a 1x1 conv (both
    # ReLU + dropout), then a 1x1 conv down to a one-channel map.
    pool5_conv_r2 = tf.nn.dropout(
        tf.nn.relu(self.conv2d(pool5_r2, [3, 3, 512, 128], 'pool5_conv_r2')),
        0.5)
    pool5_fc_r2 = tf.nn.dropout(
        tf.nn.relu(
            self.conv2d(pool5_conv_r2, [1, 1, 128, 128], 'pool5_fc_r2')),
        0.5)
    pool5_ms_saliency_r2 = self.conv2d(pool5_fc_r2, [1, 1, 128, 1],
                                       'pool5_ms_saliency_r2')

    pool4_conv_r2 = tf.nn.dropout(
        tf.nn.relu(self.conv2d(pool4_r2, [3, 3, 512, 128], 'pool4_conv_r2')),
        0.5)
    pool4_fc_r2 = tf.nn.dropout(
        tf.nn.relu(
            self.conv2d(pool4_conv_r2, [1, 1, 128, 128], 'pool4_fc_r2')),
        0.5)
    pool4_ms_saliency_r2 = self.conv2d(pool4_fc_r2, [1, 1, 128, 1],
                                       'pool4_ms_saliency_r2')

    pool3_conv_r2 = tf.nn.dropout(
        tf.nn.relu(self.conv2d(pool3_r2, [3, 3, 256, 128], 'pool3_conv_r2')),
        0.5)
    pool3_fc_r2 = tf.nn.dropout(
        tf.nn.relu(
            self.conv2d(pool3_conv_r2, [1, 1, 128, 128], 'pool3_fc_r2')),
        0.5)
    pool3_ms_saliency_r2 = self.conv2d(pool3_fc_r2, [1, 1, 128, 1],
                                       'pool3_ms_saliency_r2')

    pool2_conv_r2 = tf.nn.dropout(
        tf.nn.relu(self.conv2d(pool2_r2, [3, 3, 128, 128], 'pool2_conv_r2')),
        0.5)
    pool2_fc_r2 = tf.nn.dropout(
        tf.nn.relu(
            self.conv2d(pool2_conv_r2, [1, 1, 128, 128], 'pool2_fc_r2')),
        0.5)
    pool2_ms_saliency_r2 = self.conv2d(pool2_fc_r2, [1, 1, 128, 1],
                                       'pool2_ms_saliency_r2')

    pool1_conv_r2 = tf.nn.dropout(
        tf.nn.relu(self.conv2d(pool1_r2, [3, 3, 64, 128], 'pool1_conv_r2')),
        0.5)
    pool1_fc_r2 = tf.nn.dropout(
        tf.nn.relu(
            self.conv2d(pool1_conv_r2, [1, 1, 128, 128], 'pool1_fc_r2')),
        0.5)
    pool1_ms_saliency_r2 = self.conv2d(pool1_fc_r2, [1, 1, 128, 1],
                                       'pool1_ms_saliency_r2')

    ########## DSS structure ##########
    # Each scaleN score fuses fc8 with the side maps of stages 5 down to N
    # through a 1x1 convolution and is then resized to size x size.
    # fc8
    scale6_score = tf.image.resize_bilinear(fc8_r2, [size, size])

    # fc8 + pool5
    scale5_score = self.conv2d(
        tf.concat([fc8_r2, pool5_ms_saliency_r2], axis=3), [1, 1, 2, 1],
        'scale5')
    scale5_score = tf.image.resize_bilinear(scale5_score, [size, size])

    # fc8 + pool5 + pool4
    scale4_score = self.conv2d(
        tf.concat([fc8_r2, pool5_ms_saliency_r2, pool4_ms_saliency_r2],
                  axis=3), [1, 1, 3, 1], 'scale4')
    scale4_score = tf.image.resize_bilinear(scale4_score, [size, size])

    # fc8 + pool5 + pool4 + pool3
    scale3_score = self.conv2d(
        tf.concat([
            fc8_r2, pool5_ms_saliency_r2, pool4_ms_saliency_r2,
            pool3_ms_saliency_r2
        ],
                  axis=3), [1, 1, 4, 1], 'scale3')
    scale3_score = tf.image.resize_bilinear(scale3_score, [size, size])

    # fc8 + pool5 + pool4 + pool3 + pool2
    # The deeper maps are first resized to the pool2 map's spatial size.
    pool2_size = pool2_ms_saliency_r2.get_shape().as_list()
    up2_fc8 = tf.image.resize_bilinear(fc8_r2,
                                       [pool2_size[1], pool2_size[2]])
    up2_pool5 = tf.image.resize_bilinear(pool5_ms_saliency_r2,
                                         [pool2_size[1], pool2_size[2]])
    up2_pool4 = tf.image.resize_bilinear(pool4_ms_saliency_r2,
                                         [pool2_size[1], pool2_size[2]])
    up2_pool3 = tf.image.resize_bilinear(pool3_ms_saliency_r2,
                                         [pool2_size[1], pool2_size[2]])
    scale2_score = self.conv2d(
        tf.concat([
            up2_fc8, up2_pool5, up2_pool4, up2_pool3, pool2_ms_saliency_r2
        ],
                  axis=3), [1, 1, 5, 1], 'scale2')
    scale2_score = tf.image.resize_bilinear(scale2_score, [size, size])

    # fc8 + pool5 + pool4 + pool3 + pool2 + pool1
    # The deeper maps are first resized to the pool1 map's spatial size.
    pool1_size = pool1_ms_saliency_r2.get_shape().as_list()
    up1_fc8 = tf.image.resize_bilinear(fc8_r2,
                                       [pool1_size[1], pool1_size[2]])
    up1_pool5 = tf.image.resize_bilinear(pool5_ms_saliency_r2,
                                         [pool1_size[1], pool1_size[2]])
    up1_pool4 = tf.image.resize_bilinear(pool4_ms_saliency_r2,
                                         [pool1_size[1], pool1_size[2]])
    up1_pool3 = tf.image.resize_bilinear(pool3_ms_saliency_r2,
                                         [pool1_size[1], pool1_size[2]])
    up1_pool2 = tf.image.resize_bilinear(pool2_ms_saliency_r2,
                                         [pool1_size[1], pool1_size[2]])
    scale1_score = self.conv2d(
        tf.concat([
            up1_fc8, up1_pool5, up1_pool4, up1_pool3, up1_pool2,
            pool1_ms_saliency_r2
        ],
                  axis=3), [1, 1, 6, 1], 'scale1')
    scale1_score = tf.image.resize_bilinear(scale1_score, [size, size])

    ########## rnn fusion ############
    # Stack the six score maps on the channel axis and add a leading axis of
    # size 1. NOTE(review): with dynamic_rnn's default layout this presumably
    # makes the original batch axis the time axis - confirm.
    inputs = tf.expand_dims(
        tf.concat([
            scale1_score, scale2_score, scale3_score, scale4_score,
            scale5_score, scale6_score
        ],
                  axis=3), 0)
    cell = ConvLSTMCell([512, 512], 6, [3, 3])
    outputs, state = tf.nn.dynamic_rnn(cell,
                                       inputs,
                                       dtype=inputs.dtype,
                                       scope='rnn')
    # Remove the leading axis added by expand_dims above.
    rnn_output = tf.squeeze(outputs, axis=0)

    # 1x1 convolution from the 6 ConvLSTM channels to the final logit map.
    final_fusion = self.conv2d(rnn_output, [1, 1, 6, 1], 'final_saliency')
    self.final_saliency = tf.sigmoid(final_fusion)

    # NOTE(review): the Saver is created before the Adam optimizer below, so
    # presumably the optimizer's own variables are not saved - confirm that
    # this is intended.
    self.saver = tf.train.Saver()
    self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=final_fusion, labels=self.Y),
                               name='loss')
    # self.loss = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(logits=final_saliency, targets=self.Y, pos_weight=0.12), name='loss')
    # self.loss_rnn = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=C3D_output, labels=self.Y),
    #                                name='loss2')
    # tf.summary.scalar('entropy', self.loss + 0.1 * self.loss_rnn)
    tf.summary.scalar('entropy', self.loss)

    # Train every trainable variable with Adam at learning rate self.lr.
    trainable_var = tf.trainable_variables()
    optimizer = tf.train.AdamOptimizer(self.lr, name='optimizer')
    # grads = optimizer.compute_gradients(self.loss + 1 * self.loss_rnn, var_list=trainable_var)
    grads = optimizer.compute_gradients(self.loss, var_list=trainable_var)
    # grads = optimizer.compute_gradients(self.loss + 0.5 * self.loss_rnn, var_list=trainable_var[-22:])
    # optimizer2 = tf.train.MomentumOptimizer(self.lr, 0.9)
    # grads = optimizer2.compute_gradients(self.loss + 0.5 * self.loss_rnn, var_list=trainable_var[-22:])
    # grads = optimizer2.compute_gradients(self.loss, var_list=trainable_var)
    self.train_op = optimizer.apply_gradients(grads)
def create_conv_net(x, keep_prob, channels, n_class, layers=3, features_root=16, filter_size=3, pool_size=2, summaries=True):
    """
    Creates a new convolutional unet for the given parametrization, with a
    ConvLSTM inserted between the contracting and the expanding path.

    :param x: input tensor, shape [?,nx,ny,channels]
    :param keep_prob: dropout probability tensor
    :param channels: number of channels in the input image
    :param n_class: number of output labels
    :param layers: number of layers in the net
    :param features_root: number of features in the first layer
    :param filter_size: size of the convolution filter
    :param pool_size: size of the max pooling operation
    :param summaries: Flag if summaries should be created
    :return: tuple ``(output_map, variables, offset)``: the ReLU output map,
        the list of all weight variables followed by all bias variables of
        the conv blocks, and ``int(in_size - size)`` from the size
        bookkeeping below

    NOTE(review): the ConvLSTM section hard-codes 121x121 feature maps, 64
    channels, 3 sequences and 5 steps, so it presumably only works for one
    specific input size, batch layout and the default ``layers`` /
    ``features_root`` - confirm.
    """
    logging.info(
        "Layers {layers}, features {features}, filter size {filter_size}x{filter_size}, pool size: {pool_size}x{pool_size}"
        .format(layers=layers,
                features=features_root,
                filter_size=filter_size,
                pool_size=pool_size))

    # Placeholder for the input image
    nx = tf.shape(x)[1]
    ny = tf.shape(x)[2]
    x_image = tf.reshape(x, tf.stack([-1, nx, ny, channels]))
    in_node = x_image
    # Not referenced again in this function.
    batch_size = tf.shape(x_image)[0]

    # (w1, w2) / (b1, b2) / (conv1, conv2) tuples, one per conv block.
    weights = []
    biases = []
    convs = []
    # Intermediate tensors keyed by layer index, used for summaries and for
    # the skip connections.
    pools = OrderedDict()
    deconv = OrderedDict()
    dw_h_convs = OrderedDict()
    up_h_convs = OrderedDict()

    # Size bookkeeping on a nominal input of 1000; only the difference
    # in_size - size is returned.
    in_size = 1000
    size = in_size

    # For convlstm
    cell = ConvLSTMCell([121, 121], 64, [3, 3])

    # down layers
    for layer in range(0, layers):
        features = 2**layer * features_root
        stddev = np.sqrt(2 / (filter_size**2 * features))
        if layer == 0:
            w1 = weight_variable(
                [filter_size, filter_size, channels, features], stddev)
        else:
            w1 = weight_variable(
                [filter_size, filter_size, features // 2, features], stddev)

        w2 = weight_variable([filter_size, filter_size, features, features],
                             stddev)
        b1 = bias_variable([features])
        b2 = bias_variable([features])

        conv1 = conv2d(in_node, w1, keep_prob)
        tmp_h_conv = tf.nn.relu(conv1 + b1)
        conv2 = conv2d(tmp_h_conv, w2, keep_prob)
        dw_h_convs[layer] = tf.nn.relu(conv2 + b2)

        weights.append((w1, w2))
        biases.append((b1, b2))
        convs.append((conv1, conv2))

        # The bookkeeping assumes each block trims 4 pixels (presumably two
        # unpadded 3x3 convolutions - confirm against conv2d).
        size -= 4
        # Every down layer except the last one is followed by a max-pool.
        if layer < layers - 1:
            pools[layer] = max_pool(dw_h_convs[layer], pool_size)
            in_node = pools[layer]
            size /= 2

    in_node = dw_h_convs[layers - 1]

    # For conv lstm
    # Example: batch_size 3, n_step, 5,
    # Split the leading axis into (3 sequences, 5 steps), run the ConvLSTM,
    # then merge the two axes again (3 * 5 = 15).
    in_node = tf.reshape(in_node, [3, 5, 121, 121, 64])
    outputs, state = tf.nn.dynamic_rnn(cell, in_node, dtype=in_node.dtype)
    in_node = tf.reshape(outputs, [15, 121, 121, 64])

    # up layers
    for layer in range(layers - 2, -1, -1):
        features = 2**(layer + 1) * features_root
        stddev = np.sqrt(2 / (filter_size**2 * features))

        wd = weight_variable_devonc(
            [pool_size, pool_size, features // 2, features], stddev)
        bd = bias_variable([features // 2])
        h_deconv = tf.nn.relu(deconv2d(in_node, wd, pool_size) + bd)
        # Skip connection: combine with the down-path activation of the same
        # layer index.
        h_deconv_concat = crop_and_concat(dw_h_convs[layer], h_deconv)
        deconv[layer] = h_deconv_concat

        w1 = weight_variable(
            [filter_size, filter_size, features, features // 2], stddev)
        w2 = weight_variable(
            [filter_size, filter_size, features // 2, features // 2], stddev)
        b1 = bias_variable([features // 2])
        b2 = bias_variable([features // 2])

        conv1 = conv2d(h_deconv_concat, w1, keep_prob)
        h_conv = tf.nn.relu(conv1 + b1)
        conv2 = conv2d(h_conv, w2, keep_prob)
        in_node = tf.nn.relu(conv2 + b2)
        up_h_convs[layer] = in_node

        weights.append((w1, w2))
        biases.append((b1, b2))
        convs.append((conv1, conv2))

        size *= 2
        size -= 4

    # Output Map
    # `stddev` here is whatever the last executed loop iteration left behind.
    weight = weight_variable([1, 1, features_root, n_class], stddev)
    bias = bias_variable([n_class])
    # The constant 1.0 is passed where the other conv2d calls pass keep_prob.
    conv = conv2d(in_node, weight, tf.constant(1.0))
    output_map = tf.nn.relu(conv + bias)
    up_h_convs["out"] = output_map

    if summaries:
        for i, (c1, c2) in enumerate(convs):
            tf.summary.image('summary_conv_%02d_01' % i,
                             get_image_summary(c1))
            tf.summary.image('summary_conv_%02d_02' % i,
                             get_image_summary(c2))

        for k in pools.keys():
            tf.summary.image('summary_pool_%02d' % k,
                             get_image_summary(pools[k]))

        for k in deconv.keys():
            tf.summary.image('summary_deconv_concat_%02d' % k,
                             get_image_summary(deconv[k]))

        for k in dw_h_convs.keys():
            tf.summary.histogram("dw_convolution_%02d" % k + '/activations',
                                 dw_h_convs[k])

        # %s rather than %02d because the "out" key is a string.
        for k in up_h_convs.keys():
            tf.summary.histogram("up_convolution_%s" % k + '/activations',
                                 up_h_convs[k])

    # Flatten to one list: all weights first, then all biases. The output-map
    # weight and bias are not included.
    variables = []
    for w1, w2 in weights:
        variables.append(w1)
        variables.append(w2)

    for b1, b2 in biases:
        variables.append(b1)
        variables.append(b2)

    return output_map, variables, int(in_size - size)
def build_ST_RNN_regression(self):
    """Build the two-stream saliency graph with centre regression and attention.

    Two identically structured convolutional streams are built: R1 on the
    RGB frame (``self.X``) and R2 on the prior/flow image (``self.X_prior``).
    Their side outputs are fused three ways (ConvLSTM, 3-D convolution and
    1x1 "ST" convolutions).  Each fused map is also multiplied by a mask
    produced by ``gaussian_mask`` from a regressed centre point, and the six
    resulting maps are blended with per-pixel softmax attention weights.

    Reads ``self.batch_size``, ``self.crop_size``, ``self.prior_type`` and
    ``self.lr``.  Sets the placeholders ``self.X``, ``self.Y``,
    ``self.X_prior`` and ``self.gt_points`` plus ``self.mask``,
    ``self.combine_local``, ``self.C3D_output_local``,
    ``self.rnn_output_local``, ``self.final_saliency``, ``self.up_fc8``,
    ``self.rnn_output``, ``self.saver``, ``self.loss``,
    ``self.center_point_loss`` and ``self.train_op``.  Returns nothing.

    NOTE(review): the fusion sizes 128/512 and the ``[4096, 2]`` regression
    weights are hard-coded; they look consistent only with
    ``crop_size == 512`` (three stride-2 pools give a 64x64 pool4 map) --
    confirm against the training configuration.
    """
    side_size = [128, 128]  # resolution at which side outputs are fused
    full_size = [512, 512]  # resolution of the final saliency map

    def conv_relu(x, shape, name):
        """ReLU on top of the project's ``self.conv2d`` layer."""
        return tf.nn.relu(self.conv2d(x, shape, name))

    def atrous_relu(x, shape, hole, name):
        """ReLU on top of the project's ``self.astro_conv2d`` layer."""
        return tf.nn.relu(self.astro_conv2d(x, shape, hole=hole, name=name))

    def max_pool(x, stride, name=None):
        """3x3 SAME max-pool with the given spatial stride."""
        return tf.nn.max_pool(x, ksize=[1, 3, 3, 1],
                              strides=[1, stride, stride, 1],
                              padding='SAME', name=name)

    def saliency_stream(image, in_channels, suffix):
        """Build one stream; layers are created in the original order.

        Returns ``(pool4_ms_saliency, up_pool4, fc8, up_fc8)``.
        """
        net, channels = image, in_channels
        # (block index, number of convs, output width, pooling stride)
        for block, depth, width, stride in ((1, 2, 64, 2), (2, 2, 128, 2),
                                            (3, 3, 256, 2), (4, 3, 512, 1)):
            for idx in range(1, depth + 1):
                net = conv_relu(net, [3, 3, channels, width],
                                'conv%d_%d%s' % (block, idx, suffix))
                channels = width
            net = max_pool(net, stride, 'pool%d%s' % (block, suffix))
        pool4 = net

        for idx in range(1, 4):
            net = atrous_relu(net, [3, 3, 512, 512], 2,
                              'conv5_%d%s' % (idx, suffix))
        net = max_pool(net, 1)  # pool5 was unnamed in the original too

        # NOTE(review): dropout uses a fixed 0.5 and is therefore also
        # active whenever this graph is used for inference.
        net = tf.nn.dropout(
            atrous_relu(net, [4, 4, 512, 4096], 4, 'fc6' + suffix), 0.5)
        net = tf.nn.dropout(
            atrous_relu(net, [1, 1, 4096, 4096], 4, 'fc7' + suffix), 0.5)
        fc8 = self.conv2d(net, [1, 1, 4096, 1], 'fc8' + suffix)
        up_fc8 = tf.image.resize_bilinear(fc8, side_size)

        side = tf.nn.dropout(
            conv_relu(pool4, [3, 3, 512, 128], 'pool4_conv' + suffix), 0.5)
        side = tf.nn.dropout(
            conv_relu(side, [1, 1, 128, 128], 'pool4_fc' + suffix), 0.5)
        pool4_saliency = self.conv2d(side, [1, 1, 128, 1],
                                     'pool4_ms_saliency' + suffix)
        up_pool4 = tf.image.resize_bilinear(pool4_saliency, side_size)
        return pool4_saliency, up_pool4, fc8, up_fc8

    ############### Input ###############
    frame_dims = [self.batch_size, self.crop_size, self.crop_size]
    self.X = tf.placeholder(tf.float32, frame_dims + [3], name='rgb_image')
    self.Y = tf.placeholder(tf.float32, frame_dims + [1], name='gt')
    if self.prior_type == 'prior':
        prior_channels, prior_name = 4, 'rgb_prior_image'
    else:
        prior_channels, prior_name = 3, 'rgb_flow_image'
    self.X_prior = tf.placeholder(tf.float32, frame_dims + [prior_channels],
                                  name=prior_name)
    self.gt_points = tf.placeholder(tf.float32, [self.batch_size, 2],
                                    name='center_points')

    ############### R1 / R2 ###############
    pool4_ms_saliency, up_pool4, fc8, up_fc8 = saliency_stream(
        self.X, 3, '')
    # The first R2 convolution must match the channel count of X_prior.
    # It used to be fixed at 4, which cannot work with the 3-channel flow
    # placeholder; the sibling builders already branch on prior_type.
    pool4_ms_saliency_r2, up_pool4_r2, fc8_r2, up_fc8_r2 = saliency_stream(
        self.X_prior, prior_channels, '_r2')

    ########## rnn fusion ############
    # expand_dims adds a leading axis of size 1, so dynamic_rnn (batch-major
    # by default) treats the original batch axis as the time axis.
    inputs = tf.expand_dims(
        tf.concat([up_pool4, up_pool4_r2, up_fc8, up_fc8_r2], axis=3), 0)
    cell = ConvLSTMCell(list(side_size), 1, [3, 3])
    outputs, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype,
                                   scope='rnn')
    rnn_output = tf.squeeze(outputs, axis=0)

    ########## C3D fusion ############
    C3D_outputs = self.conv3d(inputs, [3, 3, 3, 4, 1], name='3D_conv')
    C3D_output = tf.squeeze(C3D_outputs, axis=0)

    ########### ST fusion ############
    pool4_saliency_cancat = tf.concat(
        [pool4_ms_saliency, pool4_ms_saliency_r2], 3, name='concat_pool4')
    pool4_saliency_ST = self.conv2d(pool4_saliency_cancat, [1, 1, 2, 1],
                                    'pool4_saliency_ST')
    fc8_concat = tf.concat([fc8, fc8_r2], 3, name='concat_fc8')
    fc8_saliency_ST = self.conv2d(fc8_concat, [1, 1, 2, 1],
                                  'fc8_saliency_ST')
    pool4_fc8_combine = tf.add(pool4_saliency_ST, fc8_saliency_ST)
    pool4_fc8_combine = tf.image.resize_bilinear(pool4_fc8_combine,
                                                 side_size)

    ########## salient object center regression ############
    # One row per sample.  The leading dimension used to be the literal 4,
    # which only agrees with gt_points ([batch_size, 2]) when batch_size
    # is 4.
    pool4_saliency_ST_flatten = tf.reshape(pool4_saliency_ST,
                                           [self.batch_size, -1])
    center_regression = self.fc(pool4_saliency_ST_flatten, [4096, 2],
                                name='crop_location_fc')

    # local feature maps generation
    # NOTE(review): gaussian_mask is defined elsewhere; its output shape
    # must broadcast against the side-size maps -- confirm.
    mask = tf.py_func(gaussian_mask, [center_regression], tf.float32)
    pool4_fc8_combine_local = tf.multiply(pool4_fc8_combine, mask)
    C3D_output_local = tf.multiply(C3D_output, mask)
    rnn_output_local = tf.multiply(rnn_output, mask)

    ########### attention fusion ############
    motion_cancat = tf.concat([
        pool4_fc8_combine, C3D_output, rnn_output, pool4_fc8_combine_local,
        C3D_output_local, rnn_output_local
    ], axis=3)
    up_motion_concat = tf.image.resize_bilinear(motion_cancat, full_size)
    attention_first = tf.nn.relu(
        self.conv2d(motion_cancat, [3, 3, 6, 256], 'attention_conv1'))
    attention_second = tf.nn.softmax(
        self.conv2d(attention_first, [1, 1, 256, 6], 'attention_conv2'))
    up_attention = tf.image.resize_bilinear(attention_second, full_size)
    final_fusion = tf.multiply(up_motion_concat, up_attention)
    final_saliency = tf.reduce_sum(final_fusion, axis=3, keep_dims=True)
    # A scalar divisor gives the same result as the former
    # [batch, crop, crop, 1] constant without embedding a full-size tensor
    # in the graph.
    final_saliency = tf.div(final_saliency, 3.0)

    self.mask = mask
    # NOTE(review): the two attributes below do not match their names
    # (combine_local holds the unmasked map, C3D_output_local holds the
    # masked pool4/fc8 map).  Kept as-is because callers may rely on it.
    self.combine_local = pool4_fc8_combine
    self.C3D_output_local = pool4_fc8_combine_local
    self.rnn_output_local = rnn_output_local
    self.final_saliency = tf.sigmoid(final_saliency)
    self.up_fc8 = up_fc8
    self.rnn_output = final_saliency  # pre-sigmoid logits
    # Created before the optimizer, so optimizer slot variables are not
    # part of this saver.
    self.saver = tf.train.Saver()

    self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=final_saliency, labels=self.Y), name='loss')
    self.center_point_loss = tf.reduce_mean(tf.losses.huber_loss(
        labels=self.gt_points, predictions=center_regression),
        name='regress_loss')
    tf.summary.scalar('entropy', self.loss)

    optimizer = tf.train.AdamOptimizer(self.lr, name='optimizer')
    trainable_var = tf.trainable_variables()
    # The regression loss is weighted by 0: it stays connected to the graph
    # but contributes no gradient.
    grads = optimizer.compute_gradients(
        self.loss + 0 * self.center_point_loss, var_list=trainable_var)
    self.train_op = optimizer.apply_gradients(grads)
def build_ST_RNN_drop_path(self):
    """Build the two-stream saliency graph with drop-path fusion.

    Stream R1 runs on ``self.X`` and stream R2 on ``self.X_prior``.  The
    pool3, pool4 and fc8 side outputs of both streams are merged by 1x1
    "ST" convolutions; the up-sampled side outputs are additionally fused
    by a ConvLSTM and by a 3-D convolution.  The three fused maps are
    weighted channel-wise by the ``self.fusion_weight`` placeholder,
    summed and divided by 3.

    Sets the placeholders ``self.X``, ``self.Y``, ``self.X_prior`` and
    ``self.fusion_weight`` plus ``self.final_saliency``, ``self.up_fc8``,
    ``self.rnn_output``, ``self.saver``, ``self.loss`` and
    ``self.train_op``.  Returns nothing.
    """
    crop = [self.crop_size, self.crop_size]

    def image_placeholder(channels, name):
        """Float32 placeholder of shape [batch, crop, crop, channels]."""
        return tf.placeholder(tf.float32,
                              [self.batch_size] + crop + [channels],
                              name=name)

    def pool3x3(x, stride, name=None):
        """3x3 SAME max-pool with the given spatial stride."""
        return tf.nn.max_pool(x, ksize=[1, 3, 3, 1],
                              strides=[1, stride, stride, 1],
                              padding='SAME', name=name)

    def side_output(features, in_channels, prefix, tag):
        """Two dropout-regularised convs followed by a 1-channel conv."""
        hidden = tf.nn.dropout(
            tf.nn.relu(self.conv2d(features, [3, 3, in_channels, 128],
                                   prefix + '_conv' + tag)), 0.5)
        hidden = tf.nn.dropout(
            tf.nn.relu(self.conv2d(hidden, [1, 1, 128, 128],
                                   prefix + '_fc' + tag)), 0.5)
        return self.conv2d(hidden, [1, 1, 128, 1],
                           prefix + '_ms_saliency' + tag)

    def stream(image, in_channels, tag):
        """Build one stream, creating layers in the original order."""
        layout = (('1', (64, 64), 2), ('2', (128, 128), 2),
                  ('3', (256, 256, 256), 2), ('4', (512, 512, 512), 1))
        x, depth_in, pooled = image, in_channels, {}
        for stage, widths, stride in layout:
            for position, depth_out in enumerate(widths):
                layer_name = 'conv' + stage + '_' + str(position + 1) + tag
                x = tf.nn.relu(
                    self.conv2d(x, [3, 3, depth_in, depth_out], layer_name))
                depth_in = depth_out
            x = pool3x3(x, stride, 'pool' + stage + tag)
            pooled[stage] = x

        for position in (1, 2, 3):
            x = tf.nn.relu(self.astro_conv2d(
                x, [3, 3, 512, 512], hole=2,
                name='conv5_%d%s' % (position, tag)))
        x = pool3x3(x, 1)  # pool5 carries no explicit name

        x = tf.nn.dropout(tf.nn.relu(self.astro_conv2d(
            x, [4, 4, 512, 4096], hole=4, name='fc6' + tag)), 0.5)
        x = tf.nn.dropout(tf.nn.relu(self.astro_conv2d(
            x, [1, 1, 4096, 4096], hole=4, name='fc7' + tag)), 0.5)
        fc8_map = self.conv2d(x, [1, 1, 4096, 1], 'fc8' + tag)
        fc8_up = tf.image.resize_bilinear(fc8_map, crop)

        pool4_map = side_output(pooled['4'], 512, 'pool4', tag)
        pool4_up = tf.image.resize_bilinear(pool4_map, crop)
        pool3_map = side_output(pooled['3'], 256, 'pool3', tag)
        return pool3_map, pool4_map, pool4_up, fc8_map, fc8_up

    def merge_pair(first, second, concat_name, conv_name):
        """Concatenate two 1-channel maps, mix with a 1x1 conv, up-sample."""
        stacked = tf.concat([first, second], 3, name=concat_name)
        mixed = self.conv2d(stacked, [1, 1, 2, 1], conv_name)
        return tf.image.resize_bilinear(mixed, crop)

    ############### Input ###############
    self.X = image_placeholder(3, 'rgb_image')
    self.Y = image_placeholder(1, 'gt')
    uses_prior = (self.prior_type == 'prior')
    if uses_prior:
        self.X_prior = image_placeholder(4, 'rgb_prior_image')
    else:
        self.X_prior = image_placeholder(3, 'rgb_flow_image')
    self.fusion_weight = image_placeholder(3, 'fusion_weight')

    ############### R1 ###############
    (pool3_ms_saliency, pool4_ms_saliency, up_pool4,
     fc8, up_fc8) = stream(self.X, 3, '')

    ############### R2 ###############
    (pool3_ms_saliency_r2, pool4_ms_saliency_r2, up_pool4_r2,
     fc8_r2, up_fc8_r2) = stream(self.X_prior, 4 if uses_prior else 3,
                                 '_r2')

    ########## rnn fusion ############
    # The leading axis added here makes the original batch axis the
    # sequence axis seen by dynamic_rnn (batch-major by default).
    inputs = tf.expand_dims(
        tf.concat([up_pool4, up_pool4_r2, up_fc8, up_fc8_r2], axis=3), 0)
    lstm_cell = ConvLSTMCell([self.crop_size, self.crop_size], 1, [3, 3])
    rnn_sequence, _ = tf.nn.dynamic_rnn(lstm_cell, inputs,
                                        dtype=inputs.dtype, scope='rnn')
    rnn_output = tf.squeeze(rnn_sequence, axis=0)

    ########## C3D fusion ############
    C3D_output = tf.squeeze(
        self.conv3d(inputs, [3, 3, 3, 4, 1], name='3D_conv'), axis=0)

    ########### ST fusion ############
    up_pool4_ST = merge_pair(pool4_ms_saliency, pool4_ms_saliency_r2,
                             'concat_pool4', 'pool4_saliency_ST')
    up_pool3_ST = merge_pair(pool3_ms_saliency, pool3_ms_saliency_r2,
                             'concat_pool3', 'pool3_saliency_ST')
    up_fc8_ST = merge_pair(fc8, fc8_r2, 'concat_fc8', 'fc8_saliency_ST')
    pool4_fc8_combine = tf.add(tf.add(up_pool3_ST, up_pool4_ST), up_fc8_ST)

    # drop path process: fusion_weight scales each of the three branches
    fusion = tf.concat([pool4_fc8_combine, rnn_output, C3D_output], axis=3)
    weighted = tf.multiply(fusion, self.fusion_weight)
    logits = tf.reduce_sum(weighted, axis=3, keep_dims=True)
    divisor = tf.constant(
        3.0, dtype=tf.float32,
        shape=[self.batch_size, self.crop_size, self.crop_size, 1])
    logits = tf.div(logits, divisor)

    self.final_saliency = tf.sigmoid(logits)
    self.up_fc8 = up_fc8
    self.rnn_output = logits  # pre-sigmoid map
    # Built before the optimizer, so Adam's slot variables are excluded.
    self.saver = tf.train.Saver()

    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                labels=self.Y),
        name='loss')
    tf.summary.scalar('entropy', self.loss)

    optimizer = tf.train.AdamOptimizer(self.lr, name='optimizer')
    gradients = optimizer.compute_gradients(
        self.loss, var_list=tf.trainable_variables())
    self.train_op = optimizer.apply_gradients(gradients)
import tensorflow as tf batch_size = 32 timesteps = 100 shape = [640, 480] kernel = [3, 3] channels = 3 filters = 12 # Create a placeholder for videos. inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels]) # Add the ConvLSTM step. from cell import ConvLSTMCell cell = ConvLSTMCell(shape, filters, kernel) outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype) # There's also a ConvGRUCell that is more memory efficient. from cell import ConvGRUCell cell = ConvGRUCell(shape, filters, kernel) outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype) # It's also possible to enter 2D input or 4D input instead of 3D. shape = [100] kernel = [3] inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels]) cell = ConvLSTMCell(shape, filters, kernel) outputs, state = tf.nn.bidirectional_dynamic_rnn(cell, cell,
def detail_fusion_net(hr_sparses, referenced):
    """Encode each frame, run a ConvLSTM over time, then decode each frame.

    # Arguments
        hr_sparses: iterable of per-frame tensors, documented originally as
            ``(b, h, w, c) * t``.
        referenced: tensor that is resized to ``(H, W)`` and added to every
            decoded frame.

    # Returns
        ``decoded``: a list with one tensor per temporal slice.

    Relies on the module-level names ``H``, ``W``, ``cfg``, ``Conv2D``,
    ``Deconv2D`` and ``ConvLSTMCell``, none of which are defined here.

    NOTE(review): the source this was recovered from had lost its
    indentation.  The nesting of the skip-connection add (inside the
    deconv branch) and of ``tf.split`` (inside the ConvLSTM scope) is the
    most plausible reading -- confirm against the upstream file.
    """
    # method=2 is presumably ResizeMethod.BICUBIC in TF 1.x -- TODO confirm.
    upsampled_referenced = tf.image.resize_images(images=referenced,
                                                  size=(H, W),
                                                  method=2)
    # ====================
    # Encoder Part
    # ====================
    all_skip_connections = []  # one list of skip tensors per frame
    encoded = []
    with tf.variable_scope("Encoder") as scope:
        for frame_idx, l in enumerate(hr_sparses):
            skip_connections = []
            for layer_idx in range(len(cfg.detail_fusion_net.encoder.k_size)):
                l = Conv2D(
                    'Conv.{}'.format(layer_idx),
                    l,
                    out_channel=cfg.detail_fusion_net.encoder.
                    ch_out[layer_idx],
                    kernel_shape=tuple(
                        [cfg.detail_fusion_net.encoder.k_size[layer_idx]] *
                        2),
                    stride=cfg.detail_fusion_net.encoder.stride[layer_idx],
                    padding='same',
                    nl=tf.nn.relu,
                    W_init=tf.contrib.layers.xavier_initializer())
                # Outputs of encoder layers 0 and 2 are kept for the decoder.
                if layer_idx in [0, 2]:
                    skip_connections.append(tf.identity(l))
                    # skip_connections.append()
            # Insert a length-1 time axis so frames can be concatenated
            # along axis 1 below.
            encoded.append(
                tf.reshape(l, (-1, 1, H // 4, W // 4,
                               cfg.detail_fusion_net.encoder.ch_out[-1])))
            all_skip_connections.append(skip_connections)
            # After the first frame, later frames reuse the same variables.
            scope.reuse_variables()
    # ====================
    # ConvLSTM
    # ====================
    temporal = tf.concat(encoded, axis=1)
    print('temporal', temporal)
    shape = [H // 4, W // 4]
    filters = 128
    kernel = [3, 3]
    with tf.variable_scope('ConvLSTM') as scope:
        cell = ConvLSTMCell(shape, filters, kernel, activation=tf.nn.relu)
        # `l` is the last encoder output left over from the loop above; it
        # is used here only for its dtype.
        temporal_outputs, state = tf.nn.dynamic_rnn(cell,
                                                    temporal,
                                                    dtype=l.dtype)
        list_temporal_outputs = tf.split(temporal_outputs,
                                         cfg.frames,
                                         axis=1)
    # ====================
    # Decoder Part
    # ====================
    decoded = []
    with tf.variable_scope("Decoder") as scope:
        for l in list_temporal_outputs:
            # Skip tensors are consumed frame by frame, in encoder order.
            skip_connections = all_skip_connections.pop(0)
            l = tf.reshape(l, (-1, H // 4, W // 4, filters))
            for layer_idx in range(len(cfg.detail_fusion_net.decoder.k_size)):
                if cfg.detail_fusion_net.decoder.type[layer_idx] == 'conv':
                    l = Conv2D(
                        'Conv.{}'.format(layer_idx),
                        l,
                        out_channel=cfg.detail_fusion_net.decoder.
                        ch_out[layer_idx],
                        kernel_shape=tuple(
                            [cfg.detail_fusion_net.decoder.k_size[layer_idx]]
                            * 2),
                        stride=cfg.detail_fusion_net.decoder.
                        stride[layer_idx],
                        padding='same',
                        nl=tf.nn.relu,
                        W_init=tf.contrib.layers.xavier_initializer())
                else:
                    l = Deconv2D(
                        'Deconv.{}'.format(layer_idx),
                        l,
                        out_channel=cfg.detail_fusion_net.decoder.
                        ch_out[layer_idx],
                        kernel_shape=tuple(
                            [cfg.detail_fusion_net.decoder.k_size[layer_idx]]
                            * 2),
                        stride=cfg.detail_fusion_net.decoder.
                        stride[layer_idx],
                        padding='same',
                        nl=tf.nn.relu,
                        W_init=tf.contrib.layers.xavier_initializer())
                    # skip connections
                    # pop() takes the most recently stored skip first, so
                    # the deepest encoder feature is added first.
                    l = l + skip_connections.pop()
            l += upsampled_referenced
            decoded.append(l)
            scope.reuse_variables()
    return decoded
def build_ST_RNN(self):
    """Build the two-stream graph, restore a checkpoint, then add fusion heads.

    The method interleaves graph construction with session calls, so the
    statement order is significant:

    1. Placeholders, both streams (R1 on ``self.X``, R2 on
       ``self.X_prior``), the ConvLSTM fusion and the "ST" 1x1 fusion are
       built.
    2. ``self.saver`` and ``self.init`` are created and run, and the
       checkpoint at ``self.ckpt_dir`` is restored into ``self.sess``.
       Both objects therefore only cover variables that exist at that
       point.
    3. The 3-D convolution and the ``pool4_fc8`` combination variables are
       created afterwards and initialised individually.
    4. The loss and a momentum optimizer over ``trainable_var[-22:]`` are
       set up.

    Sets ``self.X``, ``self.Y``, ``self.X_prior``, ``self.saver``,
    ``self.init``, ``self.final_saliency``, ``self.up_fc8``,
    ``self.rnn_output``, ``self.loss`` and ``self.train_op``.  Returns
    nothing.
    """
    ############### Input ###############
    self.X = tf.placeholder(
        tf.float32, [self.batch_size, self.crop_size, self.crop_size, 3],
        name='rgb_image')
    self.Y = tf.placeholder(
        tf.float32, [self.batch_size, self.crop_size, self.crop_size, 1],
        name='gt')
    # The second stream takes a 4-channel input when a prior is used and a
    # 3-channel input otherwise.
    if (self.prior_type == 'prior'):
        self.X_prior = tf.placeholder(
            tf.float32,
            [self.batch_size, self.crop_size, self.crop_size, 4],
            name='rgb_prior_image')
    else:
        self.X_prior = tf.placeholder(
            tf.float32,
            [self.batch_size, self.crop_size, self.crop_size, 3],
            name='rgb_flow_image')
    ############### R1 ###############
    conv1_1 = tf.nn.relu(self.conv2d(self.X, [3, 3, 3, 64], 'conv1_1'))
    conv1_2 = tf.nn.relu(self.conv2d(conv1_1, [3, 3, 64, 64], 'conv1_2'))
    pool1 = tf.nn.max_pool(conv1_2, ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1], padding='SAME',
                           name='pool1')
    conv2_1 = tf.nn.relu(self.conv2d(pool1, [3, 3, 64, 128], 'conv2_1'))
    conv2_2 = tf.nn.relu(self.conv2d(conv2_1, [3, 3, 128, 128], 'conv2_2'))
    pool2 = tf.nn.max_pool(conv2_2, ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1], padding='SAME',
                           name='pool2')
    conv3_1 = tf.nn.relu(self.conv2d(pool2, [3, 3, 128, 256], 'conv3_1'))
    conv3_2 = tf.nn.relu(self.conv2d(conv3_1, [3, 3, 256, 256], 'conv3_2'))
    conv3_3 = tf.nn.relu(self.conv2d(conv3_2, [3, 3, 256, 256], 'conv3_3'))
    pool3 = tf.nn.max_pool(conv3_3, ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1], padding='SAME',
                           name='pool3')
    conv4_1 = tf.nn.relu(self.conv2d(pool3, [3, 3, 256, 512], 'conv4_1'))
    conv4_2 = tf.nn.relu(self.conv2d(conv4_1, [3, 3, 512, 512], 'conv4_2'))
    conv4_3 = tf.nn.relu(self.conv2d(conv4_2, [3, 3, 512, 512], 'conv4_3'))
    # pool4 and pool5 use stride 1, so they keep the spatial resolution.
    pool4 = tf.nn.max_pool(conv4_3, ksize=[1, 3, 3, 1],
                           strides=[1, 1, 1, 1], padding='SAME',
                           name='pool4')
    conv5_1 = tf.nn.relu(
        self.astro_conv2d(pool4, [3, 3, 512, 512], hole=2, name='conv5_1'))
    conv5_2 = tf.nn.relu(
        self.astro_conv2d(conv5_1, [3, 3, 512, 512], hole=2,
                          name='conv5_2'))
    conv5_3 = tf.nn.relu(
        self.astro_conv2d(conv5_2, [3, 3, 512, 512], hole=2,
                          name='conv5_3'))
    pool5 = tf.nn.max_pool(conv5_3, ksize=[1, 3, 3, 1],
                           strides=[1, 1, 1, 1], padding='SAME')
    fc6 = tf.nn.relu(
        self.astro_conv2d(pool5, [4, 4, 512, 4096], hole=4, name='fc6'))
    # NOTE(review): dropout is applied with a fixed 0.5 everywhere in this
    # graph; there is no switch to disable it.
    fc6_dropout = tf.nn.dropout(fc6, 0.5)
    fc7 = tf.nn.relu(
        self.astro_conv2d(fc6_dropout, [1, 1, 4096, 4096], hole=4,
                          name='fc7'))
    fc7_dropout = tf.nn.dropout(fc7, 0.5)
    fc8 = self.conv2d(fc7_dropout, [1, 1, 4096, 1], 'fc8')
    # rnn_output_fc8 = self.rnn_cell(fc8, 'fc8')
    # fc8 = tf.add(rnn_output_fc8, fc8)
    up_fc8 = tf.image.resize_bilinear(fc8,
                                      [self.crop_size, self.crop_size])
    # Side output taken from pool4.
    pool4_conv = tf.nn.dropout(
        tf.nn.relu(self.conv2d(pool4, [3, 3, 512, 128], 'pool4_conv')),
        0.5)
    pool4_fc = tf.nn.dropout(
        tf.nn.relu(self.conv2d(pool4_conv, [1, 1, 128, 128], 'pool4_fc')),
        0.5)
    pool4_ms_saliency = self.conv2d(pool4_fc, [1, 1, 128, 1],
                                    'pool4_ms_saliency')
    # rnn_output_pool4 = self.rnn_cell(pool4_ms_saliency, 'pool4')
    # pool4_ms_saliency = tf.add(rnn_output_pool4, pool4_ms_saliency)
    up_pool4 = tf.image.resize_bilinear(pool4_ms_saliency,
                                        [self.crop_size, self.crop_size])
    # final_saliency_r1 = tf.add(up_pool4, up_fc8)
    ############### R2 ###############
    # Same layer sequence as R1, with '_r2' names, fed by X_prior.
    if (self.prior_type == 'prior'):
        conv1_1_r2 = tf.nn.relu(
            self.conv2d(self.X_prior, [3, 3, 4, 64], 'conv1_1_r2'))
    else:
        conv1_1_r2 = tf.nn.relu(
            self.conv2d(self.X_prior, [3, 3, 3, 64], 'conv1_1_r2'))
    conv1_2_r2 = tf.nn.relu(
        self.conv2d(conv1_1_r2, [3, 3, 64, 64], 'conv1_2_r2'))
    pool1_r2 = tf.nn.max_pool(conv1_2_r2, ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1], padding='SAME',
                              name='pool1_r2')
    conv2_1_r2 = tf.nn.relu(
        self.conv2d(pool1_r2, [3, 3, 64, 128], 'conv2_1_r2'))
    conv2_2_r2 = tf.nn.relu(
        self.conv2d(conv2_1_r2, [3, 3, 128, 128], 'conv2_2_r2'))
    pool2_r2 = tf.nn.max_pool(conv2_2_r2, ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1], padding='SAME',
                              name='pool2_r2')
    conv3_1_r2 = tf.nn.relu(
        self.conv2d(pool2_r2, [3, 3, 128, 256], 'conv3_1_r2'))
    conv3_2_r2 = tf.nn.relu(
        self.conv2d(conv3_1_r2, [3, 3, 256, 256], 'conv3_2_r2'))
    conv3_3_r2 = tf.nn.relu(
        self.conv2d(conv3_2_r2, [3, 3, 256, 256], 'conv3_3_r2'))
    pool3_r2 = tf.nn.max_pool(conv3_3_r2, ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1], padding='SAME',
                              name='pool3_r2')
    conv4_1_r2 = tf.nn.relu(
        self.conv2d(pool3_r2, [3, 3, 256, 512], 'conv4_1_r2'))
    conv4_2_r2 = tf.nn.relu(
        self.conv2d(conv4_1_r2, [3, 3, 512, 512], 'conv4_2_r2'))
    conv4_3_r2 = tf.nn.relu(
        self.conv2d(conv4_2_r2, [3, 3, 512, 512], 'conv4_3_r2'))
    pool4_r2 = tf.nn.max_pool(conv4_3_r2, ksize=[1, 3, 3, 1],
                              strides=[1, 1, 1, 1], padding='SAME',
                              name='pool4_r2')
    conv5_1_r2 = tf.nn.relu(
        self.astro_conv2d(pool4_r2, [3, 3, 512, 512], hole=2,
                          name='conv5_1_r2'))
    conv5_2_r2 = tf.nn.relu(
        self.astro_conv2d(conv5_1_r2, [3, 3, 512, 512], hole=2,
                          name='conv5_2_r2'))
    conv5_3_r2 = tf.nn.relu(
        self.astro_conv2d(conv5_2_r2, [3, 3, 512, 512], hole=2,
                          name='conv5_3_r2'))
    pool5_r2 = tf.nn.max_pool(conv5_3_r2, ksize=[1, 3, 3, 1],
                              strides=[1, 1, 1, 1], padding='SAME')
    fc6_r2 = tf.nn.relu(
        self.astro_conv2d(pool5_r2, [4, 4, 512, 4096], hole=4,
                          name='fc6_r2'))
    fc6_dropout_r2 = tf.nn.dropout(fc6_r2, 0.5)
    fc7_r2 = tf.nn.relu(
        self.astro_conv2d(fc6_dropout_r2, [1, 1, 4096, 4096], hole=4,
                          name='fc7_r2'))
    fc7_dropout_r2 = tf.nn.dropout(fc7_r2, 0.5)
    fc8_r2 = self.conv2d(fc7_dropout_r2, [1, 1, 4096, 1], 'fc8_r2')
    up_fc8_r2 = tf.image.resize_bilinear(fc8_r2,
                                         [self.crop_size, self.crop_size])
    pool4_conv_r2 = tf.nn.dropout(
        tf.nn.relu(self.conv2d(pool4_r2, [3, 3, 512, 128],
                               'pool4_conv_r2')), 0.5)
    pool4_fc_r2 = tf.nn.dropout(
        tf.nn.relu(
            self.conv2d(pool4_conv_r2, [1, 1, 128, 128], 'pool4_fc_r2')),
        0.5)
    pool4_ms_saliency_r2 = self.conv2d(pool4_fc_r2, [1, 1, 128, 1],
                                       'pool4_ms_saliency_r2')
    up_pool4_r2 = tf.image.resize_bilinear(
        pool4_ms_saliency_r2, [self.crop_size, self.crop_size])
    # final_saliency_r2 is not used anywhere below.
    final_saliency_r2 = tf.add(up_pool4_r2, up_fc8_r2)
    ########## rnn fusion ############
    # expand_dims adds a leading axis of size 1; with dynamic_rnn's default
    # batch-major layout the original batch axis is then the time axis.
    inputs = tf.expand_dims(
        tf.concat([up_pool4, up_pool4_r2, up_fc8, up_fc8_r2], axis=3), 0)
    cell = ConvLSTMCell([self.crop_size, self.crop_size], 1, [3, 3])
    outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype,
                                       scope='rnn')
    rnn_output = tf.squeeze(outputs, axis=0)
    ########## C3D fusion ############
    # inputs = tf.expand_dims(tf.concat([up_pool4, up_pool4_r2, up_fc8, up_fc8_r2], axis=3), 0)
    # C3D_outputs = self.conv3d(inputs, [3, 3, 3, 4, 1], name='3D_conv')
    # C3D_output = tf.squeeze(C3D_outputs, axis=0)
    ########### ST fusion ############
    pool4_saliency_cancat = tf.concat(
        [pool4_ms_saliency, pool4_ms_saliency_r2], 3, name='concat_pool4')
    pool4_saliency_ST = self.conv2d(pool4_saliency_cancat, [1, 1, 2, 1],
                                    'pool4_saliency_ST')
    up_pool4_ST = tf.image.resize_bilinear(
        pool4_saliency_ST, [self.crop_size, self.crop_size])
    fc8_concat = tf.concat([fc8, fc8_r2], 3, name='concat_fc8')
    fc8_saliency_ST = self.conv2d(fc8_concat, [1, 1, 2, 1],
                                  'fc8_saliency_ST')
    up_fc8_ST = tf.image.resize_bilinear(fc8_saliency_ST,
                                         [self.crop_size, self.crop_size])
    # pool4_fc8_concat = tf.concat([up_pool4_ST, up_fc8_ST], axis=3)
    # pool4_fc8_combine = self.conv2d(pool4_fc8_concat, [1, 1, 2, 1], 'pool4_fc8')
    # final_saliency = tf.add(up_pool4_ST, up_fc8_ST)
    # final_saliency = tf.add(final_saliency, up_pool4_r2)
    # final_saliency = tf.add(pool4_fc8_combine, rnn_output)
    # final_saliency = tf.add(final_saliency, C3D_output)
    # ave_num = tf.constant(3.0, dtype=tf.float32, shape=[self.batch_size, self.crop_size, self.crop_size, 1])
    # final_saliency = tf.div(final_saliency, ave_num)
    # The saver and the initializer op are created here, before the
    # variables defined further down, so neither of them covers those
    # later variables.  Everything built so far is initialised and then
    # overwritten from the checkpoint.
    self.saver = tf.train.Saver()
    self.init = tf.global_variables_initializer()
    # self.load()
    self.sess.run(self.init)
    self.saver.restore(self.sess, self.ckpt_dir)
    # Variables created from here on are absent from self.saver and are
    # initialised one by one below.
    conv3D_w = tf.Variable(tf.truncated_normal(shape=[3, 3, 3, 4, 1]),
                           dtype=tf.float32, name='3D_conv_w')
    conv3D_b = tf.Variable(tf.truncated_normal(shape=[1, 1, 1, 1, 1]),
                           dtype=tf.float32, name='3D_conv_b')
    C3D_outputs = tf.nn.conv3d(inputs, conv3D_w, strides=[1, 1, 1, 1, 1],
                               padding='SAME', name='C3D') + conv3D_b
    C3D_output = tf.squeeze(C3D_outputs, axis=0)
    # pool4_fc8_combine_w = tf.Variable(tf.truncated_normal(shape=[1, 1, 2, 1]), dtype=tf.float32, name='pool4_fc8_w')
    # pool4_fc8_combine_b = tf.Variable(tf.truncated_normal(shape=[1, 1, 1, 1]), dtype=tf.float32, name='pool4_fc8_b')
    pool4_fc8_combine_w = tf.get_variable(
        'pool4_fc8_w', shape=[1, 1, 2, 1], dtype=tf.float32,
        initializer=tf.random_normal_initializer())
    pool4_fc8_combine_b = tf.get_variable(
        'pool4_fc8_b', shape=[1, 1, 1, 1], dtype=tf.float32,
        initializer=tf.random_normal_initializer())
    pool4_fc8_concat = tf.concat([up_pool4_ST, up_fc8_ST], axis=3)
    pool4_fc8_combine = tf.nn.conv2d(pool4_fc8_concat, pool4_fc8_combine_w,
                                     strides=[1, 1, 1, 1],
                                     padding='SAME') + pool4_fc8_combine_b
    # saver = tf.train.Saver()
    # init = tf.global_variables_initializer()
    self.sess.run(conv3D_w.initializer)
    self.sess.run(conv3D_b.initializer)
    self.sess.run(pool4_fc8_combine_w.initializer)
    self.sess.run(pool4_fc8_combine_b.initializer)
    # Final map: sum of the ST combination, the 3-D conv and the ConvLSTM.
    final_saliency = tf.add(pool4_fc8_combine, C3D_output)
    final_saliency = tf.add(final_saliency, rnn_output)
    self.final_saliency = tf.sigmoid(final_saliency)
    self.up_fc8 = up_fc8
    # Despite its name this attribute holds the pre-sigmoid fused map.
    self.rnn_output = final_saliency
    self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=final_saliency, labels=self.Y), name='loss')
    # self.loss_rnn = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=rnn_output, labels=self.Y),
    #                                name='loss2')
    # tf.summary.scalar('entropy', self.loss + 0.1 * self.loss_rnn)
    tf.summary.scalar('entropy', self.loss)
    trainable_var = tf.trainable_variables()
    # self.saver.save(self.sess, 'fusion_tmp_parameter/fusionST_C3D_tensorflow.ckpt')
    # optimizer = tf.train.AdamOptimizer(self.lr, name='optimizer')
    # grads = optimizer.compute_gradients(self.loss + 0.5 * self.loss_rnn, var_list=trainable_var[-22:])
    optimizer2 = tf.train.MomentumOptimizer(self.lr, 0.99)
    # grads = optimizer2.compute_gradients(self.loss + 0.5 * self.loss_rnn, var_list=trainable_var[-22:])
    # Only the last 22 trainable variables (in creation order) are updated.
    # NOTE(review): which layers that slice covers depends on how many
    # variables each helper creates -- presumably the fusion layers; verify.
    grads = optimizer2.compute_gradients(self.loss,
                                         var_list=trainable_var[-22:])
    # NOTE(review): apply_gradients creates the momentum slot variables
    # after self.init was built and run, so they appear not to be
    # initialised by this method -- confirm the caller handles that.
    self.train_op = optimizer2.apply_gradients(grads)
########## rnn fusion ############ # inputs = tf.expand_dims(tf.concat([up_pool4, up_pool4_r2], axis=3), 0) # cell = ConvLSTMCell([512, 512], 1, [3, 3]) # outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype, scope='rnn') # rnn_output = tf.squeeze(outputs, axis=0) # # inputs2 = tf.expand_dims(tf.concat([up_fc8, up_fc8_r2], axis=3), 0) # cell2 = ConvLSTMCell([512, 512], 1, [3, 3]) # outputs2, state2 = tf.nn.dynamic_rnn(cell2, inputs2, dtype=inputs.dtype, scope='rnn2') # rnn_output2 = tf.squeeze(outputs2, axis=0) inputs = tf.expand_dims( tf.concat([up_pool4, up_pool4_r2, up_fc8, up_fc8_r2], axis=3), 0) cell = ConvLSTMCell([128, 128], 1, [3, 3]) conv3D_w = tf.Variable(tf.truncated_normal(shape=[3, 3, 3, 4, 1]), dtype=tf.float32, name='3D_conv_w') conv3D_b = tf.Variable(tf.truncated_normal(shape=[1, 1, 1, 1, 1]), dtype=tf.float32, name='3D_conv_b') C3D_outputs = tf.nn.conv3d( inputs, conv3D_w, strides=[1, 1, 1, 1, 1 ], padding='SAME', name='C3D') + conv3D_b # outputs_static, outputs_dynamic = rnn_cell(inputs, 'rnn') outputs, state = tf.nn.dynamic_rnn(cell, inputs,
def create_model(self):
    """Assemble the conv -> stacked ConvLSTM -> transposed-conv graph.

    Reads ``self.inputs``, ``self.batch_size``, ``self.timesteps``,
    ``self.shape``, ``self.channels``, ``self.filters`` and ``self.kernel``.
    The resulting tensor is stored on ``self.model_output``; nothing is
    returned.

    ``slim``, ``trunc_normal``, ``regularizers``, ``l2_val`` and
    ``ConvLSTMCell`` are expected to be provided by the enclosing module.
    """
    # Merge the time axis into the batch axis so that the 2-D convolutions
    # below are applied to every frame.
    frames = tf.reshape(
        self.inputs,
        [self.batch_size * self.timesteps] + self.shape + [self.channels])

    # (scope, output depth, extra keyword arguments) for each encoder layer.
    encoder_layers = (
        ('conv_1', 32, {}),
        ('conv_2', 64, {'stride': 2}),
        ('conv_3', 128, {'stride': 2}),
        ('conv_4', 256, {}),
    )
    net = frames
    with tf.variable_scope('conv_before_lstm'):
        for layer_scope, depth, extra in encoder_layers:
            net = slim.conv2d(
                net,
                depth, [3, 3],
                scope=layer_scope,
                weights_initializer=trunc_normal(0.01),
                weights_regularizer=regularizers.l2_regularizer(l2_val),
                **extra)

    # Split the batch axis back into (batch, time) for the recurrent part.
    per_frame_dims = net.get_shape().as_list()[1:]
    lstm_input = tf.reshape(
        net, [self.batch_size, self.timesteps] + per_frame_dims)
    _, _, height, width, _ = lstm_input.get_shape().as_list()

    with tf.variable_scope('lstm_model'):
        # One ConvLSTM layer per entry in self.filters, stacked in order.
        stacked_cell = tf.nn.rnn_cell.MultiRNNCell(
            [ConvLSTMCell([height, width], n_filters, self.kernel)
             for n_filters in self.filters],
            state_is_tuple=True)
        # Only the per-step outputs are used; the final state is discarded.
        rnn_outputs, _ = tf.nn.dynamic_rnn(
            stacked_cell, lstm_input, dtype=lstm_input.dtype)

    # Merge (batch, time) again for the transposed convolutions.
    n_batch, n_steps, out_h, out_w, out_c = rnn_outputs.get_shape().as_list()
    net = tf.reshape(rnn_outputs, [n_batch * n_steps, out_h, out_w, out_c])

    # (scope, output depth, extra keyword arguments) for each decoder layer;
    # the last layer produces 3 channels through a tanh activation.
    decoder_layers = (
        ('deconv_4', 256, {}),
        ('deconv_3', 128, {}),
        ('deconv_2', 64, {'stride': 2}),
        ('deconv_1', 32, {'stride': 2}),
        ('deconv_0', 3, {'activation_fn': tf.tanh}),
    )
    with tf.variable_scope('deconv_after_lstm'):
        for layer_scope, depth, extra in decoder_layers:
            net = slim.conv2d_transpose(
                net,
                depth, [3, 3],
                scope=layer_scope,
                weights_initializer=trunc_normal(0.01),
                weights_regularizer=regularizers.l2_regularizer(l2_val),
                **extra)

    # Restore the explicit (batch, time) leading axes on the prediction.
    self.model_output = tf.reshape(
        net, [n_batch, n_steps] + net.get_shape().as_list()[1:])
def feedforward(self, ml, mh, regularizer=0):
    """Build the two unrolled ConvLSTM branches and the CNN that mixes them.

    Args:
        ml: measurement tensor driving the ``LSTM_2`` branch (estimate ``rl``).
        mh: measurement tensor driving the ``LSTM_1`` branch (estimate ``rh``).
            Its static leading dimension is used as the batch size for both
            branches.
        regularizer: accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(d1, d2, rl, rh)``: ``d1`` and ``d2`` are per-pixel weighted
        sums of ``rh`` and ``rl`` clipped to ``[0, 5]``; ``rl`` and ``rh`` are
        the final estimates of the two branches.
    """
    batch_size = mh.get_shape().as_list()[0]

    # LSTM: both branches run the same unrolled update and differ only in
    # their measurement tensor and in variable/summary names.
    rh = self._unrolled_lstm_estimate(
        mh, batch_size,
        scope_name='LSTM_1',
        estimate_name="rh",
        weight_name="weight_lstm1",
        bias_name="bias1",
        step_summary='s1_%d',
        image_summary='rh_pred_%d')
    rl = self._unrolled_lstm_estimate(
        ml, batch_size,
        scope_name='LSTM_2',
        estimate_name="rl",
        weight_name="weight_lstm2",
        bias_name="bias2",
        step_summary='s2_%d',
        image_summary='rl_pred_%d')

    # CNN: one five-layer tower per estimate.
    layer_L1 = layer_xyf.convo(rl, "conv_L1", FILTER_1, STRIDE_1, PAD_1)
    layer_L2 = layer_xyf.convo(layer_L1, "conv_L2", FILTER_2, STRIDE_2, PAD_2)
    layer_L3 = layer_xyf.convo(layer_L2, "conv_L3", FILTER_3, STRIDE_3, PAD_3)
    layer_L4 = layer_xyf.convo(layer_L3, "conv_L4", FILTER_4, STRIDE_4, PAD_4)
    layer_L5 = layer_xyf.convo(layer_L4, "conv_L5", FILTER_5, STRIDE_5, PAD_5)

    layer_H1 = layer_xyf.convo(rh, "conv_H1", FILTER_1, STRIDE_1, PAD_1)
    layer_H2 = layer_xyf.convo(layer_H1, "conv_H2", FILTER_2, STRIDE_2, PAD_2)
    layer_H3 = layer_xyf.convo(layer_H2, "conv_H3", FILTER_3, STRIDE_3, PAD_3)
    layer_H4 = layer_xyf.convo(layer_H3, "conv_H4", FILTER_4, STRIDE_4, PAD_4)
    layer_H5 = layer_xyf.convo(layer_H4, "conv_H5", FILTER_5, STRIDE_5, PAD_5)

    # Four heads on the concatenated features give the mixing coefficients.
    combine_LH = tf.concat([layer_L5, layer_H5], 3)
    pa_pred = layer_xyf.convo_noneRelu(combine_LH, "conv_pa", FILTER_6, STRIDE_6, PAD_6)
    pb_pred = layer_xyf.convo_noneRelu(combine_LH, "conv_pb", FILTER_6, STRIDE_6, PAD_6)
    pc_pred = layer_xyf.convo_noneRelu(combine_LH, "conv_pc", FILTER_6, STRIDE_6, PAD_6)
    pd_pred = layer_xyf.convo_noneRelu(combine_LH, "conv_pd", FILTER_6, STRIDE_6, PAD_6)

    d1 = pa_pred * rh + pb_pred * rl
    d2 = pc_pred * rh + pd_pred * rl
    d1 = tf.clip_by_value(d1, 0, 5)
    d2 = tf.clip_by_value(d2, 0, 5)
    tf.summary.image('d1_pred', d1, 1)
    tf.summary.image('d2_pred', d2, 1)
    return d1, d2, rl, rh


def _unrolled_lstm_estimate(self, measurement, batch_size, scope_name,
                            estimate_name, weight_name, bias_name,
                            step_summary, image_summary):
    """Run one unrolled ConvLSTM update branch and return its final estimate.

    Args:
        measurement: tensor subtracted from ``odl_op_layer(estimate)`` to
            form the residual at every step.
        batch_size: leading dimension for the estimate variable and the
            cell's zero state.
        scope_name: variable scope holding all variables of this branch.
        estimate_name: name of the non-trainable, zero-initialised estimate
            variable.
        weight_name: name of the convolution kernel applied to the cell
            output.
        bias_name: name of the bias added after that convolution.
        step_summary: ``%d`` format for the per-step ``variable_summaries``
            tag.
        image_summary: ``%d`` format for the per-step image summary tag.

    Returns:
        The estimate tensor after ``iteration`` update steps.
    """
    with tf.variable_scope(scope_name):
        estimate = tf.get_variable(
            estimate_name,
            [batch_size, self.output_h, self.output_w, self.output_depth],
            initializer=tf.constant_initializer(0.0),
            trainable=False)
        conv_w = tf.get_variable(
            weight_name,
            filter_shape,
            initializer=tf.truncated_normal_initializer(stddev=0.001))
        conv_b = tf.get_variable(
            bias_name, 1, initializer=tf.constant_initializer(0.0))
        cell = ConvLSTMCell([self.output_h, self.output_w], filters_lstm,
                            kernal_lstm)
        state = cell.zero_state(batch_size, dtype=tf.float32)

        for timestep in range(iteration):
            if timestep > 0:
                # Later steps must share the variables the first cell call
                # created instead of creating new ones.
                tf.get_variable_scope().reuse_variables()
            # The operator layers work on the estimate with axes 1 and 2
            # swapped; swap back afterwards.
            estimate_tr = tf.transpose(estimate, perm=[0, 2, 1, 3])
            back_projected = odl_op_layer_adjoint(
                (odl_op_layer(estimate_tr) - measurement))
            gradient = tf.transpose(back_projected, perm=[0, 2, 1, 3])
            (cell_output, state) = cell(gradient, state)
            conv = tf.nn.conv2d(cell_output, conv_w, [1, 1, 1, 1], "VALID")
            step = tf.nn.tanh(tf.nn.bias_add(conv, conv_b))
            self.variable_summaries(step, (step_summary % timestep))
            # Scaled update along the back-projected residual, then clip the
            # estimate to [0, 5].
            estimate = estimate + 0.0001 * step * gradient
            estimate = tf.clip_by_value(estimate, 0, 5)
            tf.summary.image(image_summary % timestep, estimate, 1)
    return estimate