def _shared_cnn(self, inputs, is_train, reuse=False):
    """ Shared CNN: five conv blocks with width-preserving (wstride=1)
    pooling, flattened into a [batch, 23, 512] feature sequence. """
    shared_cnn_params = \
        [ConvParams(512, 3, (1, 1), 'same', False, True, 'conv1'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'conv2'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'conv3'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'conv4'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'conv5')]

    with tf.variable_scope("shared_cnn", reuse=reuse):
        conv1 = conv_layer(inputs, shared_cnn_params[0], is_train)
        conv1 = tf.pad(conv1, [[0, 0], [1, 1], [0, 0], [0, 0]])
        pool1 = pool_layer(conv1, 2, 'valid', 'pool1', wstride=1)

        conv2 = conv_layer(pool1, shared_cnn_params[1], is_train)
        conv2 = tf.pad(conv2, [[0, 0], [1, 1], [1, 1], [0, 0]])
        pool2 = pool_layer(conv2, 2, 'valid', 'pool2', wstride=1)

        conv3 = conv_layer(pool2, shared_cnn_params[2], is_train)
        pool3 = pool_layer(conv3, 2, 'valid', 'pool3', wstride=1)

        conv4 = conv_layer(pool3, shared_cnn_params[3], is_train)
        pool4 = pool_layer(conv4, 2, 'valid', 'pool4', wstride=1)

        conv5 = conv_layer(pool4, shared_cnn_params[4], is_train)
        pool5 = pool_layer(conv5, 2, 'valid', 'pool5', wstride=1)

        # Flatten into a [batch, 23, 512] feature sequence.
        features = tf.reshape(pool5, (-1, 23, 512))

        return features
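
# The ConvParams tuple and the conv_layer / pool_layer helpers used throughout
# this file are defined elsewhere in the repo. Below is a minimal sketch of
# what they might look like, inferred only from how they are called here
# (field order Filters, Kernel, Stride, Padding, Bias, BN, Name; pooling is
# assumed to be 2-high max pooling whose width stride can be overridden via
# `wstride`). The repo's actual helpers may differ.
from collections import namedtuple

import tensorflow as tf

ConvParams = namedtuple(
    'ConvParams',
    ['filters', 'kernel', 'stride', 'padding', 'use_bias', 'use_bn', 'name'])


def conv_layer(inputs, params, is_train):
    """Sketch: 2-D convolution, optional batch norm, ReLU activation."""
    out = tf.layers.conv2d(inputs, params.filters, params.kernel,
                           strides=params.stride, padding=params.padding,
                           use_bias=params.use_bias, name=params.name)
    if params.use_bn:
        out = tf.layers.batch_normalization(out, training=is_train,
                                            name=params.name + '_bn')
    return tf.nn.relu(out)


def pool_layer(inputs, wpool, padding, name, wstride=None):
    """Sketch: max pooling with a (2, wpool) window, height stride 2 and
    width stride `wstride` (defaults to wpool)."""
    wstride = wpool if wstride is None else wstride
    return tf.layers.max_pooling2d(inputs, (2, wpool), (2, wstride),
                                   padding=padding, name=name)
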
def _clue_network(self, inputs, is_train):
    """ Clue network: two conv/pool blocks followed by dense projections
    over the sequence-length (23) and channel (4) axes; returns a
    [batch, 4, 23, 1] tensor. """
    clue_network_params = \
        [ConvParams(512, 3, (1, 1), 'same', False, True, 'conv1'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'conv2')]

    weight_initializer = tf.truncated_normal_initializer(stddev=0.01)
    bias_initializer = tf.constant_initializer(value=0.0)

    assert inputs.get_shape()[1:] == (26, 26, 256)

    with tf.variable_scope("clue_network"):
        conv1 = conv_layer(inputs, clue_network_params[0], is_train)
        conv1 = tf.pad(conv1, [[0, 0], [1, 1], [1, 1], [0, 0]])
        pool1 = pool_layer(conv1, 2, 'valid', 'pool1')

        conv2 = conv_layer(pool1, clue_network_params[1], is_train)
        conv2 = tf.pad(conv2, [[0, 0], [1, 1], [1, 1], [0, 0]])
        pool2 = pool_layer(conv2, 2, 'valid', 'pool2')

        # [B, 8, 8, 512] -> [B, 64, 512]
        features = tf.reshape(pool2, (-1, 64, 512))

        # Project the 64 spatial positions down to a sequence length of 23.
        features = tf.transpose(features, perm=[0, 2, 1])
        features = tf.layers.dense(features, 23,
                                   kernel_initializer=weight_initializer,
                                   bias_initializer=bias_initializer,
                                   activation=tf.nn.relu,
                                   name='length_dense')
        features = tf.contrib.layers.dropout(features, keep_prob=0.8,
                                             is_training=is_train)

        # Project the 512 channels down to 4 softmax-normalized channels.
        features = tf.transpose(features, perm=[0, 2, 1])
        features = tf.layers.dense(features, 4,
                                   kernel_initializer=weight_initializer,
                                   bias_initializer=bias_initializer,
                                   activation=tf.nn.softmax,
                                   name='channel_dense')
        features = tf.transpose(features, perm=[0, 2, 1])
        features = tf.expand_dims(features, axis=-1)

        return features
def _convnet_layers(self, inputs, widths, is_train):
    """
    Build convolutional network layers attached to the given input tensor
    """
    conv_params = \
        [ConvParams(64, 3, (1, 1), 'same', True, False, 'conv1'),
         ConvParams(512, 2, (1, 1), 'valid', False, True, 'conv2')]

    recur_params = [{'channel': 64}, {'channel': 128}, {'channel': 256}]

    with tf.variable_scope("convnet"):
        conv1 = conv_layer(inputs, conv_params[0], is_train)
        pool1 = pool_layer(conv1, 2, 'valid', 'pool1')

        grcl1 = self._gated_recurrent_conv_layer(pool1, recur_params[0],
                                                 is_train, iteration=3,
                                                 name='grcl1')
        pool2 = pool_layer(grcl1, 2, 'valid', 'pool2')

        grcl2 = self._gated_recurrent_conv_layer(pool2, recur_params[1],
                                                 is_train, iteration=3,
                                                 name='grcl2')
        grcl2 = tf.pad(grcl2, [[0, 0], [0, 0], [1, 1], [0, 0]])
        pool3 = pool_layer(grcl2, 2, 'valid', 'pool3', wstride=1)

        grcl3 = self._gated_recurrent_conv_layer(pool3, recur_params[2],
                                                 is_train, iteration=3,
                                                 name='grcl3')
        grcl3 = tf.pad(grcl3, [[0, 0], [0, 0], [1, 1], [0, 0]])
        pool4 = pool_layer(grcl3, 2, 'valid', 'pool4', wstride=1)

        conv2 = conv_layer(pool4, conv_params[1], is_train)

        features = tf.squeeze(conv2, axis=1, name='features')

        sequence_length = widths // 4 + 1
        sequence_length = tf.reshape(sequence_length, [-1], name='seq_len')

        return features, sequence_length
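
# _gated_recurrent_conv_layer is defined elsewhere in the repo. It presumably
# implements the GRCL block of Wang & Hu, "Gated Recurrent Convolution Neural
# Network for OCR" (NIPS 2017): a gate computed from the block input and the
# current state modulates a recurrent 3x3 convolution for a fixed number of
# iterations. The sketch below is one plausible formulation under that
# assumption; the exact batch-norm placement and weight sharing may differ
# from the repo's implementation.
def gated_recurrent_conv_layer_sketch(inputs, param, is_train,
                                      iteration=3, name='grcl'):
    channel = param['channel']

    def bn(tensor, bn_name):
        return tf.layers.batch_normalization(tensor, training=is_train,
                                             name=bn_name)

    with tf.variable_scope(name):
        # Feed-forward terms, computed once from the block input.
        u = tf.layers.conv2d(inputs, channel, 3, padding='same',
                             use_bias=False, name='w_f')
        u_gate = tf.layers.conv2d(inputs, channel, 1, padding='same',
                                  use_bias=False, name='w_gf')
        state = tf.nn.relu(bn(u, 'bn_init'))
        for t in range(iteration):
            # Recurrent convolutions share weights across iterations;
            # batch-norm statistics are kept per iteration.
            with tf.variable_scope('recurrent', reuse=(t > 0)):
                gate_r = tf.layers.conv2d(state, channel, 1, padding='same',
                                          use_bias=False, name='w_gr')
                rec = tf.layers.conv2d(state, channel, 3, padding='same',
                                       use_bias=False, name='w_r')
            gate = tf.sigmoid(bn(u_gate, 'bn_gf_%d' % t) +
                              bn(gate_r, 'bn_gr_%d' % t))
            state = tf.nn.relu(bn(u, 'bn_f_%d' % t) +
                               bn(rec, 'bn_r_%d' % t) * gate)
        return state
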
def _transformer_layers(self, inputs, widths, is_train):
    """ Two conv/pool blocks followed by the transformer encoder; returns
    the flattened encoder features, their 4-D shape, and the per-layer
    attention weights. """
    conv_params = \
        [ConvParams(self.hidden_size // 2, 3, (1, 1), 'same', False, True,
                    'conv1'),
         ConvParams(self.hidden_size, 3, (1, 1), 'same', False, True,
                    'conv2')]

    with tf.variable_scope("transformer_layers"):
        conv1 = conv_layer(inputs, conv_params[0], is_train)
        conv1 = pool_layer(conv1, 2, 'valid', 'pool1')

        conv2 = conv_layer(conv1, conv_params[1], is_train)
        conv2 = pool_layer(conv2, 2, 'valid', 'pool2')

        features, shape, weights = \
            self.transformer_encoder(conv2, self.enc_layers,
                                     self.hidden_size, is_train)
        features = tf.reshape(features,
                              (shape[0], shape[1] * shape[2], shape[3]))

        return features, shape, weights
def _bcnn(self, inputs, is_train):
    """ Base CNN: four conv blocks with two pooling stages applied to a
    100x100 single-channel input. """
    bcnn_params = \
        [ConvParams(64, 3, (1, 1), 'same', False, True, 'conv1'),
         ConvParams(128, 3, (1, 1), 'same', False, True, 'conv2'),
         ConvParams(256, 3, (1, 1), 'same', False, True, 'conv3'),
         ConvParams(256, 3, (1, 1), 'same', False, True, 'conv4')]

    assert inputs.get_shape()[1:] == (100, 100, 1)

    with tf.variable_scope("bcnn"):
        conv1 = conv_layer(inputs, bcnn_params[0], is_train)
        pool1 = pool_layer(conv1, 2, 'valid', 'pool1')

        conv2 = conv_layer(pool1, bcnn_params[1], is_train)
        conv2 = tf.pad(conv2, [[0, 0], [1, 1], [1, 1], [0, 0]])
        pool2 = pool_layer(conv2, 2, 'valid', 'pool2')

        # With 2x2/stride-2 pooling: 100 -> 50, pad -> 52, -> 26, so the
        # output is 26x26x256, matching the assert in _clue_network.
        conv3 = conv_layer(pool2, bcnn_params[2], is_train)
        features = conv_layer(conv3, bcnn_params[3], is_train)

        return features
def _convnet_layers(self, inputs, widths, is_train):
    """
    Build convolutional network layers attached to the given input tensor
    """
    # Conv params: Filters, K, Stride, Padding, Bias, BN, Name
    conv_params = \
        [ConvParams(64, 3, (1, 1), 'same', True, False, 'conv1'),
         ConvParams(128, 3, (1, 1), 'same', True, False, 'conv2'),
         ConvParams(256, 3, (1, 1), 'same', True, False, 'conv3'),
         ConvParams(256, 3, (1, 1), 'same', True, False, 'conv4'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'conv5'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'conv6'),
         ConvParams(512, 2, (1, 1), 'valid', True, False, 'conv7')]

    with tf.variable_scope("convnet"):
        conv1 = conv_layer(inputs, conv_params[0], is_train)
        pool1 = pool_layer(conv1, 2, 'valid', 'pool1')

        conv2 = conv_layer(pool1, conv_params[1], is_train)
        pool2 = pool_layer(conv2, 2, 'valid', 'pool2')

        conv3 = conv_layer(pool2, conv_params[2], is_train)
        conv4 = conv_layer(conv3, conv_params[3], is_train)
        pool3 = pool_layer(conv4, 1, 'valid', 'pool3', wstride=1)

        conv5 = conv_layer(pool3, conv_params[4], is_train)
        conv6 = conv_layer(conv5, conv_params[5], is_train)
        pool4 = pool_layer(conv6, 1, 'valid', 'pool4', wstride=1)

        conv7 = conv_layer(pool4, conv_params[6], is_train)

        features = tf.squeeze(conv7, axis=1, name='features')

        sequence_length = widths // 4 - 1
        sequence_length = tf.reshape(sequence_length, [-1], name='seq_len')

        return features, sequence_length
def _convnet_layers(self, inputs, widths, is_train):
    """
    Build convolutional network layers attached to the given input tensor
    """
    conv_params = \
        [  # conv1_x
         ConvParams(64, 3, (1, 1), 'same', False, True, 'conv1_1'),
         ConvParams(128, 3, (1, 1), 'same', False, True, 'conv1_2'),
         # conv2_x
         ConvParams(256, 1, (1, 1), 'same', False, True, 'conv2_1'),
         ConvParams(256, 3, (1, 1), 'same', False, True, 'resd2_1'),
         ConvParams(256, 3, (1, 1), 'same', False, True, 'resd2_2'),
         ConvParams(256, 3, (1, 1), 'same', False, True, 'conv2_2'),
         # conv3_x
         ConvParams(256, 1, (1, 1), 'same', False, True, 'conv3_1'),
         ConvParams(256, 3, (1, 1), 'same', False, True, 'resd3_1'),
         ConvParams(256, 3, (1, 1), 'same', False, True, 'resd3_2'),
         ConvParams(256, 3, (1, 1), 'same', False, True, 'conv3_2'),
         # conv4_x
         ConvParams(512, 1, (1, 1), 'same', False, True, 'conv4_1'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'resd4_1'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'resd4_2'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'conv4_2'),
         # conv5_x
         ConvParams(512, 1, (1, 1), 'same', False, True, 'conv5_1'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'resd5_1'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'resd5_2'),
         ConvParams(512, 3, (1, 1), 'same', False, True, 'conv5_2')]

    with tf.variable_scope("convnet"):
        conv1 = conv_layer(inputs, conv_params[0], is_train)
        conv1 = conv_layer(conv1, conv_params[1], is_train)

        conv2 = pool_layer(conv1, 2, 'valid', 'pool2')
        conv2 = residual_block(conv2, conv_params[3:5], is_train,
                               shortcut_conv_param=conv_params[2],
                               use_shortcut_conv=True)
        conv2 = conv_layer(conv2, conv_params[5], is_train)

        conv3 = pool_layer(conv2, 2, 'valid', 'pool3')
        for i in range(2):
            with tf.variable_scope('conv3_{}'.format(i)):
                conv3 = residual_block(
                    conv3, conv_params[7:9], is_train,
                    shortcut_conv_param=(conv_params[6] if i == 0 else None),
                    use_shortcut_conv=(i == 0))
        conv3 = conv_layer(conv3, conv_params[9], is_train)

        conv4 = conv3
        for i in range(5):
            with tf.variable_scope('conv4_{}'.format(i)):
                conv4 = residual_block(
                    conv4, conv_params[11:13], is_train,
                    shortcut_conv_param=(conv_params[10] if i == 0 else None),
                    use_shortcut_conv=(i == 0))
        conv4 = conv_layer(conv4, conv_params[13], is_train)

        conv5 = conv4
        for i in range(3):
            with tf.variable_scope('conv5_{}'.format(i)):
                conv5 = residual_block(
                    conv5, conv_params[15:17], is_train,
                    shortcut_conv_param=(conv_params[14] if i == 0 else None),
                    use_shortcut_conv=(i == 0))
        conv5 = conv_layer(conv5, conv_params[17], is_train)

        features = conv5

        sequence_length = tf.reshape(widths // 4, [-1], name='seq_len')

        return features, sequence_length
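
# residual_block is another helper defined elsewhere in the repo. Below is a
# minimal sketch of a plausible implementation, reusing the ConvParams /
# conv_layer sketch above: two convolutions on the residual path, an optional
# 1x1 projection on the shortcut (as the conv*_1 params suggest), and an
# additive skip connection. Where ReLU and batch norm sit relative to the
# addition is an assumption.
def residual_block_sketch(inputs, conv_params, is_train,
                          shortcut_conv_param=None, use_shortcut_conv=False):
    """Sketch: y = relu(conv(conv(x)) + (proj(x) if use_shortcut_conv else x))."""
    shortcut = inputs
    if use_shortcut_conv:
        # 1x1 projection so the shortcut matches the residual path's channels.
        shortcut = conv_layer(inputs, shortcut_conv_param, is_train)
    residual = conv_layer(inputs, conv_params[0], is_train)
    residual = conv_layer(residual, conv_params[1], is_train)
    return tf.nn.relu(residual + shortcut)
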
def transformer_encoder(self, features, num_layers, hidden_size, is_train):
    """ Transformer encoder with adaptive 2D positional encoding and a
    convolutional feed-forward block in each layer. """
    with tf.variable_scope('transformer_enc'):
        attention_bias = 0

        # Position encoding
        batch_size = tf.shape(features)[0]
        height = tf.shape(features)[1]
        width = tf.shape(features)[2]
        const_h = self.FLAGS.resize_hw.height // 4
        const_w = self.FLAGS.resize_hw.width // 4
        h_encoding = self.get_position_encoding(height, hidden_size,
                                                'h_encoding')
        w_encoding = self.get_position_encoding(width, hidden_size,
                                                'w_encoding')
        h_encoding = tf.expand_dims(h_encoding, axis=1)  # [H, 1, hidden]
        w_encoding = tf.expand_dims(w_encoding, axis=0)  # [1, W, hidden]
        h_encoding = tf.tile(tf.expand_dims(h_encoding, axis=0),
                             [batch_size, 1, 1, 1])
        w_encoding = tf.tile(tf.expand_dims(w_encoding, axis=0),
                             [batch_size, 1, 1, 1])

        # Adaptive 2D positional encoding: scale the height and width
        # encodings by input-dependent gates alpha.
        inter = tf.reduce_mean(features, axis=[1, 2])  # [B, hidden]
        inter = dense_layer(inter, hidden_size // 2, name='intermediate',
                            activation=tf.nn.relu)
        if is_train:
            inter = tf.nn.dropout(inter, self.dropout_rate)
        alpha = dense_layer(inter, 2 * hidden_size, name='alpha',
                            activation=tf.nn.sigmoid)
        alpha = tf.reshape(alpha, [-1, 2, 1, hidden_size])
        pos_encoding = alpha[:, 0:1, :, :] * h_encoding \
            + alpha[:, 1:2, :, :] * w_encoding
        features += pos_encoding
        self.hw = tf.reduce_sum(alpha, axis=[2, 3])

        # Save shape
        shape = (-1, height, width, hidden_size)
        features = tf.reshape(features, (-1, height * width, hidden_size))

        # Dropout
        if is_train:
            features = tf.nn.dropout(features, self.dropout_rate)

        # Encoder stack
        ws = []
        for n in range(num_layers):
            with tf.variable_scope("encoder_layer_%d" % n):
                with tf.variable_scope("self_attention"):
                    # layer norm
                    y = self.layer_norm(features, hidden_size)
                    # self-attention
                    y, w = self.attention_layer(y, y, hidden_size,
                                                attention_bias, 'self_att',
                                                is_train)
                    ws.append(w)
                    # dropout
                    if is_train:
                        y = tf.nn.dropout(y, self.dropout_rate)
                    # skip connection
                    features = y + features

                with tf.variable_scope("ffn"):
                    # layer norm
                    y = self.layer_norm(features, hidden_size)
                    # convolutional feed-forward (pointwise expand,
                    # depthwise 3x3, pointwise reduce) on the 2-D layout
                    y = tf.reshape(y, shape)
                    conv_params = [
                        ConvParams(self.filter_size, 1, (1, 1), 'same',
                                   False, True, 'expand'),
                        ConvParams(self.filter_size, 3, (1, 1), 'same',
                                   False, True, 'dwconv'),
                        ConvParams(self.hidden_size, 1, (1, 1), 'same',
                                   False, True, 'reduce')]
                    y = conv_layer(y, conv_params[0], is_train)
                    y = depthwise_conv_layer(y, conv_params[1], is_train)
                    y = conv_layer(y, conv_params[2], is_train)
                    y = tf.reshape(y, (-1, height * width, hidden_size))
                    # skip connection
                    features = y + features

        # Output normalization
        features = self.layer_norm(features, hidden_size)
        ws = tf.stack(ws, axis=1)

        return features, shape, ws
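
# get_position_encoding, layer_norm, attention_layer, dense_layer and
# depthwise_conv_layer are defined elsewhere in the repo. As one example,
# get_position_encoding is presumably the standard sinusoidal encoding from
# "Attention Is All You Need"; a minimal sketch under that assumption
# (the repo's own timescale constants may differ):
import math


def get_position_encoding_sketch(length, hidden_size, name,
                                 min_timescale=1.0, max_timescale=1.0e4):
    """Sketch: returns a [length, hidden_size] sinusoidal position encoding."""
    with tf.name_scope(name):
        position = tf.cast(tf.range(length), tf.float32)
        num_timescales = hidden_size // 2
        log_timescale_increment = (
            math.log(float(max_timescale) / float(min_timescale)) /
            (tf.cast(num_timescales, tf.float32) - 1))
        inv_timescales = min_timescale * tf.exp(
            tf.cast(tf.range(num_timescales), tf.float32) *
            -log_timescale_increment)
        scaled_time = (tf.expand_dims(position, 1) *
                       tf.expand_dims(inv_timescales, 0))
        # Sine on the first half of the channels, cosine on the second half.
        return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
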