def res_build_block_v1(X, increase=False, init=1.0, stddev=1.0, training=False,
                       projection=None, name=None):
    print('input shape for {} :: {}'.format(name, X.get_shape().as_list()))
    shortcut_projection = X
    stride = 1
    in_channels = X.get_shape().as_list()[-1]
    out_channels = in_channels
    if increase:
        # Downsampling block: double the channels and halve the spatial dims.
        out_channels = 2 * in_channels
        stride = 2
        if projection:
            # Option B shortcut: learned projection (with batch norm).
            name_ = name + 'projection_batch_norm'
            shortcut_projection = projection(shortcut_projection,
                                             out_channels=out_channels,
                                             training=training, name=name_)
        else:
            # Option A shortcut: spatial downsampling plus zero-padding
            # the extra channels.
            shortcut_projection = maxpool(shortcut_projection,
                                          filter_size=[1, 2, 2, 1],
                                          stride_size=[1, 2, 2, 1],
                                          padding='VALID')
            pad = out_channels - in_channels
            shortcut_projection = tf.pad(shortcut_projection,
                                         [[0, 0], [0, 0], [0, 0], [0, pad]])
    with tf.name_scope('conv1layer'):
        name_ = name + 'conv1layer_batch_norm'
        X = conv(X, filter_size=3, out_channels=out_channels, stride_size=stride,
                 padding='SAME', init_bias=init, stddev=stddev)
        X = batch_norm(X, training, name=name_)
        X = tf.nn.relu(X)
    with tf.name_scope('conv2layer'):
        name_ = name + 'conv2layer_batch_norm'
        X = conv(X, filter_size=3, out_channels=out_channels, stride_size=1,
                 padding='SAME', init_bias=init, stddev=stddev)
        X = batch_norm(X, training, name=name_)
    with tf.name_scope('residual'):
        X += shortcut_projection
        X = tf.nn.relu(X)
    print('output shape for {} :: {}'.format(name, X.get_shape().as_list()))
    return X
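# `projection_` (passed as the `projection` callable by the ResNet dispatcher
# below) is not defined in this excerpt; a minimal sketch under the assumption
# that it is the ResNet "option B" shortcut -- a strided 1x1 convolution plus
# batch norm -- reusing the same `conv`/`batch_norm` helpers as the block above
# (and assuming `import tensorflow as tf` at module top):
def projection_(X, out_channels, training=False, name=None):
    # Halve the spatial dimensions and match the doubled channel count.
    X = conv(X, filter_size=1, out_channels=out_channels, stride_size=2,
             padding='SAME')
    return batch_norm(X, training, name=name)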
def network(X, training, label_cnt, dropout_keep_prob):
    with tf.name_scope('pre_inception'):
        with tf.name_scope('conv1layer'):
            X = op.conv(X, filter_size=7, stride_size=2, padding='VALID',
                        out_channels=64, a=tf.nn.relu)
            X = tf.pad(X, [[0, 0], [1, 1], [1, 1], [0, 0]])
            X = op.maxpool(X, filter_size=3, stride_size=2, padding='VALID')
            X = op.lrn(X)
        with tf.name_scope('conv2layer'):
            X = op.conv(X, filter_size=3, stride_size=1, padding='SAME',
                        out_channels=192, a=tf.nn.relu)
            X = op.maxpool(X, filter_size=3, stride_size=2, padding='VALID')
            X = op.lrn(X)
    with tf.name_scope('inception_blocks'):
        X = inception3a(X, training)
        X = inception3b(X, training)
        X = inception4a(X, training)
        logits1 = auxillary_logits(X, label_cnt, name='auxillary_layer1')
        X = inception4b(X, training)
        X = inception4c(X, training)
        X = inception4d(X, training)
        logits2 = auxillary_logits(X, label_cnt, name='auxillary_layer2')
        X = inception4e(X, training)
        X = inception5a(X, training)
        X = inception5b(X, training)
    with tf.name_scope('post_inception'):
        X = op.avgpool(X, filter_size=7, stride_size=1, padding='VALID')
        X = tf.nn.dropout(X, dropout_keep_prob)
    with tf.name_scope('fc1layer'):
        final_logits = op.fc(X, output_size=label_cnt, a=None)
    with tf.name_scope('Softmax'):
        # Softmax must be applied to the final logits, not the pooled features.
        out_probs = tf.nn.softmax(logits=final_logits, axis=-1, name='softmax_op')
    return logits1, logits2, final_logits, out_probs
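# A minimal training-loss sketch for the network above (assumes TF1 graph mode
# and one-hot `labels`; the 0.3 weight on each auxiliary classifier follows the
# GoogLeNet paper):
inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
labels = tf.placeholder(tf.float32, [None, 1000])
logits1, logits2, final_logits, out_probs = network(inputs, training=True,
                                                    label_cnt=1000,
                                                    dropout_keep_prob=0.4)
xent = tf.nn.softmax_cross_entropy_with_logits_v2
loss = (tf.reduce_mean(xent(labels=labels, logits=final_logits))
        + 0.3 * tf.reduce_mean(xent(labels=labels, logits=logits1))
        + 0.3 * tf.reduce_mean(xent(labels=labels, logits=logits2)))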
def add_generator(self, name_scope='SoundNet'):
    with tf.variable_scope(name_scope) as scope:
        self.layers = {}

        # Stream one: conv1 ~ conv7
        self.layers[1] = conv2d(self.sound_input_placeholder, 1, 16, k_h=64, d_h=2, p_h=32, name_scope='conv1')
        self.layers[2] = batch_norm(self.layers[1], 16, self.config['eps'], name_scope='conv1')
        self.layers[3] = relu(self.layers[2], name_scope='conv1')
        self.layers[4] = maxpool(self.layers[3], k_h=8, d_h=8, name_scope='conv1')

        self.layers[5] = conv2d(self.layers[4], 16, 32, k_h=32, d_h=2, p_h=16, name_scope='conv2')
        self.layers[6] = batch_norm(self.layers[5], 32, self.config['eps'], name_scope='conv2')
        self.layers[7] = relu(self.layers[6], name_scope='conv2')
        self.layers[8] = maxpool(self.layers[7], k_h=8, d_h=8, name_scope='conv2')

        self.layers[9] = conv2d(self.layers[8], 32, 64, k_h=16, d_h=2, p_h=8, name_scope='conv3')
        self.layers[10] = batch_norm(self.layers[9], 64, self.config['eps'], name_scope='conv3')
        self.layers[11] = relu(self.layers[10], name_scope='conv3')

        self.layers[12] = conv2d(self.layers[11], 64, 128, k_h=8, d_h=2, p_h=4, name_scope='conv4')
        self.layers[13] = batch_norm(self.layers[12], 128, self.config['eps'], name_scope='conv4')
        self.layers[14] = relu(self.layers[13], name_scope='conv4')

        self.layers[15] = conv2d(self.layers[14], 128, 256, k_h=4, d_h=2, p_h=2, name_scope='conv5')
        self.layers[16] = batch_norm(self.layers[15], 256, self.config['eps'], name_scope='conv5')
        self.layers[17] = relu(self.layers[16], name_scope='conv5')
        self.layers[18] = maxpool(self.layers[17], k_h=4, d_h=4, name_scope='conv5')

        self.layers[19] = conv2d(self.layers[18], 256, 512, k_h=4, d_h=2, p_h=2, name_scope='conv6')
        self.layers[20] = batch_norm(self.layers[19], 512, self.config['eps'], name_scope='conv6')
        self.layers[21] = relu(self.layers[20], name_scope='conv6')

        self.layers[22] = conv2d(self.layers[21], 512, 1024, k_h=4, d_h=2, p_h=2, name_scope='conv7')
        self.layers[23] = batch_norm(self.layers[22], 1024, self.config['eps'], name_scope='conv7')
        self.layers[24] = relu(self.layers[23], name_scope='conv7')

        # Split one: conv8, conv8_2
        # NOTE: here we use a padding of 2 to skip an unknown error
        # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/common_shape_fns.cc#L45
        self.layers[25] = conv2d(self.layers[24], 1024, 1000, k_h=8, d_h=2, p_h=2, name_scope='conv8')
        self.layers[26] = conv2d(self.layers[24], 1024, 401, k_h=8, d_h=2, p_h=2, name_scope='conv8_2')
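# `self.sound_input_placeholder` is created elsewhere on the model; a minimal
# sketch of that setup, assuming SoundNet's raw-waveform layout of
# [batch, samples, 1, 1] (so the 1-D convolutions can be expressed via conv2d)
# and a hypothetical `sample_size` config key:
def add_placeholders(self):
    self.sound_input_placeholder = tf.placeholder(
        tf.float32,
        shape=[self.config['batch_size'], self.config['sample_size'], 1, 1],
        name='sound_input_placeholder')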
def build(self, rgb, label_num, kp, last_layer_type="softmax"):
    assert rgb.get_shape().as_list()[1:] == [224, 224, 3]
    self.conv1 = conv_layer(rgb, 7, 3, 64, 2, "scale1")
    self.conv1 = bn(self.conv1, is_training=self.is_training, name="scale1")
    self.conv1 = tf.nn.relu(self.conv1)
    self.conv1 = maxpool(self.conv1, 3, 2, "pool1")
    with tf.variable_scope("scale2"):
        self.block1_1 = res_block_3_layer(self.conv1, [64, 64, 256], "block1",
                                          change_dimension=True, block_stride=1,
                                          is_training=self.is_training)
        self.block1_2 = res_block_3_layer(self.block1_1, [64, 64, 256], "block2",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
        self.block1_3 = res_block_3_layer(self.block1_2, [64, 64, 256], "block3",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
    with tf.variable_scope("scale3"):
        self.block2_1 = res_block_3_layer(self.block1_3, [128, 128, 512], "block1",
                                          change_dimension=True, block_stride=2,
                                          is_training=self.is_training)
        self.block2_2 = res_block_3_layer(self.block2_1, [128, 128, 512], "block2",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
        self.block2_3 = res_block_3_layer(self.block2_2, [128, 128, 512], "block3",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
        self.block2_4 = res_block_3_layer(self.block2_3, [128, 128, 512], "block4",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
    with tf.variable_scope("scale4"):
        self.block3_1 = res_block_3_layer(self.block2_4, [256, 256, 1024], "block1",
                                          change_dimension=True, block_stride=2,
                                          is_training=self.is_training)
        self.block3_2 = res_block_3_layer(self.block3_1, [256, 256, 1024], "block2",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
        self.block3_3 = res_block_3_layer(self.block3_2, [256, 256, 1024], "block3",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
        self.block3_4 = res_block_3_layer(self.block3_3, [256, 256, 1024], "block4",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
        self.block3_5 = res_block_3_layer(self.block3_4, [256, 256, 1024], "block5",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
        self.block3_6 = res_block_3_layer(self.block3_5, [256, 256, 1024], "block6",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
    with tf.variable_scope("scale5"):
        self.block4_1 = res_block_3_layer(self.block3_6, [512, 512, 2048], "block1",
                                          change_dimension=True, block_stride=2,
                                          is_training=self.is_training)
        self.block4_2 = res_block_3_layer(self.block4_1, [512, 512, 2048], "block2",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
        self.block4_3 = res_block_3_layer(self.block4_2, [512, 512, 2048], "block3",
                                          change_dimension=False, block_stride=1,
                                          is_training=self.is_training)
    with tf.variable_scope("fc"):
        self.pool2 = maxpool(self.block4_3, 7, 1, "pool2")
        self.fc1 = fc_layer(self.pool2, 2048, 2048, "fc1")
        self.fc1 = tf.nn.relu(tf.nn.dropout(self.fc1, keep_prob=kp))
        self.fc2 = fc_layer(self.fc1, 2048, label_num, "fc2")
    if last_layer_type == "sigmoid":
        self.prob = tf.nn.sigmoid(self.fc2)
    elif last_layer_type == "softmax":
        self.prob = tf.nn.softmax(self.fc2)
    elif last_layer_type == "no":
        self.prob = self.fc2
    return self.prob
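# A minimal usage sketch (assumes TF1 graph mode and that `model` is an
# instance of the class this method belongs to, with `is_training` set before
# building, as the method expects):
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
keep_prob = tf.placeholder(tf.float32)
model.is_training = tf.placeholder(tf.bool)
probs = model.build(images, label_num=1000, kp=keep_prob,
                    last_layer_type="softmax")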
def network(X, training=False, dropout_keep_prob=1.0, type_=18, label_cnt=1000):
    # Need to change the pre_res conv to match an output shape of 112x112,
    # i.e. add some amount of padding.
    with tf.name_scope('pre_res'):
        X = conv(X, filter_size=7, out_channels=64, stride_size=2, padding='SAME')
        X = batch_norm(X, training=training, name='pre_res_batch_norm')
        X = tf.nn.relu(X)
        X = tf.pad(X, [[0, 0], [1, 1], [1, 1], [0, 0]])
        X = maxpool(X, stride_size=2, filter_size=3, padding='VALID')

    # Number of residual units per stage for each ResNet depth.
    num_units34 = [3, 4, 6, 3]
    num_units18 = [2, 2, 2, 2]
    num_units50 = [3, 4, 6, 3]
    num_units101 = [3, 4, 23, 3]
    num_units152 = [3, 8, 36, 3]
    if type_ == 18:
        num_units = num_units18
        res_block = res_build_block_v1
    elif type_ == 34:
        num_units = num_units34
        res_block = res_build_block_v1
    elif type_ == 50:
        num_units = num_units50
        res_block = bottleneck_block_v1
    elif type_ == 101:
        num_units = num_units101
        res_block = bottleneck_block_v1
    else:
        num_units = num_units152
        res_block = bottleneck_block_v1

    for x in range(len(num_units)):
        name_res_block = 'res_{}_'.format(x + 1)
        with tf.name_scope(name_res_block):
            # The first stage keeps the input resolution; every later stage
            # downsamples in its first block.
            increase = True
            if x == 0:
                increase = False
            inner_block = 'block_1'
            with tf.name_scope(inner_block):
                name = name_res_block + inner_block
                X = res_block(X, training=training, increase=increase,
                              projection=projection_, name=name)
            for y in range(1, num_units[x]):
                inner_block = 'block_{}'.format(y + 1)
                with tf.name_scope(inner_block):
                    name = name_res_block + inner_block
                    X = res_block(X, training=training, increase=False,
                                  projection=projection_, name=name)

    with tf.name_scope('final_layer'):
        X = fc(X, label_cnt, a=None)
    with tf.name_scope('softmaxlayer'):
        out_probs = tf.nn.softmax(logits=X, axis=-1, name='softmax_op')
    return X, out_probs
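# A minimal usage sketch: build a ResNet-34 graph with the dispatcher above
# (assumes TF1 graph mode and 224x224 RGB inputs).
inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, probs = network(inputs, training=True, type_=34, label_cnt=1000)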
def inception_block(X, conv3x3reduce_, conv1x1_, conv3x3_, conv5x5reduce_, conv5x5_,
                    pool_proj_, name, training):
    with tf.name_scope(name):
        # Four parallel branches: 1x1, 1x1 -> 3x3, 1x1 -> 5x5, and pool -> 1x1.
        conv1x1 = op.conv(X, filter_size=1, stride_size=1, padding='SAME',
                          out_channels=conv1x1_, a=None)
        conv3x3reduce = op.conv(X, filter_size=1, stride_size=1, padding='SAME',
                                out_channels=conv3x3reduce_, a=tf.nn.relu)
        conv3x3 = op.conv(conv3x3reduce, filter_size=3, stride_size=1, padding='SAME',
                          out_channels=conv3x3_, a=None)
        conv5x5reduce = op.conv(X, filter_size=1, stride_size=1, padding='SAME',
                                out_channels=conv5x5reduce_, a=tf.nn.relu)
        conv5x5 = op.conv(conv5x5reduce, filter_size=5, stride_size=1, padding='SAME',
                          out_channels=conv5x5_, a=None)
        # The pooling branch must keep the spatial dimensions (stride 1, SAME)
        # so all four branches can be concatenated along the channel axis,
        # and the projection is applied to the pooled tensor.
        pool = op.maxpool(X, filter_size=3, stride_size=1, padding='SAME')
        pool_proj = op.conv(pool, filter_size=1, stride_size=1, padding='SAME',
                            out_channels=pool_proj_, a=None)

        conv1x1 = op.batch_norm(conv1x1, training=training, name=name + '_conv1x1_batchnorm')
        conv3x3 = op.batch_norm(conv3x3, training=training, name=name + '_conv3x3_batchnorm')
        conv5x5 = op.batch_norm(conv5x5, training=training, name=name + '_conv5x5_batchnorm')
        pool_proj = op.batch_norm(pool_proj, training=training, name=name + '_pool_batchnorm')

        conv1x1 = tf.nn.relu(conv1x1)
        conv3x3 = tf.nn.relu(conv3x3)
        conv5x5 = tf.nn.relu(conv5x5)
        pool_proj = tf.nn.relu(pool_proj)

        out = tf.concat([conv1x1, conv3x3, conv5x5, pool_proj], axis=3,
                        name='output_block')
    return out
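# A hedged example of wiring one stage with this block: the channel counts
# below are the inception (3a) configuration from the GoogLeNet paper
# (64 1x1; 96 reduce -> 128 3x3; 16 reduce -> 32 5x5; 32 pool projection).
def inception3a(X, training):
    return inception_block(X, conv3x3reduce_=96, conv1x1_=64, conv3x3_=128,
                           conv5x5reduce_=16, conv5x5_=32, pool_proj_=32,
                           name='inception3a', training=training)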