def vgg16Netvlad(image_batch):
    '''
    Build the VGG16 + NetVLAD graph (no PCA stage) on top of `image_batch`.

    Assumes rank 4 input, first 3 axes fixed or dynamic, last axis 1 or 3.
    All variables are created under the 'vgg16_netvlad_pca' variable scope.

    Returns the tensor produced by layers.netVLAD(x, 64).
    '''
    assert len(image_batch.shape) == 4

    # Convolutions of each VGG16 block as (layer name suffix, output channels).
    # Within a block every convolution but the last has a fused ReLU; the last
    # one is linear and its ReLU is applied after the pooling that follows.
    vgg_blocks = (
        (('1_1', 64), ('1_2', 64)),
        (('2_1', 128), ('2_2', 128)),
        (('3_1', 256), ('3_2', 256), ('3_3', 256)),
        (('4_1', 512), ('4_2', 512), ('4_3', 512)),
        (('5_1', 512), ('5_2', 512), ('5_3', 512)),
    )

    with tf.variable_scope('vgg16_netvlad_pca'):
        # Gray to color if necessary: a 1x1 convolution with an all-ones
        # kernel copies the single input channel into 3 output channels.
        if image_batch.shape[3] == 1:
            ones_kernel = np.ones((1, 1, 1, 3))
            unit_strides = np.ones(4).tolist()
            x = tf.nn.conv2d(image_batch, ones_kernel, unit_strides, 'VALID')
        else:
            assert image_batch.shape[3] == 3
            x = image_batch

        # Subtract trained average image.
        average_rgb = tf.get_variable(
            'average_rgb', 3, dtype=image_batch.dtype)
        x = x - average_rgb

        # VGG16
        final_block = len(vgg_blocks) - 1
        for block_idx, block in enumerate(vgg_blocks):
            final_conv = len(block) - 1
            for conv_idx, (suffix, channels) in enumerate(block):
                fused_relu = tf.nn.relu if conv_idx != final_conv else None
                x = tf.layers.conv2d(
                    x, channels, [3, 3], 1, padding='same',
                    activation=fused_relu, name='conv%s' % suffix)
            # No pooling (and no ReLU) after the fifth block.
            if block_idx != final_block:
                x = tf.layers.max_pooling2d(x, 2, 2)
                x = tf.nn.relu(x)

        # NetVLAD
        x = tf.nn.l2_normalize(x, axis=-1)
        x = layers.netVLAD(x, 64)

    return x
def netFromMat():
    '''
    Method for parsing vd16_pitts30k_conv5_3_vlad_preL2_intra_white.mat ,
    probably also others, but not tested. Requires structed.mat in the matlab
    folder, which can be generated with matlab/net_class2struct.m, otherwise
    python can't read the parameters of the custom layers.

    Returns:
        A list of tensors: the input placeholder, the average-subtracted
        input, and then one tensor per layer of the loaded network, in order.

    Raises:
        AssertionError: if a layer uses settings this parser does not handle.
        ValueError: if a layer type or custom layer name is unknown.
    '''
    # %% Load mat from netvlad.
    mat = scio.loadmat(structedMatPath(),
                       struct_as_record=False, squeeze_me=True)
    matnet = mat['net']
    mat_layers = matnet.layers

    # %% Spyder section for debugging.
    tf_layers = [tf.placeholder(dtype=tf.float32,
                                shape=[None, None, None, 3])]
    with tf.variable_scope('vgg16_netvlad_pca'):
        # Additional layer for average image normalization.
        average_rgb = tf.get_variable(
            'average_rgb', 3, initializer=tf.constant_initializer(
                matnet.meta.normalization.averageImage[0, 0, :]))
        tf_layers.append(tf_layers[-1] - average_rgb)

        for i, layer in enumerate(mat_layers):
            # Checked before the first use of layer.name; placed after it,
            # the assertion could never fail.
            assert hasattr(layer, 'name')
            # make name TF-friendly:
            layer.name = layer.name.replace(':', '_')
            # Print layer info
            print('%02d: %s: %s' % (i + 1, layer.name, layer.type))

            if layer.type == 'conv':
                w = layer.weights[0]
                b = layer.weights[1]
                if len(tf_layers[-1].shape) == 4:
                    # Only a padding of 1 is handled; it is built with
                    # 'same' padding.
                    assert np.all(layer.pad == 1)
                    conv_input = tf_layers[-1]
                    padding = 'same'
                else:
                    # PCA convolution: the rank 2 input and the weights are
                    # expanded so the layer can be built as a convolution.
                    assert len(tf_layers[-1].shape) == 2
                    assert layer.name == 'WPCA'
                    assert layer.pad == 0
                    w = np.expand_dims(np.expand_dims(w, 0), 0)
                    conv_input = tf.expand_dims(
                        tf.expand_dims(tf_layers[-1], 1), 1)
                    padding = 'valid'
                tf_layers.append(
                    tf.layers.conv2d(
                        conv_input, b.size, w.shape[:2],
                        strides=layer.stride, padding=padding,
                        activation=None,
                        kernel_initializer=tf.constant_initializer(w),
                        bias_initializer=tf.constant_initializer(b),
                        name=layer.name))
            elif layer.type == 'relu':
                assert layer.leak == 0
                tf_layers.append(tf.nn.relu(tf_layers[-1]))
            elif layer.type == 'pool':
                assert layer.method == 'max'
                assert np.all(layer.pad == 0)
                tf_layers.append(
                    tf.layers.max_pooling2d(
                        tf_layers[-1], layer.pool, layer.stride,
                        name=layer.name))
            elif layer.type == 'normalize':
                p = layer.param
                # Asserting desired normalization is l2 across all layers.
                # See http://www.vlfeat.org/matconvnet/mfiles/vl_nnnormalize/
                assert np.all(p[[0, 2, 3]] == np.array([1024, 1, 0.5]))
                tf_layers.append(
                    layers.matconvnetNormalize(tf_layers[-1], p[1]))
            elif layer.type == 'custom':
                if layer.name == 'vlad_core':
                    a = layer.weights[0]
                    c = layer.weights[1]
                    tf_layers.append(
                        layers.netVLAD(
                            tf_layers[-1], layer.K,
                            assign_weight_initializer=(
                                tf.constant_initializer(a)),
                            cluster_initializer=tf.constant_initializer(c),
                            skip_postnorm=True))
                elif layer.name == 'postL2':
                    # Swap the last two axes before flattening.
                    reshaped = tf.transpose(tf_layers[-1], perm=[0, 2, 1])
                    tf_layers.append(
                        layers.matconvnetNormalize(
                            tf.layers.flatten(reshaped), 1e-12))
                elif layer.name == 'finalL2':
                    tf_layers.append(
                        layers.matconvnetNormalize(
                            tf.layers.flatten(tf_layers[-1]), 1e-12))
                else:
                    raise ValueError(
                        'Unknown custom layer %s' % layer.name)
            else:
                raise ValueError('Unknown layer type %s' % layer.type)

            print(tf_layers[-1].shape)

    # %% Spyder section for debugging.
    return tf_layers
def vgg16NetvladPca(image_batch):
    '''
    Build the VGG16 + NetVLAD + PCA graph on top of `image_batch`.

    Assumes rank 4 input, first 3 dims fixed or dynamic, last dim 1 or 3.
    All variables are created under the 'vgg16_netvlad_pca' variable scope.

    Returns the flattened output of the 'WPCA' 1x1 convolution (4096
    filters), L2-normalized along its last axis.
    '''
    assert len(image_batch.shape) == 4

    # NOTE(review): the tf.compat.v1 variable and layer APIs used below are
    # graph-mode APIs - confirm callers build this in a graph.
    with tf.compat.v1.variable_scope('vgg16_netvlad_pca'):
        # Gray to color if necessary: a 1x1 convolution with an all-ones
        # kernel copies the single input channel into 3 output channels.
        if image_batch.shape[3] == 1:
            x = tf.nn.conv2d(image_batch, np.ones((1, 1, 1, 3)),
                             [1, 1, 1, 1], 'VALID')
        else:
            assert image_batch.shape[3] == 3
            x = image_batch

        # Subtract trained average image.
        average_rgb = tf.compat.v1.get_variable(
            'average_rgb', 3, dtype=image_batch.dtype)
        x = x - average_rgb

        # VGG16
        def vggConv(inputs, numbers, out_dim, with_relu):
            # 3x3, stride 1, 'same' padded convolution named 'conv<numbers>'.
            activation = tf.nn.relu if with_relu else None
            return tf.compat.v1.layers.conv2d(
                inputs, out_dim, [3, 3], 1, padding='same',
                activation=activation, name='conv%s' % numbers)

        def vggPool(inputs):
            # Functional 2x2 max pooling with stride 2. The Keras MaxPool2D
            # class must not be called with the tensor as its first argument:
            # that argument is the pool size.
            return tf.compat.v1.layers.max_pooling2d(inputs, 2, 2)

        # The last convolution of each block is linear; its ReLU is applied
        # after the max pooling.
        x = vggConv(x, '1_1', 64, True)
        x = vggConv(x, '1_2', 64, False)
        x = vggPool(x)
        x = tf.nn.relu(x)

        x = vggConv(x, '2_1', 128, True)
        x = vggConv(x, '2_2', 128, False)
        x = vggPool(x)
        x = tf.nn.relu(x)

        x = vggConv(x, '3_1', 256, True)
        x = vggConv(x, '3_2', 256, True)
        x = vggConv(x, '3_3', 256, False)
        x = vggPool(x)
        x = tf.nn.relu(x)

        x = vggConv(x, '4_1', 512, True)
        x = vggConv(x, '4_2', 512, True)
        x = vggConv(x, '4_3', 512, False)
        x = vggPool(x)
        x = tf.nn.relu(x)

        x = vggConv(x, '5_1', 512, True)
        x = vggConv(x, '5_2', 512, True)
        x = vggConv(x, '5_3', 512, False)

        # NetVLAD
        x = tf.nn.l2_normalize(x, axis=-1)
        x = layers.netVLAD(x, 64)

        # PCA, built as a 1x1 convolution over the descriptor expanded to
        # rank 4.
        x = tf.compat.v1.layers.conv2d(
            tf.expand_dims(tf.expand_dims(x, 1), 1),
            4096, 1, 1, name='WPCA')
        x = tf.nn.l2_normalize(tf.compat.v1.layers.flatten(x), axis=-1)

    return x
def netFromMat():
    '''
    Method for parsing vd16_pitts30k_conv5_3_vlad_preL2_intra_white.mat ,
    probably also others, but not tested. Requires structed.mat in the matlab
    folder, which can be generated with matlab/net_class2struct.m, otherwise
    python can't read the parameters of the custom layers.

    Returns:
        A list of tensors: the input placeholder, the average-subtracted
        input, and then one tensor per layer of the loaded network, in order.

    Raises:
        AssertionError: if a layer uses settings this parser does not handle.
        ValueError: if a layer type or custom layer name is unknown.
    '''
    #%% Load mat from netvlad.
    mat = scio.loadmat(structedMatPath(),
                       struct_as_record=False, squeeze_me=True)
    matnet = mat['net']
    mat_layers = matnet.layers

    #%% Spyder section for debugging.
    tf_layers = [tf.placeholder(dtype=tf.float32,
                                shape=[None, None, None, 3])]
    with tf.variable_scope('vgg16_netvlad_pca'):
        # Additional layer for average image normalization.
        average_rgb = tf.get_variable(
            'average_rgb', 3, initializer=tf.constant_initializer(
                matnet.meta.normalization.averageImage[0, 0, :]))
        tf_layers.append(tf_layers[-1] - average_rgb)

        for i, layer in enumerate(mat_layers):
            # Checked before the first use of layer.name; placed after it,
            # the assertion could never fail.
            assert hasattr(layer, 'name')
            # make name TF-friendly:
            layer.name = layer.name.replace(':', '_')
            # Print layer info
            print('%02d: %s: %s' % (i + 1, layer.name, layer.type))

            if layer.type == 'conv':
                w = layer.weights[0]
                b = layer.weights[1]
                if len(tf_layers[-1].shape) == 4:
                    # Only a padding of 1 is handled; it is built with
                    # 'same' padding.
                    assert np.all(layer.pad == 1)
                    conv_input = tf_layers[-1]
                    padding = 'same'
                else:
                    # PCA convolution: the rank 2 input and the weights are
                    # expanded so the layer can be built as a convolution.
                    assert len(tf_layers[-1].shape) == 2
                    assert layer.name == 'WPCA'
                    assert layer.pad == 0
                    w = np.expand_dims(np.expand_dims(w, 0), 0)
                    conv_input = tf.expand_dims(
                        tf.expand_dims(tf_layers[-1], 1), 1)
                    padding = 'valid'
                tf_layers.append(tf.layers.conv2d(
                    conv_input, b.size, w.shape[:2],
                    strides=layer.stride, padding=padding,
                    activation=None,
                    kernel_initializer=tf.constant_initializer(w),
                    bias_initializer=tf.constant_initializer(b),
                    name=layer.name))
            elif layer.type == 'relu':
                assert layer.leak == 0
                tf_layers.append(tf.nn.relu(tf_layers[-1]))
            elif layer.type == 'pool':
                assert layer.method == 'max'
                assert np.all(layer.pad == 0)
                tf_layers.append(tf.layers.max_pooling2d(
                    tf_layers[-1], layer.pool, layer.stride,
                    name=layer.name))
            elif layer.type == 'normalize':
                p = layer.param
                # Asserting desired normalization is l2 across all layers.
                # See http://www.vlfeat.org/matconvnet/mfiles/vl_nnnormalize/
                assert np.all(p[[0, 2, 3]] == np.array([1024, 1, 0.5]))
                tf_layers.append(layers.matconvnetNormalize(
                    tf_layers[-1], p[1]))
            elif layer.type == 'custom':
                if layer.name == 'vlad_core':
                    a = layer.weights[0]
                    c = layer.weights[1]
                    tf_layers.append(layers.netVLAD(
                        tf_layers[-1], layer.K,
                        assign_weight_initializer=tf.constant_initializer(a),
                        cluster_initializer=tf.constant_initializer(c),
                        skip_postnorm=True))
                elif layer.name == 'postL2':
                    # Swap the last two axes before flattening.
                    reshaped = tf.transpose(tf_layers[-1], perm=[0, 2, 1])
                    tf_layers.append(layers.matconvnetNormalize(
                        tf.layers.flatten(reshaped), 1e-12))
                elif layer.name == 'finalL2':
                    tf_layers.append(layers.matconvnetNormalize(
                        tf.layers.flatten(tf_layers[-1]), 1e-12))
                else:
                    raise ValueError(
                        'Unknown custom layer %s' % layer.name)
            else:
                raise ValueError('Unknown layer type %s' % layer.type)

            print(tf_layers[-1].shape)

    #%% Spyder section for debugging.
    return tf_layers
def vgg16NetvladPca(image_batch):
    '''
    Build the VGG16 + NetVLAD + PCA graph on top of `image_batch`.

    Assumes rank 4 input, first 3 dims fixed or dynamic, last dim 1 or 3.
    All variables are created under the 'vgg16_netvlad_pca' variable scope.

    Returns the flattened output of the 'WPCA' 1x1 convolution (4096
    filters), L2-normalized along its last axis.
    '''
    assert len(image_batch.shape) == 4

    with tf.variable_scope('vgg16_netvlad_pca'):
        # Gray to color if necessary: a 1x1 convolution with an all-ones
        # kernel copies the single input channel into 3 output channels.
        if image_batch.shape[3] == 1:
            x = tf.nn.conv2d(image_batch, np.ones((1, 1, 1, 3)),
                             np.ones(4).tolist(), 'VALID')
        else:
            assert image_batch.shape[3] == 3
            x = image_batch

        # Subtract trained average image.
        average_rgb = tf.get_variable(
            'average_rgb', 3, dtype=image_batch.dtype)
        x = x - average_rgb

        # VGG16
        def vggConv(inputs, numbers, out_dim, with_relu):
            # 3x3, stride 1, 'same' padded convolution named 'conv<numbers>'.
            activation = tf.nn.relu if with_relu else None
            return tf.layers.conv2d(inputs, out_dim, [3, 3], 1,
                                    padding='same', activation=activation,
                                    name='conv%s' % numbers)

        def vggPool(inputs):
            # 2x2 max pooling with stride 2.
            return tf.layers.max_pooling2d(inputs, 2, 2)

        # The last convolution of each block is linear; its ReLU is applied
        # after the max pooling.
        x = vggConv(x, '1_1', 64, True)
        x = vggConv(x, '1_2', 64, False)
        x = vggPool(x)
        x = tf.nn.relu(x)

        x = vggConv(x, '2_1', 128, True)
        x = vggConv(x, '2_2', 128, False)
        x = vggPool(x)
        x = tf.nn.relu(x)

        x = vggConv(x, '3_1', 256, True)
        x = vggConv(x, '3_2', 256, True)
        x = vggConv(x, '3_3', 256, False)
        x = vggPool(x)
        x = tf.nn.relu(x)

        x = vggConv(x, '4_1', 512, True)
        x = vggConv(x, '4_2', 512, True)
        x = vggConv(x, '4_3', 512, False)
        x = vggPool(x)
        x = tf.nn.relu(x)

        x = vggConv(x, '5_1', 512, True)
        x = vggConv(x, '5_2', 512, True)
        x = vggConv(x, '5_3', 512, False)

        # NetVLAD
        # `axis` replaces the deprecated `dim` keyword of l2_normalize.
        x = tf.nn.l2_normalize(x, axis=-1)
        x = layers.netVLAD(x, 64)

        # PCA, built as a 1x1 convolution over the descriptor expanded to
        # rank 4.
        x = tf.layers.conv2d(tf.expand_dims(tf.expand_dims(x, 1), 1),
                             4096, 1, 1, name='WPCA')
        x = tf.nn.l2_normalize(tf.layers.flatten(x), axis=-1)

    return x