def __init__(self, net, labels_one_hot, model_params, method_params):
    """Stores arguments in member variables for further use.

    Args:
      net: A tensor with shape [batch_size, num_features, feature_size]
        which contains some extracted image features.
      labels_one_hot: An optional (can be None) ground truth labels for the
        input features. Is a tensor with shape
        [batch_size, seq_length, num_char_classes].
      model_params: A namedtuple with model parameters (model.ModelParams).
      method_params: A SequenceLayerParams instance.
    """
    self._params = model_params
    self._mparams = method_params
    self._net = net
    self._labels_one_hot = labels_one_hot
    self._batch_size = net.get_shape().dims[0].value

    # Initialize parameters for char logits which will be computed on the fly
    # inside an LSTM decoder.
    self._char_logits = {}
    regularizer = slim.l2_regularizer(self._mparams.weight_decay)
    self._softmax_w = slim.model_variable(
        'softmax_w',
        [self._mparams.num_lstm_units, self._params.num_char_classes],
        initializer=orthogonal_initializer,
        regularizer=regularizer)
    self._softmax_b = slim.model_variable(
        'softmax_b', [self._params.num_char_classes],
        initializer=tf.zeros_initializer(),
        regularizer=regularizer)
def inst_norm(x, train, data_format='NHWC', name=None, affine=False,
              act=lrelu, epsilon=1e-5):
    with tf.variable_scope(name, default_name='Inst', reuse=None):
        if x.get_shape().ndims == 4 and data_format == 'NCHW':
            x = nchw_to_nhwc(x)
        if x.get_shape().ndims == 4:
            mean_dim = [1, 2]
        else:  # ndims == 2
            mean_dim = [1]
        mu, sigma_sq = tf.nn.moments(x, mean_dim, keep_dims=True)
        inv = tf.rsqrt(sigma_sq + epsilon)
        normalized = (x - mu) * inv
        if affine:
            var_shape = [x.get_shape()[-1]]
            shift = slim.model_variable('shift',
                                        shape=var_shape,
                                        initializer=tf.zeros_initializer())
            scale = slim.model_variable('scale',
                                        shape=var_shape,
                                        initializer=tf.ones_initializer())
            out = scale * normalized + shift
        else:
            out = normalized
        if x.get_shape().ndims == 4 and data_format == 'NCHW':
            out = nhwc_to_nchw(out)
        if act is None:
            return out
        return act(out)
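# A minimal usage sketch for inst_norm above (not from the original source;
# shapes are illustrative). Passing act=None avoids depending on the lrelu
# helper that the default argument references.
import tensorflow as tf

x_demo = tf.random_normal([8, 32, 32, 64])  # NHWC feature map
y_demo = inst_norm(x_demo, train=True, affine=True, act=None, name='in_demo')
# y_demo has the same shape as x_demo, normalized per sample and per channel
# over the spatial dimensions.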
def gene_pair_convolution(inputs, batch_size, kernel_size, activation_fn,
                          name='Conv'):
    """Conv function per gene."""
    num_output = kernel_size[2]
    input_list = [tf.squeeze(x) for x in tf.split(inputs, batch_size)]
    output_list = []
    with tf.name_scope(name) as scope:
        for i in range(0, num_output):
            # weights shape = [num_genes, 2] and bias shape = [num_genes]
            weights = slim.model_variable(scope + 'weights/kernel/' + str(i),
                                          shape=kernel_size[0:2])
            bias = slim.model_variable(scope + 'bias/kernel/' + str(i),
                                       shape=kernel_size[0])
            ytmp = []
            for xtmp in input_list:
                ztmp = tf.squeeze(
                    tf.reduce_sum(tf.multiply(xtmp, weights), 1)) + bias
                ztmp = activation_fn(ztmp)
                ytmp.append(ztmp)
            # ytmp is batch_size elements of [num_genes];
            # stacking gives [batch_size, num_genes].
            output_list.append(tf.stack(ytmp))
        # output_list is num_output instances of [batch_size, num_genes].
        output = tf.stack(output_list)
        output = tf.transpose(output, perm=[1, 2, 0])
        # output is of shape [batch_size, num_genes, num_output].
        return output
def Attention(net, labels_one_hot):
    with tf.variable_scope("Attention"):
        regularizer = slim.l2_regularizer(0.00004)
        _softmax_w = slim.model_variable(
            'softmax_w', [LSTM_UNITS_NUMBER, CLASSES_NUMBER],
            initializer=orthogonal_initializer,
            regularizer=regularizer)
        _softmax_b = slim.model_variable(
            'softmax_b', [CLASSES_NUMBER],
            initializer=tf.zeros_initializer(),
            regularizer=regularizer)
        _zero_label = tf.zeros([BATCH_SIZE, CLASSES_NUMBER])
        first_label = _zero_label
        decoder_inputs = [first_label] + [None] * (SEQ_LENGTH - 1)
        lstm_cell = tf.contrib.rnn.LSTMCell(
            LSTM_UNITS_NUMBER,
            use_peepholes=False,
            cell_clip=10.,
            state_is_tuple=True,
            initializer=orthogonal_initializer)
        _char_logits = {}

        def char_logit(inputs, char_index):
            if char_index not in _char_logits:
                _char_logits[char_index] = tf.nn.xw_plus_b(
                    inputs, _softmax_w, _softmax_b)
            return _char_logits[char_index]

        def char_one_hot(logit):
            prediction = tf.argmax(logit, axis=1)
            return slim.one_hot_encoding(prediction, CLASSES_NUMBER)

        def get_input(prev, i):
            if i == 0:
                return _zero_label
            if labels_one_hot is not None:
                return labels_one_hot[:, i - 1, :]
            logit = char_logit(prev, char_index=i - 1)
            return char_one_hot(logit)

        lstm_outputs, _ = tf.contrib.legacy_seq2seq.attention_decoder(
            decoder_inputs=decoder_inputs,
            initial_state=lstm_cell.zero_state(BATCH_SIZE, tf.float32),
            attention_states=net,
            cell=lstm_cell,
            loop_function=get_input)
        logits_list = [
            tf.expand_dims(char_logit(logit, i), axis=1)
            for i, logit in enumerate(lstm_outputs)
        ]
        return tf.concat(logits_list, 1)
def instance_norm(input, name="instance_norm"):
    with tf.variable_scope(name):
        depth = input.get_shape()[3]
        scale = slim.model_variable(
            "scale", [depth],
            initializer=tf.random_normal_initializer(1.0, 0.02,
                                                     dtype=tf.float32))
        offset = slim.model_variable(
            "offset", [depth], initializer=tf.constant_initializer(0.0))
        mean, variance = tf.nn.moments(input, axes=[1, 2], keep_dims=True)
        epsilon = 1e-5
        inv = tf.rsqrt(variance + epsilon)
        normalized = (input - mean) * inv
        return scale * normalized + offset
def conv2d_transpose(inputs,
                     filter_size,
                     output_shape,
                     strides,
                     initializer=tf.keras.initializers.he_normal(),
                     padding='SAME',
                     weight_decay=0.0005,
                     scope=None):
    """Transposed conv2d.

    :param inputs: input tensor of shape [batch, height, width, channels]
    :param filter_size: filter shape [height, width, out_channels, in_channels]
    :param output_shape: 1-D tensor giving the output shape
    :param strides: int, or [1, stride_h, stride_w, 1]
    :param initializer: weight initializer
    :param padding: 'SAME' or 'VALID'
    :param weight_decay: l2 regularization scale for the weights
    :param scope: variable scope name
    :return: the transposed convolution output
    """
    with tf.variable_scope(scope, 'Conv2dTranspose'):
        filters = slim.model_variable(
            'weights',
            shape=filter_size,
            regularizer=slim.l2_regularizer(weight_decay),
            initializer=initializer)
        if isinstance(strides, int):
            strides = [1, strides, strides, 1]
        return tf.nn.conv2d_transpose(inputs,
                                      filter=filters,
                                      output_shape=output_shape,
                                      strides=strides,
                                      padding=padding)
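# A hedged usage sketch for conv2d_transpose above (not from the original
# source; all shapes illustrative). With stride 2 and 'SAME' padding the
# spatial dimensions double; note tf.nn.conv2d_transpose's filter layout of
# [height, width, output_channels, in_channels].
import tensorflow as tf

up_in = tf.random_normal([4, 16, 16, 128])
up_out = conv2d_transpose(up_in,
                          filter_size=[3, 3, 64, 128],
                          output_shape=[4, 32, 32, 64],
                          strides=2,
                          scope='upsample_demo')  # -> [4, 32, 32, 64]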
def compute_votes(poses_i, o, regularizer, tag=False):
    """Compute the votes by multiplying input poses by transformation matrix.

    Multiply the poses of layer i by the transform matrix to compute the
    votes for layer j.

    Author: Ashley Gritzman 19/10/2018

    Credit:
      Suofei Zhang's implementation on GitHub, "Matrix-Capsules-EM-Tensorflow"
      https://github.com/www0wwwjs1/Matrix-Capsules-EM-Tensorflow

    Args:
      poses_i: poses in layer i tiled according to the kernel
        (N*OH*OW, kh*kw*i, 16)
        (64*5*5, 9*8, 16)
      o: number of output capsules, also called "parent_caps"
      regularizer: regularizer applied to the transformation weights

    Returns:
      votes: (N*OH*OW, kh*kw*i, o, 16)
        (64*5*5, 9*8, 32, 16)
    """
    batch_size = int(poses_i.get_shape()[0])  # 64*5*5
    kh_kw_i = int(poses_i.get_shape()[1])  # 9*8

    # (64*5*5, 9*8, 16) -> (64*5*5, 9*8, 1, 4, 4)
    output = tf.reshape(poses_i, shape=[batch_size, kh_kw_i, 1, 4, 4])

    # The output of a capsule is miu (the mean of a Gaussian) and an
    # activation (the sum of probabilities); neither depends on the absolute
    # values of w and votes, and using weights with a bigger stddev helps
    # numerical stability.
    w = slim.model_variable(
        'w',
        shape=[1, kh_kw_i, o, 4, 4],
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
        regularizer=regularizer)

    # (1, 9*8, 32, 4, 4) -> (64*5*5, 9*8, 32, 4, 4)
    w = tf.tile(w, [batch_size, 1, 1, 1, 1])

    # (64*5*5, 9*8, 1, 4, 4) -> (64*5*5, 9*8, 32, 4, 4)
    output = tf.tile(output, [1, 1, o, 1, 1])

    # (64*5*5, 9*8, 32, 4, 4) x (64*5*5, 9*8, 32, 4, 4)
    # -> (64*5*5, 9*8, 32, 4, 4)
    mult = tf.matmul(output, w)

    # (64*5*5, 9*8, 32, 4, 4) -> (64*5*5, 9*8, 32, 16)
    votes = tf.reshape(mult, [batch_size, kh_kw_i, o, 16])

    # tf.summary.histogram('w', w)

    return votes
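# A hedged sketch of calling compute_votes above, reusing the example shapes
# from its docstring (spatial batch of 64*5*5, a 3x3 kernel over 8 input
# capsule types, 32 output capsule types). Numbers are illustrative only.
import tensorflow as tf
import tensorflow.contrib.slim as slim

poses_demo = tf.random_normal([64 * 5 * 5, 9 * 8, 16])
with tf.variable_scope('conv_caps_demo'):
    votes_demo = compute_votes(poses_demo, o=32,
                               regularizer=slim.l2_regularizer(1e-4))
# votes_demo: (64*5*5, 9*8, 32, 16)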
def relational_gin_mlp_neighborhood_aggregation(
        inputs,
        num_outputs,
        adjacency,
        layers=2,
        activation_fn=None,
        weights_initializer=tf.glorot_uniform_initializer(),
        biases_initializer=tf.zeros_initializer(),
        scope=None):
    """Aggregate features from neighbors."""
    with tf.variable_scope(scope, 'gin_mlp_neigh_agg', [inputs, adjacency]):
        num_edge_types = adjacency.shape[1].value
        edge_layers = []
        for i in range(num_edge_types):
            with tf.variable_scope('relation_{}'.format(i + 1),
                                   values=[inputs, adjacency]):
                eps = slim.model_variable('epsilon', shape=[])
                adj_i = adjacency[:, i, :, :]
                x_i = tf.multiply(1. + eps, inputs) + tf.matmul(adj_i, inputs)
                edge_outputs = slim.repeat(
                    x_i,
                    layers,
                    slim.linear,
                    num_outputs=num_outputs,
                    weights_initializer=weights_initializer,
                    biases_initializer=biases_initializer)
                edge_layers.append(edge_outputs)
        edge_layers = tf.stack(edge_layers, axis=1)
        x = tf.reduce_sum(edge_layers, axis=1)
        if activation_fn is not None:
            x = activation_fn(x)
        return x
def __init__(self, net, labels_one_hot):
    self.net = net
    self.batch_size = self.net.get_shape().dims[0].value
    self.zero_labels = tf.zeros([self.batch_size, CFG.CLASSES_NUMS])
    self.labels_one_hot = labels_one_hot

    self.char_logits = {}
    regularizer = slim.l2_regularizer(0.0)
    self.softmax_w = slim.model_variable(
        'softmax_w', [CFG.NUM_LSTM_UNITS, CFG.CLASSES_NUMS],
        initializer=orthogonal_initializer,
        regularizer=regularizer)
    self.softmax_b = slim.model_variable(
        'softmax_b', [CFG.CLASSES_NUMS],
        initializer=tf.zeros_initializer(),
        regularizer=regularizer)
def get_filters(length, num, scope, init=1, dtype=tf.float32):
    """Gets the filters based on a Gaussian or Cauchy distribution.

    Gaussian and Cauchy distributions are very similar; we find that Cauchy
    can converge more quickly.

    Args:
      length: the temporal length of the filter
      num: number of distributions
      scope: variable scope
      init: std variance
      dtype: layer type

    Returns:
      the filters
    """
    with tf.variable_scope(scope):
        # create slim variables for the center and std of the distribution
        center = contrib_slim.model_variable(
            'tgm-center',
            shape=[num],
            initializer=tf.initializers.random_normal(0, 0.5))
        gamma = contrib_slim.model_variable(
            'tgm-gamma',
            shape=[num],
            initializer=tf.initializers.random_normal(0, init))

        # create the distributions (eqs from paper)
        center = tf.cast(tf.tanh(center), dtype)
        gamma = tf.cast(tf.tanh(gamma), dtype)

        center = tf.expand_dims((length - 1) * (center + 1) / 2, -1)
        gamma = tf.expand_dims(
            tf.expand_dims(tf.exp(1.5 - 2 * tf.abs(gamma)), -1), -1)

        a = tf.expand_dims(tf.cast(tf.zeros(num), dtype), -1)
        a += center

        b = tf.cast(tf.range(length), dtype)
        f = b - tf.expand_dims(a, -1)
        f = f / gamma
        f = np.pi * gamma * tf.square(f) + 1
        f = 1.0 / f
        f = f / tf.expand_dims(tf.reduce_sum(f, axis=2) + 1e-6, -1)

        return tf.squeeze(f)
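# A minimal sketch of get_filters above (arguments illustrative; assumes the
# module's contrib_slim and numpy imports are in place). The result is `num`
# temporal filters of length `length`, each normalized to sum to
# approximately 1.
tgm_filters = get_filters(length=16, num=4, scope='tgm_demo')
# tgm_filters: shape (4, 16) after the final tf.squeeze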
def _bahdanau_attention(self, memory, seq_lens, maxlen, query, size,
                        batch_size, idx=0, name='Attention'):
    WD = self.network_architecture['L2']
    with tf.variable_scope(name) as scope:
        with slim.arg_scope(
            [slim.model_variable],
                initializer=self.network_architecture['initializer'](
                    self._seed + idx),
                regularizer=slim.l2_regularizer(WD),
                device='/GPU:0'):
            # Define attention parameters
            v = slim.model_variable('v', shape=[1, size])
            U = slim.model_variable('u', shape=[size, size])
            W = slim.model_variable('w', shape=[size, size])
            # with tf.variable_scope('ADV') as scope:
            biases = slim.model_variable(
                'biases',
                shape=[size],
                initializer=tf.constant_initializer(0.1))

            # Score each memory position against the query (additive
            # attention).
            tmp_a = tf.reshape(memory, [-1, size])
            tmp_a = tf.matmul(tmp_a, U)
            tmp_a = tf.reshape(tmp_a, [batch_size, -1, size])
            tmp_q = tf.matmul(query, W)
            tmp_q = tf.expand_dims(tmp_q, axis=1)
            tmp = tf.nn.tanh(tmp_q + tmp_a + biases)
            tmp = tf.reshape(tmp, [-1, size])
            tmp = tf.matmul(tmp, v, transpose_b=True)
            tmp = tf.reshape(tmp, [batch_size, -1])

            # Masked softmax over the valid timesteps.
            mask = tf.sequence_mask(seq_lens, maxlen=maxlen, dtype=tf.float32)
            a = tf.exp(tmp) * mask
            attention = a / tf.reduce_sum(a, axis=1, keep_dims=True)

            outputs = tf.reduce_sum(tf.expand_dims(attention, 2) * memory,
                                    axis=1)
    return outputs, attention
def fully_connected_class(features, feature_dim, num_classes):
    # Higher-Order Relationships
    with slim.variable_scope.variable_scope("ball", reuse=None):
        weights = slim.model_variable(
            "mean_vectors", (feature_dim, int(num_classes)),
            initializer=tf.truncated_normal_initializer(stddev=1e-3),
            regularizer=None)
        scale = slim.model_variable(
            "scale", (), tf.float32,
            initializer=tf.constant_initializer(0., tf.float32),
            regularizer=slim.l2_regularizer(1e-1))
        scale = tf.nn.softplus(scale)

    # Mean vectors in columns, normalize axis 0.
    weights_normed = tf.nn.l2_normalize(weights, dim=0)
    logits = scale * tf.matmul(features, weights_normed)
    return logits
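# A minimal usage sketch for fully_connected_class above (hypothetical
# shapes). Rows of `features` are expected to be L2-normalized, so the
# logits are scaled cosine similarities to the class mean vectors.
import tensorflow as tf

feats_demo = tf.nn.l2_normalize(tf.random_normal([32, 128]), dim=1)
logits_demo = fully_connected_class(feats_demo, feature_dim=128,
                                    num_classes=10)  # -> [32, 10]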
def _batch_norm(inputs,
                decay=0.999,
                center=True,
                scale=False,
                epsilon=0.001,
                activation_fn=None,
                param_initializers=None,
                param_regularizers=None,
                updates_collections=tf.GraphKeys.UPDATE_OPS,
                is_training=True,
                reuse=None,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                batch_weights=None,
                fused=None,
                data_format='NHWC',
                zero_debias_moving_mean=False,
                scope=None,
                renorm=False,
                renorm_clipping=None,
                renorm_decay=0.99,
                adjustment=None):
    print("_batch_norm:center:", center)
    print("_batch_norm:scale :", scale)
    # Run batch norm with center/scale disabled; gamma and beta are applied
    # manually below so that gamma can be masked for pruning.
    bn_with_scale_false = slim.batch_norm(
        inputs, decay, False, False, epsilon, activation_fn,
        param_initializers, param_regularizers, updates_collections,
        is_training, reuse, variables_collections, outputs_collections,
        trainable, batch_weights, fused, data_format,
        zero_debias_moving_mean, scope, renorm, renorm_clipping,
        renorm_decay, adjustment)
    with tf.variable_scope('XBatchNorm') as scbn:
        gamma = slim.model_variable(
            'gamma',
            shape=[inputs.shape[-1]],
            initializer=tf.ones_initializer(),
            regularizer=slim.l1_regularizer(0.0001))  # slim.l1_regularizer!!!
        beta = slim.model_variable('beta',
                                   shape=[inputs.shape[-1]],
                                   initializer=tf.zeros_initializer())
    bn = tf.multiply(bn_with_scale_false, pruning.apply_mask(gamma, scbn))
    bn = tf.add(bn, beta)
    return bn
def _construct_prompt_encoder(self, p_input, p_seqlens):
    """Construct the prompt encoder RNN.

    Args:
      p_input: prompt token ids, shape [batch_size, max_len]
      p_seqlens: prompt sequence lengths, shape [batch_size]

    Returns:
      prompt_embeddings: concatenation of the final forward and backward
        LSTM states.
    """
    L2 = self.network_architecture['L2']
    initializer = self.network_architecture['initializer']

    # Word embeddings
    with tf.variable_scope('Embeddings',
                           initializer=initializer(self._seed)) as scope:
        embedding = slim.model_variable(
            'word_embedding',
            shape=[
                self.network_architecture['n_in'],
                self.network_architecture['n_ehid']
            ],
            initializer=tf.truncated_normal_initializer(stddev=0.1),
            regularizer=slim.l2_regularizer(L2),
            device='/GPU:0')
        p_inputs = tf.nn.embedding_lookup(embedding,
                                          p_input,
                                          name='embedded_data')

        p_inputs_fw = tf.transpose(p_inputs, [1, 0, 2])
        p_inputs_bw = tf.transpose(
            tf.reverse_sequence(p_inputs,
                                seq_lengths=p_seqlens,
                                seq_axis=1,
                                batch_axis=0), [1, 0, 2])

    # Prompt Encoder RNN (forward and backward passes)
    with tf.variable_scope('RNN_Q_FW',
                           initializer=initializer(self._seed)) as scope:
        rnn_fw = tf.contrib.rnn.LSTMBlockFusedCell(
            num_units=self.network_architecture['n_phid'])
        _, state_fw = rnn_fw(p_inputs_fw,
                             sequence_length=p_seqlens,
                             dtype=tf.float32)

    with tf.variable_scope('RNN_Q_BW',
                           initializer=initializer(self._seed)) as scope:
        rnn_bw = tf.contrib.rnn.LSTMBlockFusedCell(
            num_units=self.network_architecture['n_phid'])
        _, state_bw = rnn_bw(p_inputs_bw,
                             sequence_length=p_seqlens,
                             dtype=tf.float32)

    prompt_embeddings = tf.concat([state_fw[1], state_bw[1]], axis=1)
    return prompt_embeddings
def modelVariable():
    weight1 = slim.model_variable(
        name="weight1",
        shape=[2, 3],
        initializer=tf.truncated_normal_initializer(stddev=0.1),
        regularizer=slim.l2_regularizer(scale=0.05))
    weight2 = slim.model_variable(
        name="weight2",
        shape=[2, 3],
        initializer=tf.truncated_normal_initializer(stddev=0.1),
        regularizer=slim.l2_regularizer(scale=0.05))
    model_variable = slim.get_model_variables()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(weight1))
        print("-----------------")
        print(sess.run(weight2))
        print("-----------------")
        print(sess.run(slim.get_variables_by_suffix("weight1")))
def vlad(feature_map, config, training, mask=None):
    with tf.variable_scope('vlad'):
        if config['intermediate_proj']:
            with slim.arg_scope([slim.conv2d, slim.batch_norm],
                                trainable=training):
                with slim.arg_scope([slim.batch_norm], is_training=training):
                    feature_map = slim.conv2d(
                        feature_map,
                        config['intermediate_proj'],
                        1,
                        rate=1,
                        activation_fn=None,
                        normalizer_fn=slim.batch_norm,
                        weights_initializer=slim.initializers.xavier_initializer(),
                        trainable=training,
                        scope='pre_proj')

        batch_size = tf.shape(feature_map)[0]
        feature_dim = feature_map.shape[-1]

        with slim.arg_scope([slim.batch_norm],
                            trainable=training,
                            is_training=training):
            memberships = slim.conv2d(
                feature_map,
                config['n_clusters'],
                1,
                rate=1,
                activation_fn=None,
                normalizer_fn=slim.batch_norm,
                weights_initializer=slim.initializers.xavier_initializer(),
                trainable=training,
                scope='memberships')
            memberships = tf.nn.softmax(memberships, axis=-1)

        clusters = slim.model_variable(
            'clusters',
            shape=[1, 1, 1, config['n_clusters'], feature_dim],
            initializer=slim.initializers.xavier_initializer(),
            trainable=training)
        residuals = clusters - tf.expand_dims(feature_map, axis=3)
        residuals *= tf.expand_dims(memberships, axis=-1)
        if mask is not None:
            residuals *= tf.to_float(mask)[..., tf.newaxis, tf.newaxis]
        descriptor = tf.reduce_sum(residuals, axis=[1, 2])

        descriptor = tf.nn.l2_normalize(descriptor, axis=1)  # intra-normalization
        descriptor = tf.reshape(
            descriptor, [batch_size, feature_dim * config['n_clusters']])
        descriptor = tf.nn.l2_normalize(descriptor, axis=1)
        return descriptor
def prelu(inputs, data_format='NHWC', scope=None):
    with tf.variable_scope(scope, default_name='prelu'):
        channel_dim = 1 if data_format == 'NCHW' else 3
        inputs_shape = inputs.get_shape().as_list()
        alpha_shape = [1 for i in range(len(inputs_shape))]
        alpha_shape[channel_dim] = inputs_shape[channel_dim]
        alpha = slim.model_variable(
            'weights',
            alpha_shape,
            initializer=tf.constant_initializer(0.25))
        outputs = tf.where(inputs > 0, inputs, inputs * alpha)
        return outputs
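# A quick sketch using the prelu above (illustrative shape). The learned
# alpha has one value per channel and broadcasts across the other
# dimensions.
import tensorflow as tf

act_in = tf.random_normal([2, 8, 8, 16])          # NHWC
act_out = prelu(act_in, scope='prelu_demo')       # same shape as act_in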
def _label_conditioned_variable(name, initializer, labels, num_categories):
    """Label conditioning."""
    # Note: params_shape, dtype, variables_collections and trainable are
    # closed over from the enclosing scope.
    shape = tf.TensorShape([num_categories]).concatenate(params_shape)
    var_collections = slim.utils.get_variable_collections(
        variables_collections, name)
    var = slim.model_variable(name,
                              shape=shape,
                              dtype=dtype,
                              initializer=initializer,
                              collections=var_collections,
                              trainable=trainable)
    conditioned_var = tf.gather(var, labels)
    conditioned_var = tf.expand_dims(tf.expand_dims(conditioned_var, 1), 1)
    return conditioned_var
def _network_template(self, obs, num_layers, hidden_units):
    """PixelCNN network architecture."""
    with slim.arg_scope([slim.conv2d, masked_conv2d],
                        weights_initializer=tf.variance_scaling_initializer(
                            distribution='uniform'),
                        biases_initializer=tf.constant_initializer(0.0)):
        net = masked_conv2d(obs,
                            hidden_units, [7, 7],
                            mask_type='A',
                            activation_fn=None,
                            scope='masked_conv_1')

        embedding = slim.model_variable(
            'embedding',
            shape=(1,) + self.resize_shape + (4,),
            initializer=tf.variance_scaling_initializer(
                distribution='uniform'))
        for i in range(1, num_layers + 1):
            net2 = gating_layer(net, embedding, hidden_units,
                                'gating_{}'.format(i))
            net += masked_conv2d(net2,
                                 hidden_units, [1, 1],
                                 mask_type='B',
                                 activation_fn=None,
                                 scope='masked_conv_{}'.format(i + 1))

        net += slim.conv2d(embedding, hidden_units, [1, 1],
                           activation_fn=None)
        net = tf.nn.relu(net)
        net = masked_conv2d(net,
                            64, [1, 1],
                            scope='1x1_conv_out',
                            mask_type='B',
                            activation_fn=tf.nn.relu)
        logits = masked_conv2d(net,
                               self.quantization_factor, [1, 1],
                               scope='logits',
                               mask_type='B',
                               activation_fn=None)
    loss = tf.losses.sparse_softmax_cross_entropy(
        labels=tf.cast(obs, tf.int32),
        logits=logits,
        reduction=tf.losses.Reduction.MEAN)
    return collections.namedtuple('PixelCNN_network',
                                  ['logits', 'loss'])(logits, loss)
def relational_gin_mlp_neighborhood_aggregation(
        inputs,
        num_outputs,
        adjacency,
        activation_fn=None,
        weights_initializer=tf.glorot_uniform_initializer(),
        biases_initializer=tf.zeros_initializer(),
        use_bias=True,
        scope=None):
    """Aggregate features from neighbors."""
    with tf.variable_scope(scope, 'gin_mlp_neigh_agg', [inputs, adjacency]):
        adj_shape = adjacency.get_shape()
        if adj_shape.ndims != 3:
            raise ValueError('Rank mismatch: adjacency (received %s) should '
                             'have rank 3' % (adj_shape.ndims,))
        in_shape = inputs.get_shape()
        if in_shape.ndims != 2:
            raise ValueError('Rank mismatch: inputs (received %s) should '
                             'have rank 2' % (in_shape.ndims,))
        adjacency = tf.cast(adjacency, dtype=inputs.dtype)
        num_edge_types = adjacency.shape[0].value
        edge_layers = []
        for i in range(num_edge_types):
            with tf.variable_scope('relation_{}'.format(i + 1),
                                   values=[inputs, adjacency]):
                eps = slim.model_variable('epsilon', shape=[])
                adj_i = adjacency[i]
                x_i = tf.multiply(1. + eps, inputs) + tf.matmul(adj_i, inputs)
                edge_outputs = tf.layers.dense(
                    x_i,
                    num_outputs,
                    activation=None,
                    kernel_initializer=weights_initializer,
                    bias_initializer=biases_initializer,
                    use_bias=use_bias)
                edge_layers.append(edge_outputs)
        edge_layers = tf.stack(edge_layers, axis=1)
        x = tf.reduce_sum(edge_layers, axis=1)
        if activation_fn is not None:
            x = activation_fn(x)
        return x
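# A hedged sketch of calling the dense variant above: 3 edge types over a
# 5-node graph with 7-dimensional node features (all sizes illustrative).
import tensorflow as tf

gin_nodes = tf.random_normal([5, 7])
gin_adj = tf.cast(tf.random_uniform([3, 5, 5]) > 0.5, tf.float32)
gin_out = relational_gin_mlp_neighborhood_aggregation(
    gin_nodes, num_outputs=13, adjacency=gin_adj, activation_fn=tf.nn.relu)
# gin_out: [5, 13], summed over the 3 relation-specific GIN updates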
def bilinear_conv2d(net,
                    scope_name,
                    kernel_size,
                    in_depth,
                    out_depth,
                    rate,
                    reuse=None,
                    use_bias=True,
                    activation_fn=tf.nn.elu):
    with tf.variable_scope(scope_name, reuse=reuse) as scope:
        upsampled_size = (kernel_size - 1) * (rate - 1) + kernel_size
        kernel = slim.model_variable(
            'weights',
            shape=[kernel_size, kernel_size, in_depth, out_depth],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Bilinearly upsample the kernel: treat in_depth as the batch axis
        # and out_depth as channels so tf.image.resize_bilinear applies.
        kernel = tf.transpose(kernel, perm=[2, 0, 1, 3])
        kernel = tf.image.resize_bilinear(kernel,
                                          [upsampled_size, upsampled_size])
        kernel = tf.transpose(kernel, perm=[1, 2, 0, 3])
        conv = tf.nn.conv2d(net, kernel, [1, 1, 1, 1], padding='SAME')
        if use_bias:
            biases = tf.Variable(tf.constant(0.0,
                                             shape=[out_depth],
                                             dtype=tf.float32),
                                 trainable=True,
                                 name='biases')
            conv = tf.nn.bias_add(conv, biases)
        # conv = activation_fn(conv, name=scope_name)
        return conv
def get_variable(self, name, shape, use_slim=False, **kwargs):
    import tensorflow.contrib.slim as slim
    with tf.device('/cpu:0'):
        dtype = tf.float32  # tf.float16 if FLAGS.use_fp16 else tf.float32
        if kwargs.get('trainable') is None:
            kwargs['trainable'] = self.trainable
        if kwargs.get('dtype'):
            dtype = kwargs['dtype']
            del kwargs['dtype']
        if use_slim:
            var = slim.model_variable(name, shape=shape, dtype=dtype,
                                      **kwargs)
        else:
            var = tf.get_variable(name, shape, dtype=dtype, **kwargs)
    return var
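# A hedged usage sketch for get_variable above (not from the original
# source). `self` refers to the owning class instance, so the call is shown
# as it would appear inside one of its methods:
#
#   w = self.get_variable('w', [256, 10], use_slim=True,
#                         initializer=tf.zeros_initializer())
#
# With use_slim=True the variable is created via slim.model_variable and so
# appears in slim.get_model_variables(); otherwise tf.get_variable is used.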
def prelu(inputs, data_format='NHWC', scope=None):
    with tf.variable_scope(scope, default_name='prelu'):
        channel_dim = 1 if data_format == 'NCHW' else -1
        inputs_shape = inputs.get_shape().as_list()
        alpha_shape = [1 for i in range(len(inputs_shape))]
        alpha_shape[channel_dim] = inputs_shape[channel_dim]
        alpha = slim.model_variable(
            'weights',
            alpha_shape,
            initializer=tf.constant_initializer(0.25))
        jit_scope = tf.contrib.compiler.jit.experimental_jit_scope
        with jit_scope():
            outputs = tf.where(inputs > 0, inputs, inputs * alpha)
            # outputs = tf.maximum(0.0, inputs) + alpha * tf.minimum(0.0, inputs)
        return outputs
def _weighted_variable(name, initializer, weights, num_categories):
    """Weighting."""
    # Note: params_shape, dtype, variables_collections and trainable are
    # closed over from the enclosing scope.
    shape = tf.TensorShape([num_categories]).concatenate(params_shape)
    var_collections = slim.utils.get_variable_collections(
        variables_collections, name)
    var = slim.model_variable(name,
                              shape=shape,
                              dtype=dtype,
                              initializer=initializer,
                              collections=var_collections,
                              trainable=trainable)
    weights = tf.reshape(
        weights,
        weights.get_shape().concatenate([1] * params_shape.ndims))
    conditioned_var = weights * var
    conditioned_var = tf.reduce_sum(conditioned_var, 0, keep_dims=True)
    conditioned_var = tf.expand_dims(tf.expand_dims(conditioned_var, 1), 1)
    return conditioned_var
def _model_variable_getter(
        getter,
        name,
        shape=None,
        dtype=None,
        initializer=None,
        regularizer=None,
        trainable=True,
        collections=None,
        caching_device=None,
        partitioner=None,
        rename=None,
        use_resource=None,
        synchronization=tf_variables.VariableSynchronization.AUTO,
        aggregation=tf_variables.VariableAggregation.NONE,
        **_):
    """Getter that uses model_variable for compatibility with core layers."""
    short_name = name.split('/')[-1]
    if rename and short_name in rename:
        name_components = name.split('/')
        name_components[-1] = rename[short_name]
        name = '/'.join(name_components)
    from tensorflow.contrib.slim import model_variable
    return model_variable(name,
                          shape=shape,
                          dtype=dtype,
                          initializer=initializer,
                          regularizer=regularizer,
                          collections=collections,
                          trainable=trainable,
                          caching_device=caching_device,
                          partitioner=partitioner,
                          custom_getter=getter,
                          use_resource=use_resource,
                          synchronization=synchronization,
                          aggregation=aggregation)
def em_routing(votes_ij, activations_i, batch_size, spatial_routing_matrix):
    """The EM routing between input capsules (i) and output capsules (j).

    See Hinton et al. "Matrix Capsules with EM Routing" for a detailed
    description of EM routing.

    Author: Ashley Gritzman 19/10/2018

    Definitions:
      N -> number of samples in batch
      OH -> output height
      OW -> output width
      kh -> kernel height
      kw -> kernel width
      kk -> kh * kw
      i -> number of input capsules, also called "child_caps"
      o -> number of output capsules, also called "parent_caps"
      child_space -> spatial dimensions of input capsule layer i
      parent_space -> spatial dimensions of output capsule layer j
      n_channels -> number of channels in pose matrix (usually 4x4=16)

    Args:
      votes_ij: votes from capsules in layer i to capsules in layer j
        For conv layer:
          (N*OH*OW, kh*kw*i, o, 4x4)
          (64*6*6, 9*8, 32, 16)
        For FC layer:
          The kernel dimensions are equal to the spatial dimensions of the
          input layer i, and the spatial dimensions of the output layer j
          are 1x1.
          (N*1*1, child_space*child_space*i, o, 4x4)
          (64, 4*4*16, 5, 16)
      activations_i: activations of capsules in layer i (L)
        (N*OH*OW, kh*kw*i, 1)
        (64*6*6, 9*8, 1)
      batch_size: number of samples in the batch (N)
      spatial_routing_matrix: binary np.array of shape
        (child_space^2, parent_space^2) mapping spatial locations in layer i
        to the locations in layer j that they route to

    Returns:
      poses_j: poses of capsules in layer j (L+1)
        (N, OH, OW, o, 4x4)
        (64, 6, 6, 32, 16)
      activations_j: activations of capsules in layer j (L+1)
        (N, OH, OW, o, 1)
        (64, 6, 6, 32, 1)
    """
    # ----- Dimensions -----#

    # Get dimensions needed to do conversions
    N = batch_size
    votes_shape = votes_ij.get_shape().as_list()
    OH = np.sqrt(int(votes_shape[0]) / N)
    OH = int(OH)
    OW = np.sqrt(int(votes_shape[0]) / N)
    OW = int(OW)
    kh_kw_i = int(votes_shape[1])
    o = int(votes_shape[2])
    n_channels = int(votes_shape[3])

    # Calculate kernel size by adding up a column of the spatial routing
    # matrix. Do this before converting the spatial_routing_matrix to tf.
    kk = int(np.sum(spatial_routing_matrix[:, 0]))

    parent_caps = o
    child_caps = int(kh_kw_i / kk)

    rt_mat_shape = spatial_routing_matrix.shape
    child_space_2 = rt_mat_shape[0]
    child_space = int(np.sqrt(child_space_2))
    parent_space_2 = rt_mat_shape[1]
    parent_space = int(np.sqrt(parent_space_2))

    # ----- Reshape Inputs -----#

    # conv: (N*OH*OW, kh*kw*i, o, 4x4) -> (N, OH, OW, kh*kw*i, o, 4x4)
    # FC: (N, child_space*child_space*i, o, 4x4)
    #     -> (N, 1, 1, child_space*child_space*i, output_classes, 4x4)
    votes_ij = tf.reshape(votes_ij, [N, OH, OW, kh_kw_i, o, n_channels])

    # (N*OH*OW, kh*kw*i, 1) -> (N, OH, OW, kh*kw*i, 1, 1)
    # (24, 6, 6, 288, 1, 1)
    activations_i = tf.reshape(activations_i, [N, OH, OW, kh_kw_i, 1, 1])

    # ----- Betas -----#

    """
    # Initialization from Jonathan Hui [1]:
    beta_v_hui = tf.get_variable(
        name='beta_v',
        shape=[1, 1, 1, o],
        dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer())
    beta_a_hui = tf.get_variable(
        name='beta_a',
        shape=[1, 1, 1, o],
        dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer())

    # AG 21/11/2018:
    # Tried to find std according to Hinton's comments on OpenReview
    # https://openreview.net/forum?id=HJWLfGWRb&noteId=r1lQjCAChm
    # Hinton: "We used truncated_normal_initializer and set the std so that
    # at the start of training half of the capsules in each layer are active
    # and half inactive (for the Primary Capsule layer where the activation
    # is not computed through routing we use different std for activation
    # convolution weights & for pose parameter convolution weights)."
    #
    # std beta_v seems to control the spread of activations.
    # To try and achieve what Hinton said about half active and half not
    # active, I changed the std values and checked the histograms/
    # distributions in Tensorboard to try and get a good spread across all
    # values. I couldn't get this working nicely.
    beta_v_hui = slim.model_variable(
        name='beta_v',
        shape=[1, 1, 1, 1, o, 1],
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(mean=0.0, stddev=10.0))
    """
    beta_a = slim.model_variable(
        name='beta_a',
        shape=[1, 1, 1, 1, o, 1],
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(mean=-1000.0,
                                                    stddev=500.0))

    # AG 04/10/2018: using slim.variable to create instead of tf.get_variable
    # so that they get correctly placed on the CPU instead of GPU in the
    # multi-gpu version.
    # One beta per output capsule type
    # (1, 1, 1, 1, 32, 1)
    # (N, OH, OW, i, o, n_channels)
    beta_v = slim.model_variable(
        name='beta_v',
        shape=[1, 1, 1, 1, o, 1],
        dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer(),
        regularizer=None)
    """
    beta_a = slim.model_variable(
        name='beta_a',
        shape=[1, 1, 1, 1, o, 1],
        dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer(),
        regularizer=None)
    """

    with tf.variable_scope("em_routing") as scope:
        # Initialise routing assignments
        # rr (1, 6, 6, 9, 8, 16)
        # (1, parent_space, parent_space, kk, child_caps, parent_caps)
        rr = utl.init_rr(spatial_routing_matrix, child_caps, parent_caps)

        # Need to reshape (1, 6, 6, 9, 8, 16) -> (1, 6, 6, 9*8, 16, 1)
        rr = np.reshape(
            rr,
            [1, parent_space, parent_space, kk * child_caps, parent_caps, 1])

        # Convert rr from np to tf
        rr = tf.constant(rr, dtype=tf.float32)

        for it in range(FLAGS.iter_routing):
            # AG 17/09/2018: modified schedule for inverse_temperature
            # (lambda) based on Hinton's response to questions on
            # OpenReview.net:
            # https://openreview.net/forum?id=HJWLfGWRb
            # "the formula we used for lambda is:
            #   lambda = final_lambda * (1 - tf.pow(0.95,
            #       tf.cast(i + 1, tf.float32)))
            # where 'i' is the routing iteration (range is 0-2). Final_lambda
            # is set to 0.01."
            # final_lambda = 0.01
            final_lambda = FLAGS.final_lambda
            inverse_temperature = (
                final_lambda *
                (1 - tf.pow(0.95, tf.cast(it + 1, tf.float32))))

            # AG 26/06/2018: added var_j
            activations_j, mean_j, stdv_j, var_j = m_step(
                rr,
                votes_ij,
                activations_i,
                beta_v,
                beta_a,
                inverse_temperature=inverse_temperature)

            # We skip the e_step call in the last iteration because we only
            # need to return the a_j and the mean from the m_step in the last
            # iteration to compute the output capsule activation and pose
            # matrices.
            if it < FLAGS.iter_routing - 1:
                rr = e_step(votes_ij,
                            activations_j,
                            mean_j,
                            stdv_j,
                            var_j,
                            spatial_routing_matrix)

        # pose: (N, OH, OW, o, 4 x 4) via squeeze mean_j (24, 6, 6, 32, 16)
        poses_j = tf.squeeze(mean_j, axis=-3, name="poses")

        # activation: (N, OH, OW, o, 1) via squeeze; activations_j is
        # (24, 6, 6, 32, 1)
        activations_j = tf.squeeze(activations_j, axis=-3, name="activations")

    return poses_j, activations_j
def _create_network(incoming, num_classes, reuse=None, l2_normalize=True,
                    create_summaries=True, weight_decay=1e-8):
    nonlinearity = tf.nn.elu
    conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    conv_bias_init = tf.zeros_initializer()
    conv_regularizer = slim.l2_regularizer(weight_decay)
    fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    fc_bias_init = tf.zeros_initializer()
    fc_regularizer = slim.l2_regularizer(weight_decay)

    def batch_norm_fn(x):
        return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn")

    network = incoming
    network = slim.conv2d(network, 32, [3, 3], stride=1,
                          activation_fn=nonlinearity, padding="SAME",
                          normalizer_fn=batch_norm_fn, scope="conv1_1",
                          weights_initializer=conv_weight_init,
                          biases_initializer=conv_bias_init,
                          weights_regularizer=conv_regularizer)
    if create_summaries:
        tf.summary.histogram(network.name + "/activations", network)
        tf.summary.image("conv1_1/weights",
                         tf.transpose(
                             slim.get_variables("conv1_1/weights:0")[0],
                             [3, 0, 1, 2]),
                         max_outputs=128)
    network = slim.conv2d(network, 32, [3, 3], stride=1,
                          activation_fn=nonlinearity, padding="SAME",
                          normalizer_fn=batch_norm_fn, scope="conv1_2",
                          weights_initializer=conv_weight_init,
                          biases_initializer=conv_bias_init,
                          weights_regularizer=conv_regularizer)
    if create_summaries:
        tf.summary.histogram(network.name + "/activations", network)

    # NOTE(nwojke): This is missing a padding="SAME" to match the CNN
    # architecture in Table 1 of the paper. Information on how this affects
    # performance on MOT 16 training sequences can be found in
    # issue 10 https://github.com/nwojke/deep_sort/issues/10
    network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1")

    network = residual_block(network, "conv2_1", nonlinearity,
                             conv_weight_init, conv_bias_init,
                             conv_regularizer, increase_dim=False,
                             is_first=True,
                             summarize_activations=create_summaries)
    network = residual_block(network, "conv2_3", nonlinearity,
                             conv_weight_init, conv_bias_init,
                             conv_regularizer, increase_dim=False,
                             summarize_activations=create_summaries)
    network = residual_block(network, "conv3_1", nonlinearity,
                             conv_weight_init, conv_bias_init,
                             conv_regularizer, increase_dim=True,
                             summarize_activations=create_summaries)
    network = residual_block(network, "conv3_3", nonlinearity,
                             conv_weight_init, conv_bias_init,
                             conv_regularizer, increase_dim=False,
                             summarize_activations=create_summaries)
    network = residual_block(network, "conv4_1", nonlinearity,
                             conv_weight_init, conv_bias_init,
                             conv_regularizer, increase_dim=True,
                             summarize_activations=create_summaries)
    network = residual_block(network, "conv4_3", nonlinearity,
                             conv_weight_init, conv_bias_init,
                             conv_regularizer, increase_dim=False,
                             summarize_activations=create_summaries)

    feature_dim = network.get_shape().as_list()[-1]
    # print("feature dimensionality: ", feature_dim)
    network = slim.flatten(network)
    network = slim.dropout(network, keep_prob=0.6)
    network = slim.fully_connected(network, feature_dim,
                                   activation_fn=nonlinearity,
                                   normalizer_fn=batch_norm_fn,
                                   weights_regularizer=fc_regularizer,
                                   scope="fc1",
                                   weights_initializer=fc_weight_init,
                                   biases_initializer=fc_bias_init)

    features = network

    if l2_normalize:
        # Features in rows, normalize axis 1.
        features = slim.batch_norm(features, scope="ball", reuse=reuse)
        feature_norm = tf.sqrt(
            tf.constant(1e-8, tf.float32) +
            tf.reduce_sum(tf.square(features), [1], keep_dims=True))
        features = features / feature_norm

        with slim.variable_scope.variable_scope("ball", reuse=reuse):
            weights = slim.model_variable(
                "mean_vectors", (feature_dim, num_classes),
                initializer=tf.truncated_normal_initializer(stddev=1e-3),
                regularizer=None)
            scale = slim.model_variable(
                "scale", (num_classes,), tf.float32,
                tf.constant_initializer(0., tf.float32),
                regularizer=None)
            if create_summaries:
                tf.summary.histogram("scale", scale)
            # scale = slim.model_variable(
            #     "scale", (), tf.float32,
            #     initializer=tf.constant_initializer(0., tf.float32),
            #     regularizer=slim.l2_regularizer(1e-2))
            # if create_summaries:
            #     tf.scalar_summary("scale", scale)
            scale = tf.nn.softplus(scale)

        # Each mean vector in columns, normalize axis 0.
        weight_norm = tf.sqrt(
            tf.constant(1e-8, tf.float32) +
            tf.reduce_sum(tf.square(weights), [0], keep_dims=True))
        logits = scale * tf.matmul(features, weights / weight_norm)
    else:
        logits = slim.fully_connected(features, num_classes,
                                      activation_fn=None, normalizer_fn=None,
                                      weights_regularizer=fc_regularizer,
                                      scope="softmax",
                                      weights_initializer=fc_weight_init,
                                      biases_initializer=fc_bias_init)
    return features, logits
def create_network(images, num_classes=None, add_logits=True, reuse=None,
                   create_summaries=True, weight_decay=1e-8):
    nonlinearity = tf.nn.elu
    conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    conv_bias_init = tf.zeros_initializer()
    conv_regularizer = slim.l2_regularizer(weight_decay)
    fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    fc_bias_init = tf.zeros_initializer()
    fc_regularizer = slim.l2_regularizer(weight_decay)

    def batch_norm_fn(x):
        return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn")

    network = images
    network = slim.conv2d(network, 32, [3, 3], stride=1,
                          activation_fn=nonlinearity, padding="SAME",
                          normalizer_fn=batch_norm_fn, scope="conv1_1",
                          weights_initializer=conv_weight_init,
                          biases_initializer=conv_bias_init,
                          weights_regularizer=conv_regularizer)
    if create_summaries:
        tf.summary.histogram(network.name + "/activations", network)
        tf.summary.image("conv1_1/weights",
                         tf.transpose(
                             slim.get_variables("conv1_1/weights:0")[0],
                             [3, 0, 1, 2]),
                         max_outputs=128)
    network = slim.conv2d(network, 32, [3, 3], stride=1,
                          activation_fn=nonlinearity, padding="SAME",
                          normalizer_fn=batch_norm_fn, scope="conv1_2",
                          weights_initializer=conv_weight_init,
                          biases_initializer=conv_bias_init,
                          weights_regularizer=conv_regularizer)
    if create_summaries:
        tf.summary.histogram(network.name + "/activations", network)

    network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1",
                              padding="SAME")

    network = residual_net.residual_block(
        network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False, is_first=True,
        summarize_activations=create_summaries)
    network = residual_net.residual_block(
        network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False,
        summarize_activations=create_summaries)
    network = residual_net.residual_block(
        network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=True,
        summarize_activations=create_summaries)
    network = residual_net.residual_block(
        network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False,
        summarize_activations=create_summaries)
    network = residual_net.residual_block(
        network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=True,
        summarize_activations=create_summaries)
    network = residual_net.residual_block(
        network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False,
        summarize_activations=create_summaries)

    feature_dim = network.get_shape().as_list()[-1]
    print("feature dimensionality: ", feature_dim)
    network = slim.flatten(network)
    network = slim.dropout(network, keep_prob=0.6)
    network = slim.fully_connected(network, feature_dim,
                                   activation_fn=nonlinearity,
                                   normalizer_fn=batch_norm_fn,
                                   weights_regularizer=fc_regularizer,
                                   scope="fc1",
                                   weights_initializer=fc_weight_init,
                                   biases_initializer=fc_bias_init)

    features = network

    # Features in rows, normalize axis 1.
    features = tf.nn.l2_normalize(features, dim=1)

    if add_logits:
        with slim.variable_scope.variable_scope("ball", reuse=reuse):
            weights = slim.model_variable(
                "mean_vectors", (feature_dim, int(num_classes)),
                initializer=tf.truncated_normal_initializer(stddev=1e-3),
                regularizer=None)
            scale = slim.model_variable(
                "scale", (), tf.float32,
                initializer=tf.constant_initializer(0., tf.float32),
                regularizer=slim.l2_regularizer(1e-1))
            if create_summaries:
                tf.summary.scalar("scale", scale)
            scale = tf.nn.softplus(scale)

        # Mean vectors in columns, normalize axis 0.
        weights_normed = tf.nn.l2_normalize(weights, dim=0)
        logits = scale * tf.matmul(features, weights_normed)
    else:
        logits = None
    return features, logits
def var_on_cpu(name, shape, initializer, dtype=tf.float32):
    return slim.model_variable(name,
                               shape,
                               dtype=dtype,
                               initializer=initializer,
                               device='/CPU:0')
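# Example call for var_on_cpu above (shape and initializer illustrative).
# Pinning model variables to the CPU is a common pattern for multi-GPU
# towers that share parameters across devices.
import tensorflow as tf

conv_w = var_on_cpu('conv_w_demo', [3, 3, 64, 128],
                    tf.truncated_normal_initializer(stddev=0.1))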
import tensorflow as tf
import tensorflow.contrib.slim as slim

# Build a graph.
weights = slim.variable('weights',
                        shape=[10, 10, 3, 3],
                        initializer=tf.truncated_normal_initializer(stddev=0.1),
                        regularizer=slim.l2_regularizer(0.05),
                        device='/CPU:0')
weights_2 = slim.model_variable('weights_2',
                                shape=[10, 10, 3, 3],
                                initializer=tf.truncated_normal_initializer(stddev=0.1),
                                regularizer=slim.l2_regularizer(0.05),
                                device='/CPU:0')
my_var = slim.variable('my_var',
                       shape=[20, 1],
                       initializer=tf.zeros_initializer())

regular_variables_and_model_variables = slim.get_variables()
variables_to_restore = slim.get_variables_to_restore(exclude=["v1"])

# Launch the graph in a session and evaluate `my_var`.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    print(my_var.eval())
with tf.Session() as sess:
    sess.run(init_op)
    val_w4 = sess.run(weight_4)
    print(val_w4.shape)
    plt.hist(val_w4)
    plt.show()
'''

# 3. TF-slim: model variable and regular variable
# Creating a model variable
weight_5 = slim.model_variable(
    'w5',
    shape=[10, 10, 3, 3],
    initializer=tf.truncated_normal_initializer(stddev=0.1),
    regularizer=slim.l2_regularizer(0.05),
    device='/CPU:0')
model_variables = slim.get_model_variables()
print([var.name for var in model_variables])
# >> [u'w5:0']

# Creating a regular variable
my_var_1 = slim.variable('mv1',
                         shape=[20, 1],
                         initializer=tf.zeros_initializer(),
                         device='/device:GPU:0')
model_variables = slim.get_model_variables()
all_variables = slim.get_variables()
print([var.name for var in model_variables])
# >> [u'w5:0']
def netvlad(net, videos_per_batch, weight_decay, netvlad_initCenters):
    end_points = {}
    # VLAD pooling
    try:
        netvlad_initCenters = int(netvlad_initCenters)
        # initialize the cluster centers randomly
        cluster_centers = np.random.normal(
            size=(netvlad_initCenters, net.get_shape().as_list()[-1]))
        logging.info('Randomly initializing the {} netvlad cluster '
                     'centers'.format(cluster_centers.shape))
    except ValueError:
        with open(netvlad_initCenters, 'rb') as fin:
            kmeans = pickle.load(fin)
            cluster_centers = kmeans.cluster_centers_
    with tf.variable_scope('NetVLAD'):
        # normalize features
        net_normed = tf.nn.l2_normalize(net, 3, name='FeatureNorm')
        end_points[tf.get_variable_scope().name + '/net_normed'] = net_normed

        vlad_centers = slim.model_variable(
            'centers',
            shape=cluster_centers.shape,
            initializer=tf.constant_initializer(cluster_centers),
            regularizer=slim.l2_regularizer(weight_decay))
        end_points[tf.get_variable_scope().name +
                   '/vlad_centers'] = vlad_centers

        vlad_W = slim.model_variable(
            'vlad_W',
            shape=(1, 1,) + cluster_centers.transpose().shape,
            initializer=tf.constant_initializer(
                cluster_centers.transpose()[np.newaxis, np.newaxis, ...] *
                2 * FLAGS.netvlad_alpha),
            regularizer=slim.l2_regularizer(weight_decay))
        end_points[tf.get_variable_scope().name + '/vlad_W'] = vlad_W

        vlad_B = slim.model_variable(
            'vlad_B',
            shape=cluster_centers.shape[0],
            initializer=tf.constant_initializer(
                -FLAGS.netvlad_alpha *
                np.sum(np.square(cluster_centers), axis=1)),
            regularizer=slim.l2_regularizer(weight_decay))
        end_points[tf.get_variable_scope().name + '/vlad_B'] = vlad_B

        conv_output = tf.nn.conv2d(net_normed, vlad_W, [1, 1, 1, 1], 'VALID')
        dists = tf.nn.bias_add(conv_output, vlad_B)
        assgn = softmax(dists, axis=3)
        end_points[tf.get_variable_scope().name + '/assgn'] = assgn

        vid_splits = tf.split(net_normed, videos_per_batch, axis=0)
        assgn_splits = tf.split(assgn, videos_per_batch, axis=0)

        num_vlad_centers = int(vlad_centers.get_shape()[0])
        vlad_centers_split = tf.split(vlad_centers, num_vlad_centers, axis=0)

        final_vlad = []
        for feats, assgn in zip(vid_splits, assgn_splits):
            vlad_vectors = []
            assgn_split_byCluster = tf.split(assgn, num_vlad_centers, axis=3)
            for k in range(num_vlad_centers):
                res = tf.reduce_sum(
                    tf.multiply(tf.subtract(feats, vlad_centers_split[k]),
                                assgn_split_byCluster[k]),
                    [0, 1, 2])
                vlad_vectors.append(res)
            vlad_vectors_frame = tf.stack(vlad_vectors, axis=0)
            final_vlad.append(vlad_vectors_frame)
        vlad_rep = tf.stack(final_vlad, axis=0, name='unnormed-vlad')
        end_points[tf.get_variable_scope().name +
                   '/unnormed_vlad'] = vlad_rep

        with tf.name_scope('intranorm'):
            intranormed = tf.nn.l2_normalize(vlad_rep, dim=2)
        end_points[tf.get_variable_scope().name +
                   '/intranormed_vlad'] = intranormed

        with tf.name_scope('finalnorm'):
            vlad_rep = tf.nn.l2_normalize(
                tf.reshape(intranormed,
                           [intranormed.get_shape().as_list()[0], -1]),
                dim=1)
    return vlad_rep, end_points