num_labels = 10  # number of speaker classes
num_segments_per_speaker = 10
num_data = 100
num_length = 100
num_dim = 512

features = tf.placeholder(tf.float32, shape=[None, None, num_dim], name="features")
labels = tf.placeholder(tf.int32, shape=[None], name="labels")
embeddings = tf.placeholder(tf.float32, shape=[None, num_dim], name="embeddings")

import numpy as np
features_val = np.random.rand(num_data, num_length, num_dim).astype(np.float32)
# Scale two utterances to extreme magnitudes to probe numerical stability.
features_val[2, :, :] = 1e-8 * features_val[2, :, :]
features_val[3, :, :] = 100 * features_val[3, :, :]
labels_val = np.random.randint(0, num_labels, size=(num_data,)).astype(np.int32)

from misc.utils import ParamsPlain
params = ParamsPlain()
params.dict["weight_l2_regularizer"] = 1e-5
params.dict["batchnorm_momentum"] = 0.99
params.dict["pooling_type"] = "statistics_pooling"
params.dict["last_layer_linear"] = False
params.dict["output_weight_l2_regularizer"] = 1e-4
params.dict["network_relu_type"] = "prelu"

# If the norm (s) is too large, the softmax value after applying the margin becomes extremely small.
params.dict["asoftmax_lambda_min"] = 10
params.dict["asoftmax_lambda_base"] = 1000
params.dict["asoftmax_lambda_gamma"] = 1
params.dict["asoftmax_lambda_power"] = 4
params.dict["amsoftmax_lambda_min"] = 10
params.dict["amsoftmax_lambda_base"] = 1000
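# A minimal sketch (an illustration, not this repo's loss code) of how the
# asoftmax_lambda_* values above are conventionally combined. SphereFace-style
# training anneals a weight lambda that blends the margin-modified logit with
# the plain logit, so optimization starts near ordinary softmax and the margin
# takes over gradually:
#   lambda = max(lambda_min, lambda_base * (1 + gamma * step) ** (-power))
def asoftmax_annealing_lambda(step, params):
    return max(params.dict["asoftmax_lambda_min"],
               params.dict["asoftmax_lambda_base"]
               * (1.0 + params.dict["asoftmax_lambda_gamma"] * step)
               ** (-params.dict["asoftmax_lambda_power"]))

# With the settings above, lambda starts at 1000 (margin nearly disabled) and
# decays toward the floor of 10 as the training step grows.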
#                                                  linguistic_features: linguistic_features_val})
# key = endpoints_val["att_key"]
# value = endpoints_val["att_value"]
# query = endpoints_val["att_query"]
#
# from model.test_utils import compute_attention
# att_np, penalty_loss_np = compute_attention(value, key, query, params)
#
# assert np.allclose(np.sum(att_val), np.sum(att_np))
# assert np.allclose(penalty_loss_val, penalty_loss_np)
#
# assert not np.any(np.isnan(grads_val)), "Gradient should not be nan"
# assert not np.any(np.isnan(grads_penalty_val)), "Gradient should not be nan"

# Self-attention
params = ParamsPlain()
params.dict["self_att_key_input"] = "key"
params.dict["self_att_key_num_nodes"] = []
params.dict["self_att_value_num_nodes"] = []
params.dict["self_att_num_heads"] = 10
params.dict["self_att_penalty_term"] = 1
params.dict["weight_l2_regularizer"] = 1e-2
params.dict["batchnorm_momentum"] = 0.99
endpoints["key"] = features
self_att = self_attention(features, aux_features, endpoints, params, is_training=True)
penalty_loss = tf.reduce_sum(tf.get_collection("PENALTY"))
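# A hedged numpy sketch (analogous to compute_self_attention in
# model/test_utils.py, but not a copy of it) of multi-head attentive pooling
# with the Lin et al. penalty ||A A^T - I||_F^2, which the "PENALTY"
# collection is assumed to gather: each head softmax-normalizes its scores
# over time, and the penalty encourages heads to attend to different frames.
import numpy as np

def multihead_attention_pooling(value, score, penalty_term):
    # value: [time, dim]; score: [time, num_heads] unnormalized attention weights
    e = np.exp(score - np.max(score, axis=0, keepdims=True))
    att = e / np.sum(e, axis=0, keepdims=True)           # [time, num_heads]
    pooled = np.dot(att.T, value)                        # [num_heads, dim]
    gram = np.dot(att.T, att)                            # A A^T, [num_heads, num_heads]
    penalty = penalty_term * np.sum((gram - np.eye(att.shape[1])) ** 2)
    return pooled.reshape(-1), penalty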
# att_output = endpoints_val["att_output_before_nonlinear"]
#
# from model.test_utils import compute_self_attention
# self_att_np, penalty_loss_np = compute_self_attention(value, key, query, params)
# assert not np.any(np.isnan(grads_val)), "Gradient should not be nan"
# assert not np.any(np.isnan(grads_penalty_val)), "Gradient should not be nan"
# assert np.allclose(penalty_loss_val, penalty_loss_np)
#
# for i in range(att_output.shape[0]):
#     for j in range(att_output.shape[1]):
#         if np.abs((att_output[i, j] - self_att_np[i, j]) / (att_output[i, j] + 1e-16)) > 1e-4:
#             print("%d %d %.10f %.10f" % (i, j, att_output[i, j], self_att_np[i, j]))
# assert np.allclose(att_output, self_att_np, rtol=1e-3)

# GhostVLAD (with key transform)
params = ParamsPlain()
params.dict["vlad_num_centers"] = 10
params.dict["vlad_num_ghosts"] = 2
params.dict["vlad_key_input"] = "key"
params.dict["vlad_key_num_nodes"] = [512]
params.dict["vlad_value_input"] = "value"
params.dict["vlad_value_num_nodes"] = []
params.dict["vlad_final_l2_norm"] = True
params.dict["weight_l2_regularizer"] = 1e-2
params.dict["batchnorm_momentum"] = 0.99
vlad = ghost_vlad(None, None, endpoints, params, is_training=True)
grads = tf.gradients(vlad, endpoints["value"])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
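# A hedged numpy sketch of the GhostVLAD idea configured above (an
# illustration, not this repo's ghost_vlad): frames are soft-assigned to
# num_centers + num_ghosts clusters, residuals are aggregated, and the ghost
# clusters are dropped before normalization so uninformative frames can be
# absorbed by them. The soft-assignment is shown via distances for
# simplicity; the learned version uses a linear projection of the keys.
import numpy as np

def ghost_vlad_np(value, centers, num_ghosts, alpha=1.0):
    # value: [time, dim]; centers: [num_centers + num_ghosts, dim]
    dist = -alpha * np.sum((value[:, None, :] - centers[None, :, :]) ** 2, axis=-1)
    e = np.exp(dist - np.max(dist, axis=1, keepdims=True))
    assign = e / np.sum(e, axis=1, keepdims=True)         # [time, K + G]
    residual = value[:, None, :] - centers[None, :, :]    # [time, K + G, dim]
    vlad = np.sum(assign[:, :, None] * residual, axis=0)  # [K + G, dim]
    vlad = vlad[:centers.shape[0] - num_ghosts]           # drop ghost clusters
    vlad = vlad / (np.linalg.norm(vlad, axis=1, keepdims=True) + 1e-12)  # intra-norm
    vlad = vlad.reshape(-1)
    return vlad / (np.linalg.norm(vlad) + 1e-12)          # final L2 norm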
    return stat_pooling


if __name__ == "__main__":
    num_labels = 10
    num_data = 100
    num_length = 1000
    num_dim = 1500

    features = tf.placeholder(tf.float32, shape=[None, None, num_dim], name="features")
    feat_length = tf.placeholder(tf.int32, shape=[None], name="feat_length")

    from collections import OrderedDict
    endpoints = OrderedDict()

    from misc.utils import ParamsPlain

    # Statistics pooling with variable-length inputs
    params = ParamsPlain()
    stat_pooling = statistics_pooling_v2(features, feat_length, endpoints, params, True)

    import numpy as np
    features_val = np.random.rand(num_data, num_length, num_dim).astype(np.float32)
    # The first utterance is all zeros to check the numerical stability of the std computation.
    features_val[0, :, :] = 0
    length_val = np.random.randint(100, 1001, size=(num_data,))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        stat_pooling_tf = sess.run(stat_pooling, feed_dict={features: features_val, feat_length: length_val})

    def compute_stat_pooling(features, length):
        # numpy reference: mean and std over the valid (unpadded) frames of each utterance
        num_data, l, dim = features.shape
        assert num_data == length.shape[0]
        pooling = np.zeros((num_data, 2 * dim), dtype=features.dtype)
        for i in range(num_data):
            valid = features[i, :length[i], :]
            mean = np.mean(valid, axis=0)
            # The epsilon is assumed; it keeps the std finite for the all-zero utterance.
            std = np.sqrt(np.mean((valid - mean) ** 2, axis=0) + 1e-12)
            pooling[i, :] = np.concatenate([mean, std])
        return pooling
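    # A hedged continuation (assumed, mirroring the comparison pattern used by
    # the other tests in this file): check the graph output against the numpy
    # reference. The tolerances are guesses, since the two sides may floor the
    # variance differently.
    stat_pooling_np = compute_stat_pooling(features_val, length_val)
    assert not np.any(np.isnan(stat_pooling_tf)), "Pooling output should not be nan"
    assert np.allclose(stat_pooling_tf, stat_pooling_np, rtol=1e-2, atol=1e-4)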