def testApplyNormBatch(self):
  """Checks that the "batch" norm type preserves the input shape."""
  expected_shape = (5, 2, 1, 11)
  features = tf.constant(np.random.rand(*expected_shape), dtype=tf.float32)
  normalized = common_layers.apply_norm(
      features, "batch", depth=11, epsilon=1e-6)
  # Initialize whatever variables were created while building the op.
  self.evaluate(tf.global_variables_initializer())
  result = self.evaluate(normalized)
  self.assertEqual(result.shape, expected_shape)
def testApplyNormLayer(self):
  """Checks that the "layer" norm type preserves the input shape.

  Runs the graph through `self.evaluate` instead of the deprecated
  `self.test_session()` context manager.
  """
  x1 = np.random.rand(5, 2, 1, 11)
  x2 = common_layers.apply_norm(
      tf.constant(x1, dtype=tf.float32), "layer", depth=11, epsilon=1e-6)
  # Initialize whatever variables were created while building the op.
  self.evaluate(tf.global_variables_initializer())
  actual = self.evaluate(x2)
  self.assertEqual(actual.shape, (5, 2, 1, 11))
def testApplyNormNone(self):
  """Checks that the "none" norm type returns values close to the input."""
  raw = np.random.rand(5, 2, 1, 11)
  features = tf.constant(raw, dtype=tf.float32)
  output = common_layers.apply_norm(features, "none", depth=11, epsilon=1e-6)
  self.evaluate(tf.global_variables_initializer())
  result = self.evaluate(output)
  self.assertEqual(result.shape, (5, 2, 1, 11))
  # The input is cast to float32 before the op, so compare with a tolerance
  # against the original NumPy values.
  self.assertAllClose(result, raw, atol=1e-03)
def graph_cnn(inputs, object_mask, num_layers, hidden_size, dropout,
              adjacency_matrix, norm_type="layer", norm_epsilon=0.001,
              test=False):
  """Encodes a screen with a stack of graph-convolution layers.

  The adjacency matrix A is first rescaled to D * A * D, where D is a diagonal
  matrix holding 1 / sqrt(row sum of A). Each layer then multiplies the hidden
  states by the rescaled adjacency, applies a dense projection (skipped when
  `test` is true), normalization, ReLU and dropout, and finally zeroes the
  positions that `object_mask` marks as padding.

  Args:
    inputs: [batch_size, num_steps, max_object_count, depth].
    object_mask: [batch_size, num_steps, max_object_count].
    num_layers: the number of graph-convolution layers.
    hidden_size: the number of units of each dense projection.
    dropout: dropout ratio; the keep probability is `1.0 - dropout`.
    adjacency_matrix: the adjacency matrix
      [batch_size, num_steps, max_object_count, max_object_count].
    norm_type: the norm type passed to `common_layers.apply_norm`.
    norm_epsilon: the epsilon passed to `common_layers.apply_norm`.
    test: whether it's in the test mode; when true the dense projection of
      every layer is skipped.

  Returns:
    hidden: a Tensor of shape
      [batch_size, num_steps, max_object_count, depth]
  """
  # 1 / sqrt(row sum), one value per object:
  # [batch_size, num_steps, max_num_objects, 1]
  row_sum = tf.reduce_sum(adjacency_matrix, -1, keepdims=True)
  inv_sqrt_degree = tf.div(1., tf.sqrt(row_sum))
  # Broadcast against an identity matrix to place the values on a diagonal:
  # [batch_size, num_steps, max_num_objects, max_num_objects]
  identity = tf.eye(tf.shape(inv_sqrt_degree)[-2])
  identity = tf.expand_dims(tf.expand_dims(identity, 0), 0)
  degree_scale = inv_sqrt_degree * identity
  scaled_adjacency = tf.matmul(
      tf.matmul(degree_scale, adjacency_matrix), degree_scale)

  states = inputs
  for layer_index in range(num_layers):
    scope_name = "gcn_layer_" + str(layer_index)
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
      # Aggregate over neighbours:
      # [batch_size, num_steps, max_num_objects, depth]
      states = tf.matmul(scaled_adjacency, states)
      if not test:
        states = tf.layers.dense(inputs=states, units=hidden_size)
      states = common_layers.apply_norm(
          states, norm_type, hidden_size, epsilon=norm_epsilon)
      states = tf.nn.relu(states)
      states = tf.nn.dropout(states, keep_prob=1.0 - dropout)
      # Zero out padding objects.
      states = states * tf.expand_dims(object_mask, 3)
  return states
def norm_fn(x, name):
  """Normalizes `x` using the settings of the enclosing scope's `hparams`.

  Args:
    x: the Tensor to normalize.
    name: the variable scope name; the scope falls back to the default name
      "norm" when this is None.

  Returns:
    The result of `common_layers.apply_norm` applied to `x`.
  """
  with tf.variable_scope(name, default_name="norm"):
    normalized = common_layers.apply_norm(
        x,
        hparams.norm_type,
        depth=hparams.hidden_size,
        epsilon=hparams.norm_epsilon)
  return normalized
def _compute_object_logits(hparams, object_hidden, screen_encoding,
                           screen_encoding_bias):
  """The output layer for a specific domain.

  Scores every object of `screen_encoding` against `object_hidden` with the
  alignment method selected by `hparams.alignment`, then computes a 2-unit
  `consumed_logits` projection from `object_hidden` concatenated with the
  encoding of the highest-scoring object.

  Args:
    hparams: hyperparameters; reads `alignment`, `hidden_size`, `norm_type`
      and `norm_epsilon`.
    object_hidden: the query Tensor. NOTE(review): presumably
      [batch_size, num_steps, hidden_size], judging by the expand_dims and
      reshape calls below -- confirm against callers.
    screen_encoding: the per-object encodings. NOTE(review): presumably
      [batch_size, num_steps, num_objects, depth] -- confirm against callers.
    screen_encoding_bias: a Tensor added to the squeezed alignment logits.

  Returns:
    obj_logits: the alignment logits plus `screen_encoding_bias`.
    consumed_logits: a Tensor of shape [batch_size, num_steps, 2].

  Raises:
    ValueError: if `hparams.alignment` is not a supported alignment method.
  """
  with tf.variable_scope("compute_object_logits", reuse=tf.AUTO_REUSE):
    if hparams.alignment == "cosine_similarity":
      object_hidden = tf.layers.dense(
          object_hidden, units=hparams.hidden_size)
      screen_encoding = tf.layers.dense(
          screen_encoding, units=hparams.hidden_size)
      norm_screen_encoding = tf.math.l2_normalize(screen_encoding, axis=-1)
      norm_obj_hidden = tf.math.l2_normalize(object_hidden, axis=-1)
      align_logits = tf.matmul(norm_screen_encoding,
                               tf.expand_dims(norm_obj_hidden, 3))
    elif hparams.alignment == "scaled_cosine_similarity":
      object_hidden = tf.layers.dense(
          object_hidden, units=hparams.hidden_size)
      # Re-assert the static size of the last dimension before the dense
      # projection.
      screen_encoding = tf.reshape(
          screen_encoding,
          common_layers.shape_list(
              screen_encoding)[:-1] + [hparams.hidden_size])
      screen_encoding = tf.layers.dense(
          screen_encoding, units=hparams.hidden_size)
      norm_screen_encoding = tf.math.l2_normalize(screen_encoding, axis=-1)
      norm_obj_hidden = tf.math.l2_normalize(object_hidden, axis=-1)
      dot_products = tf.matmul(norm_screen_encoding,
                               tf.expand_dims(norm_obj_hidden, 3))
      # Learned scale and offset applied to the cosine similarities.
      align_logits = tf.layers.dense(dot_products, units=1)
    elif hparams.alignment == "dot_product_attention":
      object_hidden = tf.layers.dense(
          object_hidden, units=hparams.hidden_size)
      align_logits = tf.matmul(screen_encoding,
                               tf.expand_dims(object_hidden, 3))
    elif hparams.alignment == "mlp_attention":
      batch_size = tf.shape(screen_encoding)[0]
      num_steps = tf.shape(screen_encoding)[1]
      num_objects = tf.shape(screen_encoding)[2]
      # Repeat the query once per object so it can be concatenated with
      # every object encoding.
      tiled_object_hidden = tf.tile(tf.expand_dims(object_hidden, 2),
                                    [1, 1, num_objects, 1])
      align_feature = tf.concat([tiled_object_hidden, screen_encoding],
                                axis=-1)
      align_feature = tf.reshape(
          align_feature,
          [batch_size, num_steps, num_objects, hparams.hidden_size * 2])
      with tf.variable_scope("align", reuse=tf.AUTO_REUSE):
        align_hidden = tf.layers.dense(align_feature,
                                       units=hparams.hidden_size)
        align_hidden = common_layers.apply_norm(
            align_hidden, hparams.norm_type, hparams.hidden_size,
            epsilon=hparams.norm_epsilon)
        align_hidden = tf.nn.tanh(align_hidden)
        align_logits = tf.layers.dense(align_hidden, units=1)
    else:
      raise ValueError("Unsupported alignment: %s" % hparams.alignment)
    # Drop the trailing singleton dimension left by the alignment step.
    obj_logits = tf.squeeze(align_logits, [3]) + screen_encoding_bias
    batch_size = common_layers.shape_list(obj_logits)[0]
    num_steps = common_layers.shape_list(obj_logits)[1]
    # Build (batch, step, best object) index triples, one per
    # (batch, step) pair; each index column is [batch_size * num_steps, 1].
    batch_indices = tf.to_int64(tf.reshape(
        tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, num_steps]),
        [-1, 1]))
    step_indices = tf.to_int64(tf.reshape(
        tf.tile(tf.expand_dims(tf.range(num_steps), 0), [batch_size, 1]),
        [-1, 1]))
    object_indices = tf.reshape(tf.argmax(obj_logits, -1), [-1, 1])
    indices = tf.concat([batch_indices, step_indices, object_indices], -1)
    # Encoding of the highest-scoring object: [batch_size, num_steps, depth]
    depth = tf.shape(screen_encoding)[-1]
    best_logits = tf.reshape(
        tf.gather_nd(screen_encoding, indices=indices),
        [batch_size, num_steps, depth])
    consumed_logits = tf.layers.dense(
        tf.reshape(tf.concat([object_hidden, best_logits], -1),
                   [batch_size, num_steps, hparams.hidden_size * 2]),
        2)
    # Fail as soon as ANY element is NaN. reduce_all would only trip when
    # every single element is NaN, letting partially corrupted logits pass.
    with tf.control_dependencies([tf.assert_equal(
        tf.reduce_any(tf.math.is_nan(consumed_logits)), False,
        data=[tf.shape(best_logits), best_logits,
              tf.constant("screen_encoding"), screen_encoding,
              tf.constant("indices"), indices],
        summarize=10000, message="consumed_logits_nan")]):
      consumed_logits = tf.identity(consumed_logits)
    return obj_logits, consumed_logits
def norm_fn(x, name):
  """Normalizes `x` using the settings of the enclosing scope's `hparams`.

  Args:
    x: the Tensor to normalize.
    name: the variable scope name; the scope falls back to the default name
      "norm" when this is None.

  Returns:
    The result of `common_layers.apply_norm` applied to `x`.
  """
  with tf.variable_scope(name, default_name="norm"):
    normalized = common_layers.apply_norm(
        x,
        hparams.norm_type,
        depth=hparams.hidden_size,
        epsilon=hparams.norm_epsilon)
  return normalized
def testApplyNormWithLayerCollection(self):
  """Checks that layer norm adds exactly one block to a layer collection."""
  collection = kfac.LayerCollection()
  features = np.random.rand(5, 2, 1, 11)
  common_layers.apply_norm(
      features, "layer", depth=11, epsilon=1e-6, layer_collection=collection)
  registered_blocks = collection.get_blocks()
  self.assertLen(registered_blocks, 1)