# Imports assumed by the snippets below; adjust the relational_memory import
# to match your checkout (e.g. a local relational_memory.py).
import numpy as np
import tensorflow as tf

import relational_memory


def testDifferingKeyHeadSizes(self, gate_style):
    """Checks if arbitrary key sizes are still supported."""
    mem_slots = 2
    head_size = 32
    num_heads = 2
    key_size = 128
    batch_size = 5
    input_shape = (batch_size, 3, 3)
    mem = relational_memory.RelationalMemory(
        mem_slots, head_size, num_heads, gate_style=gate_style,
        key_size=key_size)
    self.assertNotEqual(key_size, mem._head_size)

    inputs = tf.placeholder(tf.float32, input_shape)
    memory_0 = mem.initial_state(batch_size)
    _, memory_1 = mem(inputs, memory_0)

    with self.test_session() as session:
        tf.global_variables_initializer().run()
        results = session.run(
            {"memory_1": memory_1, "memory_0": memory_0},
            feed_dict={inputs: np.ones(input_shape)})
        self.assertTrue(
            np.any(np.not_equal(results["memory_0"], results["memory_1"])))
def testRecurrence(self, mem_slots, head_size, num_heads):
    """Checks if you can run the relational memory for 2 steps."""
    batch_size = 5
    num_blocks = 5
    input_shape = [batch_size, 3, 1]
    mem = relational_memory.RelationalMemory(
        mem_slots, head_size, num_heads, num_blocks=num_blocks)

    inputs = tf.placeholder(tf.float32, input_shape)
    hidden_0 = mem.initial_state(batch_size)
    _, hidden_1 = mem(inputs, hidden_0)
    _, hidden_2 = mem(inputs, hidden_1)

    with self.test_session() as session:
        tf.global_variables_initializer().run()
        results = session.run(
            {"hidden_2": hidden_2, "hidden_1": hidden_1},
            feed_dict={inputs: np.zeros(input_shape)})
        self.assertAllEqual(results["hidden_1"].shape,
                            results["hidden_2"].shape)
def testInputErasureWorking(self, gate_style):
    """Checks that the gates can make the update ignore the input entirely."""
    mem_slots = 2
    head_size = 32
    num_heads = 2
    batch_size = 5
    input_shape = (batch_size, 3, 3)
    # Saturating the gates (forget gate fully open, input gate fully closed)
    # should leave the memory unchanged by the update.
    mem = relational_memory.RelationalMemory(
        mem_slots, head_size, num_heads, forget_bias=float("+inf"),
        input_bias=float("-inf"), gate_style=gate_style)

    inputs = tf.placeholder(tf.float32, input_shape)
    memory_0 = mem.initial_state(batch_size)
    _, memory_1 = mem(inputs, memory_0)

    with self.test_session() as session:
        tf.global_variables_initializer().run()
        results = session.run(
            {"memory_1": memory_1, "memory_0": memory_0},
            feed_dict={inputs: np.ones(input_shape)})
        self.assertAllEqual(results["memory_0"], results["memory_1"])
def testMemoryUpdating(self):
    """Checks if memory is updating correctly."""
    mem_slots = 2
    head_size = 32
    num_heads = 4
    batch_size = 5
    input_shape = (batch_size, 3, 3)
    mem = relational_memory.RelationalMemory(
        mem_slots, head_size, num_heads, gate_style=None)

    inputs = tf.placeholder(tf.float32, input_shape)
    memory_0 = mem.initial_state(batch_size)
    _, memory_1 = mem(inputs, memory_0)

    with self.test_session() as session:
        tf.global_variables_initializer().run()
        results = session.run(
            {"memory_1": memory_1, "memory_0": memory_0},
            feed_dict={inputs: np.zeros(input_shape)})
        self.assertTrue(
            np.any(np.not_equal(results["memory_0"], results["memory_1"])))
def testStateSizeOutputSize(self):
    """Checks for correct `state_size` and `output_size` return values."""
    mem_slots = 4
    head_size = 32
    mem = relational_memory.RelationalMemory(mem_slots, head_size)
    self.assertItemsEqual([mem._mem_slots, mem._mem_size],
                          mem.state_size.as_list())
    self.assertItemsEqual([mem._mem_slots * mem._mem_size],
                          mem.output_size.as_list())
def testBadInputs(self):
    """Verifies that errors are raised for bad constructor arguments."""
    mem_slots = 4
    head_size = 32
    with self.assertRaisesRegexp(ValueError, "num_blocks must be >= 1"):
        relational_memory.RelationalMemory(mem_slots, head_size, num_blocks=0)
    with self.assertRaisesRegexp(ValueError,
                                 "attention_mlp_layers must be >= 1"):
        relational_memory.RelationalMemory(
            mem_slots, head_size, attention_mlp_layers=0)
    with self.assertRaisesRegexp(ValueError, "gate_style must be one of"):
        relational_memory.RelationalMemory(
            mem_slots, head_size, gate_style="bad_gate")
def __init__(self, vocab_size, embedding_size, batch_size, initialization,
             mem_slots, num_heads, use_pos, attention_mlp_layers, head_size):
    # Placeholders for input, output
    self.input_x = tf.placeholder(tf.int32, [batch_size, 3], name="input_h")
    self.input_y = tf.placeholder(tf.float32, [batch_size, 1], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32,
                                            name="dropout_keep_prob")

    # Embedding layer: use the pretrained matrix if one is given, otherwise
    # initialize randomly.
    with tf.name_scope("embedding"):
        if initialization != []:
            self.input_feature = tf.get_variable(
                name="input_feature_1", initializer=initialization)
        else:
            self.input_feature = tf.get_variable(
                name="input_feature_2", shape=[vocab_size, embedding_size],
                initializer=tf.contrib.layers.xavier_initializer(seed=1234))

    # Embedding lookup
    self.emb = tf.nn.embedding_lookup(self.input_feature, self.input_x)
    if use_pos == 1:
        self.emb = add_positional_embedding(self.emb, 3, embedding_size)

    # Split the (h, r, t) triple into three [batch, embedding_size] tensors.
    # `axis=1` keeps the batch dimension intact when batch_size is 1.
    self.h_emb, self.r_emb, self.t_emb = tf.split(
        self.emb, num_or_size_splits=3, axis=1)
    self.h_emb = tf.squeeze(self.h_emb, axis=1)
    self.r_emb = tf.squeeze(self.r_emb, axis=1)
    self.t_emb = tf.squeeze(self.t_emb, axis=1)

    # Feed h, r, t through the relational memory one step at a time,
    # threading the memory state between steps.
    gen_mem = relational_memory.RelationalMemory(
        mem_slots=mem_slots, head_size=head_size, num_heads=num_heads,
        gate_style='memory', attention_mlp_layers=attention_mlp_layers)
    init_states = gen_mem.initial_state(batch_size=batch_size)
    mem_output1, memory_input_next_step = gen_mem(self.h_emb, init_states)
    mem_output2, memory_input_next_step = gen_mem(self.r_emb,
                                                  memory_input_next_step)
    mem_output3, memory_input_next_step = gen_mem(self.t_emb,
                                                  memory_input_next_step)

    self.final_output = tf.nn.dropout(
        mem_output1 * mem_output2 * mem_output3, self.dropout_keep_prob)

    # Final scores and predictions
    with tf.name_scope("output1"):
        W1 = tf.get_variable(
            "W1", shape=[self.final_output.get_shape()[-1], 1],
            initializer=tf.contrib.layers.xavier_initializer(seed=1234))
        b1 = tf.Variable(tf.zeros([1]))
        self.scores = tf.nn.xw_plus_b(self.final_output, W1, b1,
                                      name="scores")
        self.predictions = tf.nn.sigmoid(self.scores)

    # Calculate mean logistic (softplus) loss over labels in {-1, 1}
    with tf.name_scope("loss"):
        losses = tf.nn.softplus(self.scores * self.input_y)
        self.loss = tf.reduce_mean(losses)

    self.saver = tf.compat.v1.train.Saver(tf.global_variables(),
                                          max_to_keep=500)
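# A minimal usage sketch for the model above, not part of the original code.
# The class name `RMeN` is an assumption (the snippet only shows __init__),
# `initialization=[]` picks the random-embedding branch, `use_pos=0` avoids
# the add_positional_embedding helper, and plain Adam is an illustrative
# choice rather than the original training setup.
def rmen_train_step_example():
    batch_size = 16
    model = RMeN(vocab_size=10000, embedding_size=64, batch_size=batch_size,
                 initialization=[], mem_slots=1, num_heads=2, use_pos=0,
                 attention_mlp_layers=2, head_size=32)
    train_op = tf.compat.v1.train.AdamOptimizer(1e-3).minimize(model.loss)
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        # Random (h, r, t) id triples and {-1, 1} labels, as assumed by the
        # softplus loss; real training would draw these from the dataset.
        x_batch = np.random.randint(0, 10000,
                                    size=(batch_size, 3)).astype(np.int32)
        y_batch = np.random.choice([-1.0, 1.0], size=(batch_size, 1))
        _, loss = sess.run([train_op, model.loss],
                           feed_dict={model.input_x: x_batch,
                                      model.input_y: y_batch,
                                      model.dropout_keep_prob: 0.5})
        print("loss:", loss)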
def testGateShapes(self, gate_style):
    """Checks the shapes of RelationalMemory gates."""
    mem_slots = 4
    head_size = 32
    num_heads = 4
    batch_size = 4
    input_shape = (batch_size, 3, 3)
    mem = relational_memory.RelationalMemory(
        mem_slots, head_size, num_heads, gate_style=gate_style)
    inputs = tf.placeholder(tf.float32, input_shape)
    init_state = mem.initial_state(batch_size)
    mem(inputs, init_state)

    gate_size = mem._calculate_gate_size()
    expected_size = [batch_size, num_heads, gate_size]
    self.assertEqual(mem.input_gate.get_shape().as_list(), expected_size)
    self.assertEqual(mem.forget_gate.get_shape().as_list(), expected_size)
def testOutputStateShapes(self, treat_input_as_matrix):
    """Checks the shapes of RelationalMemory output and state."""
    mem_slots = 4
    head_size = 32
    num_heads = 2
    batch_size = 5
    input_shape = (batch_size, 3, 3)
    mem = relational_memory.RelationalMemory(mem_slots, head_size, num_heads)
    inputs = tf.placeholder(tf.float32, input_shape)
    init_state = mem.initial_state(batch_size)
    out = mem(inputs, init_state,
              treat_input_as_matrix=treat_input_as_matrix)

    with self.test_session() as session:
        tf.global_variables_initializer().run()
        new_out, new_memory = session.run(
            out, feed_dict={inputs: np.zeros(input_shape)})
        self.assertAllEqual(init_state.get_shape().as_list(),
                            new_memory.shape)
        self.assertAllEqual(new_out.shape,
                            [batch_size, mem_slots * head_size * num_heads])
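# A minimal standalone sketch, not from the test file, of running
# RelationalMemory over a short sequence by manually threading the state
# between calls, mirroring what testRecurrence checks. The shapes and the
# relational_memory import follow the tests above.
def relational_memory_unroll_example():
    batch_size, seq_len = 5, 4
    mem = relational_memory.RelationalMemory(
        mem_slots=2, head_size=32, num_heads=2)
    inputs = tf.placeholder(tf.float32, [batch_size, seq_len, 3])
    state = mem.initial_state(batch_size)
    outputs = []
    for t in range(seq_len):
        # One memory update per time slice; the returned state feeds the
        # next step.
        out, state = mem(inputs[:, t], state)
        outputs.append(out)
    # [batch, seq_len, mem_slots * head_size * num_heads] plus final memory.
    return tf.stack(outputs, axis=1), state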
def __init__(self, embedding_size, batch_size, initialization, mem_slots,
             num_heads, use_pos, attention_mlp_layers, head_size,
             num_filters=128):
    # Placeholders for input, output
    self.input_x = tf.compat.v1.placeholder(
        tf.int32, [batch_size, 3], name="input_h")
    self.input_y = tf.compat.v1.placeholder(
        tf.float32, [batch_size, 1], name="input_y")
    self.dropout_keep_prob = tf.compat.v1.placeholder(
        tf.float32, name="dropout_keep_prob")

    # Embedding layer: the pretrained query/doc matrices are frozen, the
    # user embeddings remain trainable.
    with tf.name_scope("embedding"):
        self.W_query = tf.compat.v1.get_variable(
            name="W_query", initializer=initialization[0], trainable=False)
        self.W_user = tf.compat.v1.get_variable(
            name="W_user", initializer=initialization[1])
        self.W_doc = tf.compat.v1.get_variable(
            name="W_doc", initializer=initialization[2], trainable=False)

    # Embedding lookup for the (query, user, doc) triple
    self.h_emb = tf.nn.embedding_lookup(self.W_query, self.input_x[:, 0])
    self.r_emb = tf.nn.embedding_lookup(self.W_user, self.input_x[:, 1])
    self.t_emb = tf.nn.embedding_lookup(self.W_doc, self.input_x[:, 2])
    if use_pos == 1:
        self.h_emb = add_positional_embedding(
            self.h_emb, 1, embedding_size, name="pos_h")
        self.r_emb = add_positional_embedding(
            self.r_emb, 1, embedding_size, name="pos_r")
        self.t_emb = add_positional_embedding(
            self.t_emb, 1, embedding_size, name="pos_t")

    # Feed the triple through the relational memory one element at a time,
    # threading the memory state between steps.
    gen_mem = relational_memory.RelationalMemory(
        mem_slots=mem_slots, head_size=head_size, num_heads=num_heads,
        gate_style='memory', attention_mlp_layers=attention_mlp_layers)
    init_states = gen_mem.initial_state(batch_size=batch_size)
    mem_output1, memory_input_next_step = gen_mem(self.h_emb, init_states)
    mem_output2, memory_input_next_step = gen_mem(
        self.r_emb, memory_input_next_step)
    mem_output3, memory_input_next_step = gen_mem(
        self.t_emb, memory_input_next_step)

    # Stack the three memory outputs into a [batch, 3, dim, 1] "image" for
    # the CNN decoder.
    mem_output1 = tf.reshape(mem_output1,
                             [-1, 1, mem_output1.get_shape()[-1]])
    mem_output2 = tf.reshape(mem_output2,
                             [-1, 1, mem_output2.get_shape()[-1]])
    mem_output3 = tf.reshape(mem_output3,
                             [-1, 1, mem_output3.get_shape()[-1]])
    mem_output = tf.concat([mem_output1, mem_output2, mem_output3], axis=1)
    self.input_cnn = tf.expand_dims(mem_output, -1)

    # CNN decoder: a single convolution + max-pool layer
    with tf.name_scope("conv-maxpool"):
        W = tf.compat.v1.get_variable(
            "W_conv", shape=[3, 1, 1, num_filters],
            initializer=tf.contrib.layers.xavier_initializer(seed=1234))
        b = tf.Variable(tf.zeros([num_filters]))
        conv = tf.nn.conv2d(self.input_cnn, W, strides=[1, 1, 1, 1],
                            padding="VALID", name="conv")
        # Apply nonlinearity
        self.h_pool = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
        # Max-pooling over the outputs; squeezing `axis=[1, 2]` keeps the
        # batch dimension when batch_size is 1.
        self.h_pool = tf.squeeze(
            tf.nn.max_pool(
                self.h_pool,
                ksize=[1, 1, self.input_cnn.get_shape()[-2], 1],
                strides=[1, 1, 1, 1], padding='VALID', name="pool"),
            axis=[1, 2])

    # Add dropout
    with tf.name_scope("dropout"):
        self.final_output = tf.nn.dropout(self.h_pool,
                                          self.dropout_keep_prob)

    # Final scores and predictions
    with tf.name_scope("output"):
        W_output = tf.compat.v1.get_variable(
            "W1", shape=[self.final_output.get_shape()[-1], 1],
            initializer=tf.contrib.layers.xavier_initializer(seed=1234))
        b_output = tf.Variable(tf.zeros([1]))
        self.scores = tf.nn.xw_plus_b(self.final_output, W_output, b_output,
                                      name="scores")
        self.predictions = tf.nn.sigmoid(self.scores)

    # Calculate mean logistic (softplus) loss over labels in {-1, 1}
    with tf.name_scope("loss"):
        losses = tf.nn.softplus(self.scores * self.input_y)
        self.loss = tf.reduce_mean(losses)

    self.saver = tf.compat.v1.train.Saver(tf.global_variables(),
                                          max_to_keep=500)
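# A minimal scoring sketch for the CNN-decoder model above, illustrative and
# not part of the original code. The class name `RMeN_CNN` is an assumption,
# and the pretrained matrices are assumed to be float32 numpy arrays indexed
# by the (query, user, doc) ids fed through input_x.
def rmen_cnn_score_example(query_mat, user_mat, doc_mat):
    batch_size = 8
    model = RMeN_CNN(embedding_size=query_mat.shape[1],
                     batch_size=batch_size,
                     initialization=[query_mat, user_mat, doc_mat],
                     mem_slots=1, num_heads=2, use_pos=0,
                     attention_mlp_layers=2, head_size=32)
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        # Placeholder ids; real evaluation would use actual triples.
        x_batch = np.zeros((batch_size, 3), dtype=np.int32)
        # Keep all units at evaluation time (no dropout); input_y is not
        # needed to compute predictions.
        return sess.run(model.predictions,
                        feed_dict={model.input_x: x_batch,
                                   model.dropout_keep_prob: 1.0})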