def __init__(self,
             memory_size=128,
             word_size=20,
             num_reads=1,
             num_writes=1,
             name='memory_access'):
  """Creates a MemoryAccess module.

  Args:
    memory_size: The number of memory slots (N in the DNC paper).
    word_size: The width of each memory slot (W in the DNC paper).
    num_reads: The number of read heads (R in the DNC paper).
    num_writes: The number of write heads (fixed at 1 in the paper).
    name: The name of the module.
  """
  super(MemoryAccess, self).__init__(name=name)
  self._memory_size = memory_size
  self._word_size = word_size
  self._num_reads = num_reads
  self._num_writes = num_writes

  self._write_content_weights_mod = addressing.CosineWeights(
      num_writes, word_size, name='write_content_weights')
  self._read_content_weights_mod = addressing.CosineWeights(
      num_reads, word_size, name='read_content_weights')

  self._linkage = addressing.TemporalLinkage(memory_size, num_writes)
  self._freeness = addressing.Freeness(memory_size)
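# The CosineWeights sub-modules above implement the DNC paper's content-based
# addressing: C(M, k, beta)[i] = softmax_i(beta * cosine_similarity(k, M[i, :])).
# Below is a minimal standalone numpy sketch of that computation, not part of
# the module; the function name, shapes, and epsilon are illustrative
# assumptions rather than the module's actual API.
import numpy as np

def content_weights_sketch(memory, keys, strengths, eps=1e-6):
  """memory: [N, W]; keys: [H, W]; strengths: [H] -> weights: [H, N]."""
  memory_norm = memory / (np.linalg.norm(memory, axis=1, keepdims=True) + eps)
  keys_norm = keys / (np.linalg.norm(keys, axis=1, keepdims=True) + eps)
  similarity = keys_norm @ memory_norm.T        # [H, N] cosine similarities
  scores = strengths[:, None] * similarity      # sharpen by per-head strength
  scores -= scores.max(axis=1, keepdims=True)   # numerically stable softmax
  exp_scores = np.exp(scores)
  return exp_scores / exp_scores.sum(axis=1, keepdims=True)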
def testWriteAllocationWeights(self):
  batch_size = 7
  memory_size = 23
  num_writes = 5
  module = addressing.Freeness(memory_size)

  usage = np.random.rand(batch_size, memory_size)
  write_gates = np.random.rand(batch_size, num_writes)

  # Turn off gates for heads 1 and 3 in batch 0. This doesn't scale down the
  # weighting, but it means that the usage doesn't change, so we should get
  # the same allocation weightings for head pairs (1, 2) and (3, 4), while
  # all other pairs differ.
  write_gates[0, 1] = 0
  write_gates[0, 3] = 0
  # And turn heads 0 and 2 on for full effect.
  write_gates[0, 0] = 1
  write_gates[0, 2] = 1

  # In batch 1, make one of the usages 0 and another almost 0, so that these
  # entries get most of the allocation weights for the first and second heads.
  usage[1] = usage[1] * 0.9 + 0.1  # make sure all entries are in [0.1, 1]
  usage[1][4] = 0  # write head 0 should get allocated to position 4
  usage[1][3] = 1e-4  # write head 1 should get allocated to position 3
  write_gates[1, 0] = 1  # write head 0 fully on
  write_gates[1, 1] = 1  # write head 1 fully on

  weights = module.write_allocation_weights(
      usage=tf.constant(usage),
      write_gates=tf.constant(write_gates),
      num_writes=num_writes)

  with self.test_session():
    weights = weights.eval()

  # Check that all weights are between 0 and 1.
  self.assertGreaterEqual(weights.min(), 0)
  self.assertLessEqual(weights.max(), 1)

  # Check that weights sum to close to 1.
  self.assertAllClose(
      np.sum(weights, axis=2), np.ones([batch_size, num_writes]), atol=1e-3)

  # Check the same / different allocation weight pairs as described above.
  self.assertGreater(np.abs(weights[0, 0, :] - weights[0, 1, :]).max(), 0.1)
  self.assertAllEqual(weights[0, 1, :], weights[0, 2, :])
  self.assertGreater(np.abs(weights[0, 2, :] - weights[0, 3, :]).max(), 0.1)
  self.assertAllEqual(weights[0, 3, :], weights[0, 4, :])

  self.assertAllClose(weights[1][0], util.one_hot(memory_size, 4), atol=1e-3)
  self.assertAllClose(weights[1][1], util.one_hot(memory_size, 3), atol=1e-3)
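# Reference sketch (not part of the test) of the behaviour exercised above,
# assuming write_allocation_weights follows the DNC paper: each head's
# allocation is computed from the current usage, and usage is then increased
# by (1 - usage) * gate * allocation before the next head. A zero gate leaves
# usage unchanged, so the next head sees identical usage and gets an identical
# allocation weighting. `allocation_fn` stands in for Freeness._allocation and
# is an assumption of this sketch.
import numpy as np

def write_allocation_weights_sketch(usage, write_gates, allocation_fn):
  """usage: [N]; write_gates: [num_writes] -> weights: [num_writes, N]."""
  usage = usage.copy()
  weights = []
  for gate in write_gates:
    a = allocation_fn(usage)                # allocation from the current usage
    weights.append(a)
    usage = usage + (1 - usage) * gate * a  # gate == 0 -> usage unchanged
  return np.stack(weights)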
def testAllocationGradient(self):
  batch_size = 1
  memory_size = 5
  usage = tf.constant(np.random.rand(batch_size, memory_size))
  module = addressing.Freeness(memory_size)
  allocation = module._allocation(usage)
  with self.test_session():
    err = tf.test.compute_gradient_error(
        usage,
        usage.get_shape().as_list(),
        allocation,
        allocation.get_shape().as_list(),
        delta=1e-5)
    self.assertLess(err, 0.01)
def testAllocation(self):
  batch_size = 7
  memory_size = 13
  usage = np.random.rand(batch_size, memory_size)
  module = addressing.Freeness(memory_size)
  allocation = module._allocation(tf.constant(usage))
  with self.test_session():
    allocation = allocation.eval()

  # 1. Test that max allocation goes to min usage, and vice versa.
  self.assertAllEqual(np.argmin(usage, axis=1), np.argmax(allocation, axis=1))
  self.assertAllEqual(np.argmax(usage, axis=1), np.argmin(allocation, axis=1))

  # 2. Test that allocations sum to almost 1.
  self.assertAllClose(np.sum(allocation, axis=1), np.ones(batch_size), 0.01)
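# Reference sketch (not part of the test) of the allocation weighting from the
# DNC paper that Freeness._allocation is expected to follow: with phi the
# locations sorted by increasing usage,
#   a[phi_j] = (1 - u[phi_j]) * prod_{k < j} u[phi_k],
# so the least-used slot receives the largest weight and the weights sum to
# 1 - prod_i u[i], which is close to 1 unless every slot is heavily used.
import numpy as np

def allocation_sketch(usage):
  """usage: [N] in [0, 1] -> allocation weights: [N]."""
  order = np.argsort(usage)                   # free list: least used first
  sorted_usage = usage[order]
  prod_before = np.cumprod(np.concatenate(([1.0], sorted_usage[:-1])))
  sorted_allocation = (1 - sorted_usage) * prod_before
  allocation = np.empty_like(usage)
  allocation[order] = sorted_allocation       # scatter back to original order
  return allocation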
def testWriteAllocationWeightsGradient(self):
  batch_size = 7
  memory_size = 5
  num_writes = 3
  module = addressing.Freeness(memory_size)

  usage = tf.constant(np.random.rand(batch_size, memory_size))
  write_gates = tf.constant(np.random.rand(batch_size, num_writes))
  weights = module.write_allocation_weights(usage, write_gates, num_writes)

  with self.test_session():
    err = tf.test.compute_gradient_error(
        [usage, write_gates],
        [usage.get_shape().as_list(),
         write_gates.get_shape().as_list()],
        weights,
        weights.get_shape().as_list(),
        delta=1e-5)
    self.assertLess(err, 0.01)
def testModule(self):
  batch_size = 5
  memory_size = 11
  num_reads = 3
  num_writes = 7
  module = addressing.Freeness(memory_size)

  free_gate = np.random.rand(batch_size, num_reads)

  # Produce read weights that sum to 1 for each batch and head.
  prev_read_weights = np.random.rand(batch_size, num_reads, memory_size)
  prev_read_weights[1, :, 3] = 0  # no read at batch 1, position 3; see below
  prev_read_weights /= prev_read_weights.sum(2, keepdims=True)
  prev_write_weights = np.random.rand(batch_size, num_writes, memory_size)
  prev_write_weights /= prev_write_weights.sum(2, keepdims=True)
  prev_usage = np.random.rand(batch_size, memory_size)

  # Add some special values that allow us to test the behaviour:
  prev_write_weights[1, 2, 3] = 1  # full write in batch 1, head 2, position 3
  prev_read_weights[2, 0, 4] = 1  # full read at batch 2, head 0, position 4
  free_gate[2, 0] = 1  # can free up all locations for batch 2, read head 0

  usage = module(
      tf.constant(prev_write_weights),
      tf.constant(free_gate),
      tf.constant(prev_read_weights),
      tf.constant(prev_usage))

  with self.test_session():
    usage = usage.eval()

  # Check all usages are between 0 and 1.
  self.assertGreaterEqual(usage.min(), 0)
  self.assertLessEqual(usage.max(), 1)

  # Check that the full write at batch 1, position 3 makes it fully used.
  self.assertEqual(usage[1][3], 1)

  # Check that the full free at batch 2, position 4 makes it fully free.
  self.assertEqual(usage[2][4], 0)
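# Reference sketch (not part of the test) of the usage update checked above,
# following the DNC paper's freeness equations (an assumption about the
# module's internals): writes push usage towards 1 via 1 - prod_h(1 - w_h),
# and reads scale it down by the retention vector psi = prod_r(1 - f_r * w_r).
# A full write therefore pins a slot's usage to 1, and a fully-gated full read
# frees it back to 0, which is what the two assertEqual checks verify.
import numpy as np

def usage_update_sketch(prev_write_weights, free_gate, prev_read_weights,
                        prev_usage):
  """Shapes: [num_writes, N], [num_reads], [num_reads, N], [N] -> [N]."""
  write_effect = 1 - np.prod(1 - prev_write_weights, axis=0)  # aggregate writes
  usage = prev_usage + (1 - prev_usage) * write_effect
  psi = np.prod(1 - free_gate[:, None] * prev_read_weights, axis=0)  # retention
  return usage * psi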