def test_construction(self):
    """Run a single ``Memory.write`` step and verify every output shape."""

    def uniform(*shape):
        # float32 sample drawn from U[0, 1) with the requested shape.
        return np.random.uniform(0, 1, shape).astype(np.float32)

    # Random draws are made in the same order as the interface fields
    # below so the NumPy RNG stream is consumed identically.
    write_key = uniform(3, 9, 1)
    write_strength = uniform(3, 1)
    erase_vector = tf.convert_to_tensor(np.zeros((3, 9)).astype(np.float32))
    write_vector = tf.convert_to_tensor(uniform(3, 9))
    free_gates = uniform(3, 5)
    allocation_gate = uniform(3, 1)
    write_gate = uniform(3, 1)

    interface = DNC.interface(
        read_keys=None,
        read_strengths=None,
        write_key=write_key,
        write_strength=write_strength,
        erase_vector=erase_vector,
        write_vector=write_vector,
        free_gates=free_gates,
        allocation_gate=allocation_gate,
        write_gate=write_gate,
        read_modes=None,
    )

    mem = Memory(13, 9, 5)
    initial_state = mem.get_initial_state(batch_size=3)
    written = mem.write(initial_state, interface)
    usage, write_weighting, memory_matrix, link_matrix, precedence = written

    self.assertEqual(usage.shape, (3, 13))
    self.assertEqual(write_weighting.shape, (3, 13))
    self.assertEqual(memory_matrix.shape, (3, 13, 9))
    self.assertEqual(link_matrix.shape, (3, 13, 13))
    self.assertEqual(precedence.shape, (3, 13))
def test_get_allocation_weighting(self):
    """Check ``Memory.get_allocation_weighting`` against an iterative reference.

    For each batch row the reference walks the slots in ascending-usage
    order (``free_list``) and assigns slot ``free_list[i, j]`` the weight
    ``(1 - usage) * product of the usages of all slots visited before it``.
    """
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(graph=graph) as session:
            mem = Memory(4, 5, 2, 2)
            mock_usage = np.random.uniform(0.01, 1, (2, 4)).astype(np.float32)
            sorted_usage = np.sort(mock_usage, axis=1)
            free_list = np.argsort(mock_usage, axis=1)

            predicted_weights = np.zeros((2, 4)).astype(np.float32)
            for i in range(2):
                for j in range(4):
                    product_list = [
                        mock_usage[i, free_list[i, k]] for k in range(j)
                    ]
                    # np.prod replaces the deprecated np.product alias
                    # (removed in NumPy 2.0); the empty product is still 1.
                    predicted_weights[i, free_list[i, j]] = (
                        1 - mock_usage[i, free_list[i, j]]
                    ) * np.prod(product_list)

            op = mem.get_allocation_weighting(sorted_usage, free_list)
            a = session.run(op)

            self.assertEqual(a.shape, (2, 4))
            self.assertTrue(np.allclose(a, predicted_weights))
def test_construction(self):
    """Build the write op in graph mode, run it, and verify output shapes."""

    def uniform(*shape):
        # float32 sample drawn from U[0, 1) with the requested shape.
        return np.random.uniform(0, 1, shape).astype(np.float32)

    # Draw order mirrors the interface field order so the NumPy RNG
    # stream is consumed exactly as before.
    write_key = uniform(3, 9, 1)
    write_strength = uniform(3, 1)
    erase_vector = tf.convert_to_tensor(np.zeros((3, 9)).astype(np.float32))
    write_vector = tf.convert_to_tensor(uniform(3, 9))
    free_gates = uniform(3, 5)
    allocation_gate = uniform(3, 1)
    write_gate = uniform(3, 1)

    interface = DNC.interface(
        read_keys=None,
        read_strengths=None,
        write_key=write_key,
        write_strength=write_strength,
        erase_vector=erase_vector,
        write_vector=write_vector,
        free_gates=free_gates,
        allocation_gate=allocation_gate,
        write_gate=write_gate,
        read_modes=None,
    )

    mem = Memory(13, 9, 5)
    initial_state = mem.initial_state(3)
    write_op = mem.write(initial_state, interface)
    init_op = tf.global_variables_initializer()

    with self.test_session() as session:
        init_op.run()
        results = session.run(write_op)
        usage, write_weighting, memory_matrix, link_matrix, precedence = results

    self.assertEqual(usage.shape, (3, 13))
    self.assertEqual(write_weighting.shape, (3, 13))
    self.assertEqual(memory_matrix.shape, (3, 13, 9))
    self.assertEqual(link_matrix.shape, (3, 13, 13))
    self.assertEqual(precedence.shape, (3, 13))
def test_update_memory(self):
    """Compare ``Memory.update_memory`` with a NumPy erase-then-add reference."""
    graph = tf.Graph()
    with graph.as_default(), tf.Session(graph=graph) as session:
        mem = Memory(4, 5, 2, 2)

        weights = random_softmax((2, 4), axis=1)
        add_vec = np.random.uniform(0, 1, (2, 5)).astype(np.float32)
        erase_vec = np.random.uniform(0, 1, (2, 5)).astype(np.float32)
        start_memory = np.random.uniform(-1, 1, (2, 4, 5)).astype(np.float32)

        # Outer products of the write weighting with the erase / add
        # vectors, computed per batch element via batched matmul.
        weights_col = weights[:, :, np.newaxis]
        erase_term = np.matmul(weights_col, erase_vec[:, np.newaxis, :])
        add_term = np.matmul(weights_col, add_vec[:, np.newaxis, :])
        expected = start_memory * (1 - erase_term) + add_term

        memory_tensor = tf.convert_to_tensor(start_memory)
        op = mem.update_memory(memory_tensor, weights, add_vec, erase_vec)
        result = session.run(op)

        self.assertEqual(result.shape, (2, 4, 5))
        self.assertTrue(np.allclose(result, expected))
def test_lookup_weighting(self):
    """Compare ``Memory.get_lookup_weighting`` with a NumPy reference.

    The reference normalises memory rows and keys to unit length, takes
    their batched product, scales by the strengths and applies a softmax
    over axis 1.
    """
    graph = tf.Graph()
    with graph.as_default(), tf.Session(graph=graph) as session:
        mem = Memory(4, 5, 2, 2)

        initial_mem = np.random.uniform(0, 1, (2, 4, 5)).astype(np.float32)
        keys = np.random.uniform(0, 1, (2, 5, 2)).astype(np.float32)
        strengths = np.random.uniform(0, 1, (2, 2)).astype(np.float32)

        mem_norms = np.sqrt(np.sum(initial_mem**2, axis=2, keepdims=True))
        key_norms = np.sqrt(np.sum(keys**2, axis=1, keepdims=True))
        similarity = np.matmul(initial_mem / mem_norms, keys / key_norms)
        scaled = similarity * strengths[:, np.newaxis, :]

        exp_scaled = np.exp(scaled)
        expected = exp_scaled / np.sum(exp_scaled, axis=1, keepdims=True)

        memory_tensor = tf.convert_to_tensor(initial_mem)
        op = mem.get_lookup_weighting(memory_tensor, keys, strengths)
        result = session.run(op)

        self.assertEqual(result.shape, (2, 4, 2))
        self.assertTrue(np.allclose(result, expected))
def test_update_usage_vector(self):
    """Check ``Memory.update_usage_vector`` against a NumPy reference.

    Reference: ``(u + w - u * w) * psi`` where ``psi`` is the product over
    the last axis of ``1 - free_gate * read_weighting``.
    """
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(graph=graph) as session:
            mem = Memory(4, 5, 2, 2)
            free_gates = np.random.uniform(0, 1, (2, 2)).astype(np.float32)
            init_read_weightings = random_softmax((2, 4, 2), axis=1)
            init_write_weightings = random_softmax((2, 4), axis=1)
            init_usage = np.random.uniform(0, 1, (2, 4)).astype(np.float32)

            # np.prod replaces the deprecated np.product alias
            # (removed in NumPy 2.0).
            psi = np.prod(
                1 - init_read_weightings * free_gates[:, np.newaxis, :],
                axis=2,
            )
            predicted_usage = (
                init_usage
                + init_write_weightings
                - init_usage * init_write_weightings
            ) * psi

            read_weightings = tf.convert_to_tensor(init_read_weightings)
            write_weighting = tf.convert_to_tensor(init_write_weightings)
            usage_vector = tf.convert_to_tensor(init_usage)

            op = mem.update_usage_vector(
                usage_vector, read_weightings, write_weighting, free_gates
            )
            u = session.run(op)

            self.assertEqual(u.shape, (2, 4))
            # Tolerance-based comparison: the TensorFlow and NumPy paths may
            # round float32 intermediates differently, so bit-exact equality
            # is not a reliable check.
            self.assertTrue(np.allclose(u, predicted_usage))
def test_update_link_matrix(self):
    """Compare ``Memory.update_link_matrix`` with an element-wise reference."""
    graph = tf.Graph()
    with graph.as_default(), tf.compat.v1.Session(graph=graph) as session:
        mem = Memory(4, 5, 2, 2)

        write_w = random_softmax((2, 4), axis=1)
        prev_precedence = random_softmax((2, 4), axis=1)
        start_link = np.random.uniform(0, 1, (2, 4, 4)).astype(np.float32)
        # Zero the diagonal of each batch element in place.
        for batch_link in start_link:
            np.fill_diagonal(batch_link, 0)

        # Reference computed entry by entry (off-diagonal only) to validate
        # the vectorized implementation under test.
        expected = np.zeros((2, 4, 4), dtype=np.float32)
        for row in range(4):
            for col in range(4):
                if row == col:
                    continue
                keep = 1 - write_w[:, row] - write_w[:, col]
                expected[:, row, col] = (
                    keep * start_link[:, row, col]
                    + write_w[:, row] * prev_precedence[:, col]
                )

        link_tensor = tf.convert_to_tensor(value=start_link)
        precedence_tensor = tf.convert_to_tensor(value=prev_precedence)
        write_tensor = tf.constant(write_w)

        op = mem.update_link_matrix(precedence_tensor, link_tensor, write_tensor)
        result = session.run(op)

        self.assertTrue(np.allclose(result, expected))
def test_construction(self):
    """Verify that ``Memory`` stores its sizes and creates correctly shaped state."""
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(graph=graph) as session:
            mem = Memory(4, 5, 2, 2)
            # tf.initialize_all_variables() is deprecated; use the
            # replacement that the other tests here already rely on.
            session.run(tf.global_variables_initializer())

            self.assertEqual(mem.words_num, 4)
            self.assertEqual(mem.word_size, 5)
            self.assertEqual(mem.read_heads, 2)
            self.assertEqual(mem.batch_size, 2)

            self.assertEqual(mem.memory_matrix.get_shape().as_list(), [2, 4, 5])
            self.assertEqual(mem.usage_vector.get_shape().as_list(), [2, 4])
            self.assertEqual(mem.link_matrix.get_shape().as_list(), [2, 4, 4])
            self.assertEqual(mem.write_weighting.get_shape().as_list(), [2, 4])
            self.assertEqual(mem.read_weightings.get_shape().as_list(), [2, 4, 2])
            self.assertEqual(mem.read_vectors.get_shape().as_list(), [2, 5, 2])
def test_write(self):
    """Run one ``Memory.write`` step from the initial state and check output shapes."""
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(graph=graph) as session:
            mem = Memory(4, 5, 2, 1)
            M, u, p, L, ww, rw, r = session.run(mem.init_memory())

            key = np.random.uniform(0, 1, (1, 5, 1)).astype(np.float32)
            strength = np.random.uniform(0, 1, (1, 1)).astype(np.float32)
            free_gates = np.random.uniform(0, 1, (1, 2)).astype(np.float32)
            write_gate = np.random.uniform(0, 1, (1, 1)).astype(np.float32)
            allocation_gate = np.random.uniform(0, 1, (1, 1)).astype(np.float32)
            write_vector = np.random.uniform(0, 1, (1, 5)).astype(np.float32)
            # All-zero erase vector for this shape-only check.
            erase_vector = np.zeros((1, 5)).astype(np.float32)

            u_op, ww_op, M_op, L_op, p_op = mem.write(
                M, u, rw, ww, p, L,
                key, strength, free_gates,
                allocation_gate, write_gate,
                write_vector, erase_vector,
            )
            # tf.initialize_all_variables() is deprecated; use the
            # replacement that the other tests here already rely on.
            session.run(tf.global_variables_initializer())
            u, ww, M, L, p = session.run([u_op, ww_op, M_op, L_op, p_op])

            self.assertEqual(u.shape, (1, 4))
            self.assertEqual(ww.shape, (1, 4))
            self.assertEqual(M.shape, (1, 4, 5))
            self.assertEqual(L.shape, (1, 4, 4))
            self.assertEqual(p.shape, (1, 4))
def __init__(self, controller_class, input_size, output_size, max_sequence_length,
             memory_words_num=256, memory_word_size=64, memory_read_heads=4,
             batch_size=128):
    """
    constructs a complete DNC architecture as described in the DNC paper
    http://www.nature.com/nature/journal/vaop/ncurrent/full/nature20101.html

    Stores the configuration, instantiates the memory and the controller,
    creates the input/target/sequence-length placeholders and finally
    calls ``self.build_graph()``.

    Parameters:
    -----------
    controller_class: BaseController
        a concrete implementation of the BaseController class
    input_size: int
        the size of the input vector
    output_size: int
        the size of the output vector
    max_sequence_length: int
        the maximum length of an input sequence
    memory_words_num: int
        the number of words that can be stored in memory
    memory_word_size: int
        the size of an individual word in memory
    memory_read_heads: int
        the number of read heads in the memory
    batch_size: int
        the size of the data batch
    """
    self.input_size = input_size
    self.output_size = output_size
    self.max_sequence_length = max_sequence_length
    self.words_num = memory_words_num
    self.word_size = memory_word_size
    self.read_heads = memory_read_heads
    self.batch_size = batch_size

    self.memory = Memory(self.words_num, self.word_size, self.read_heads,
                         self.batch_size)
    self.controller = controller_class(self.input_size, self.output_size,
                                       self.read_heads, self.word_size,
                                       self.batch_size)

    # input data placeholders
    # The first two dimensions are left dynamic (None). The last dimension
    # of the input uses `input_size`: the previous code referenced an
    # undefined name `chunk_size`, while the controller above is built with
    # `input_size`.
    self.input_data = tf.placeholder(tf.float32, [None, None, input_size],
                                     name='input')
    self.target_output = tf.placeholder(tf.float32, [None, None, output_size],
                                        name='targets')
    self.sequence_length = tf.placeholder(tf.int32, name='sequence_length')

    self.build_graph()
def test_init_memory(self):
    """Verify the shape of every field in the initial memory state."""
    mem = Memory(words_num=13, word_size=7, read_heads_num=2)
    state = mem.get_initial_state(batch_size=9)

    # (field name, expected shape), checked in this order.
    expected_shapes = (
        ("memory_matrix", (9, 13, 7)),
        ("usage_vector", (9, 13)),
        ("link_matrix", (9, 13, 13)),
        ("precedence_vector", (9, 13)),
        ("write_weighting", (9, 13)),
        ("read_weightings", (9, 13, 2)),
    )
    for field, shape in expected_shapes:
        self.assertEqual(getattr(state, field).shape, shape)
def test_construction(self):
    """Verify that ``Memory`` records the sizes passed to its constructor."""
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(graph=graph) as session:
            mem = Memory(4, 5, 2, 2)
            # tf.initialize_all_variables() is deprecated; use the
            # replacement that the other tests here already rely on.
            session.run(tf.global_variables_initializer())

            self.assertEqual(mem.words_num, 4)
            self.assertEqual(mem.word_size, 5)
            self.assertEqual(mem.read_heads, 2)
            self.assertEqual(mem.batch_size, 2)
def test_init_memory(self):
    """Evaluate ``Memory.init_memory`` and verify the shape of each output."""
    graph = tf.Graph()
    with graph.as_default(), tf.Session(graph=graph) as session:
        mem = Memory(4, 5, 2, 2)
        initial = session.run(mem.init_memory())
        (memory_matrix, usage, precedence, link,
         write_w, read_w, read_vecs) = initial

        self.assertEqual(memory_matrix.shape, (2, 4, 5))
        self.assertEqual(usage.shape, (2, 4))
        self.assertEqual(link.shape, (2, 4, 4))
        self.assertEqual(write_w.shape, (2, 4))
        self.assertEqual(read_w.shape, (2, 4, 2))
        self.assertEqual(read_vecs.shape, (2, 5, 2))
        self.assertEqual(precedence.shape, (2, 4))
def test_update_read_vectors(self):
    """Compare ``Memory.update_read_vectors`` with ``memory^T @ read_weightings``."""
    graph = tf.Graph()
    with graph.as_default(), tf.Session(graph=graph) as session:
        mem = Memory(4, 5, 2, 4)

        memory_matrix = np.random.uniform(-1, 1, (4, 4, 5)).astype(np.float32)
        read_weightings = random_softmax((4, 4, 2), axis=1)

        # Swap the last two axes of each batch element before the
        # batched product.
        memory_t = np.transpose(memory_matrix, [0, 2, 1])
        expected = np.matmul(memory_t, read_weightings)

        op = mem.update_read_vectors(memory_matrix, read_weightings)
        session.run(tf.global_variables_initializer())
        result = session.run(op)

        self.assertTrue(np.allclose(result, expected))
def test_update_precedence_vector(self):
    """Compare ``Memory.update_precedence_vector`` with a NumPy reference.

    Reference: ``(1 - sum(w)) * p + w`` with the sum taken over axis 1.
    """
    graph = tf.Graph()
    with graph.as_default(), tf.Session(graph=graph) as session:
        mem = Memory(4, 5, 2, 2)

        write_w = random_softmax((2, 4), axis=1)
        start_precedence = random_softmax((2, 4), axis=1)

        retained = 1 - write_w.sum(axis=1, keepdims=True)
        expected = retained * start_precedence + write_w

        precedence_tensor = tf.convert_to_tensor(start_precedence)
        op = mem.update_precedence_vector(precedence_tensor, write_w)
        result = session.run(op)

        self.assertEqual(result.shape, (2, 4))
        self.assertTrue(np.allclose(result, expected))
def test_read(self):
    """Run one ``Memory.read`` step and verify the shapes of both outputs."""
    graph = tf.Graph()
    with graph.as_default(), tf.Session(graph=graph) as session:
        mem = Memory(4, 5, 2, 1)
        # Only the sixth element of the initial state (read weightings) is
        # needed; the seven-way unpack still checks the tuple length.
        _, _, _, _, _, read_w, _ = session.run(mem.init_memory())

        keys = np.random.uniform(0, 1, (1, 5, 2)).astype(np.float32)
        strengths = np.random.uniform(0, 1, (1, 2)).astype(np.float32)
        link_matrix = np.random.uniform(0, 1, (1, 4, 4)).astype(np.float32)
        read_modes = random_softmax((1, 3, 2), axis=1).astype(np.float32)
        memory_matrix = np.random.uniform(-1, 1, (1, 4, 5)).astype(np.float32)

        weightings_op, vectors_op = mem.read(
            memory_matrix, read_w, keys, strengths, link_matrix, read_modes
        )
        session.run(tf.global_variables_initializer())
        new_weightings, new_vectors = session.run([weightings_op, vectors_op])

        self.assertEqual(new_weightings.shape, (1, 4, 2))
        self.assertEqual(new_vectors.shape, (1, 5, 2))
def test_updated_write_weighting(self):
    """Compare ``Memory.update_write_weighting`` with a NumPy reference.

    Reference: ``g_w * (g_a * a + (1 - g_a) * c)`` where ``c`` is the lookup
    weighting with its singleton axis squeezed away.
    """
    graph = tf.Graph()
    with graph.as_default(), tf.Session(graph=graph) as session:
        mem = Memory(4, 5, 2, 2)

        write_gate = np.random.uniform(0, 1, (2, 1)).astype(np.float32)
        allocation_gate = np.random.uniform(0, 1, (2, 1)).astype(np.float32)
        lookup_w = random_softmax((2, 4, 1), axis=1)
        allocation_w = random_softmax((2, 4), axis=1)

        from_allocation = allocation_gate * allocation_w
        from_lookup = (1 - allocation_gate) * np.squeeze(lookup_w)
        expected = write_gate * (from_allocation + from_lookup)

        op = mem.update_write_weighting(
            lookup_w, allocation_w, write_gate, allocation_gate
        )
        result = session.run(op)

        self.assertEqual(result.shape, (2, 4))
        self.assertTrue(np.allclose(result, expected))
def test_get_directional_weightings(self):
    """Compare forward/backward weightings with ``L @ w`` and ``L^T @ w``."""
    graph = tf.Graph()
    with graph.as_default(), tf.Session(graph=graph) as session:
        mem = Memory(4, 5, 2, 2)

        link_np = np.random.uniform(0, 1, (2, 4, 4)).astype(np.float32)
        read_w_np = random_softmax((2, 4, 2), axis=1)

        expected_forward = np.matmul(link_np, read_w_np)
        link_np_t = np.transpose(link_np, [0, 2, 1])
        expected_backward = np.matmul(link_np_t, read_w_np)

        read_w_tensor = tf.convert_to_tensor(read_w_np)
        forward_op, backward_op = mem.get_directional_weightings(
            read_w_tensor, link_np
        )
        forward, backward = session.run([forward_op, backward_op])

        self.assertTrue(np.allclose(forward, expected_forward))
        self.assertTrue(np.allclose(backward, expected_backward))
def test_update_read_weightings(self):
    """Check ``Memory.update_read_weightings`` against a per-head reference.

    For each read head ``i`` the reference mixes the backward, lookup and
    forward weightings using rows 0, 1 and 2 of ``read_mode`` respectively.
    """
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(graph=graph) as session:
            mem = Memory(4, 5, 2, 2)
            lookup_weightings = random_softmax((2, 4, 2), axis=1)
            forward_weighting = random_softmax((2, 4, 2), axis=1)
            backward_weighting = random_softmax((2, 4, 2), axis=1)
            read_mode = random_softmax((2, 3, 2), axis=1)

            predicted_weights = np.zeros((2, 4, 2)).astype(np.float32)
            # calculate the predicted weights using iterative method from paper
            # to check the correctness of the vectorized implementation
            for i in range(2):
                predicted_weights[:, :, i] = (
                    read_mode[:, 0, i, np.newaxis] * backward_weighting[:, :, i]
                    + read_mode[:, 1, i, np.newaxis] * lookup_weightings[:, :, i]
                    + read_mode[:, 2, i, np.newaxis] * forward_weighting[:, :, i]
                )

            op = mem.update_read_weightings(
                lookup_weightings, forward_weighting, backward_weighting, read_mode
            )
            # tf.initialize_all_variables() is deprecated; use the
            # replacement that the other tests here already rely on.
            session.run(tf.global_variables_initializer())
            w_r = session.run(op)

            self.assertTrue(np.allclose(w_r, predicted_weights))
parser.add_argument("--no-dnc", action='store_true') parser.add_argument("--savedir", type=str, default="model") parser.add_argument("--logdir", type=str, default="logs") parser.add_argument("--learningrate", type=float, default=1e-4) parser.add_argument("--no-mask", action='store_true') args = parser.parse_args() BATCH_SIZE = args.batch_size task = eval(args.task) if args.test_params: test_params = eval(args.test_params) else: test_params = tuple(np.max(p) for p in task.default_params) memory = Memory(args.msize, args.mwidth, init_state=args.minit) memory.add_head(NTMReadHead, shifts=[-1, 0, 1]) memory.add_head(NTMWriteHead, shifts=[-1, 0, 1]) input = tf.placeholder(tf.float32, shape=(None, None, task.input_size)) # if args.controller == 'lstm': controller = LSTMCell(args.controller_size) elif args.controller == 'multilstm': controller = tf.nn.rnn_cell.MultiRNNCell( [LSTMCell(args.controller_size) for i in range(3)]) elif args.controller == 'ff': controller = dnc.ff.FFWrapper( dnc.ff.simple_feedforward(hidden=[args.controller_size] * 2)) if not args.no_dnc:
# Script: wires an external memory with NTM read/write heads into a DNC
# network and builds the loss and training op for it.
import tensorflow as tf
import numpy as np
from dnc import DNC, LSTMCell
from dnc.memory import Memory, NTMReadHead, NTMWriteHead
from tasks import CopyTask, RepeatCopyTask, AndTask, XorTask, MergeTask
# NOTE(review): wildcard import; `minimize_and_clip` below presumably comes
# from here -- confirm.
from utils import *

INPUT_SIZE = 8
BATCH_SIZE = 32

# Memory with two read heads and one write head, each given shifts -1/0/+1.
# NOTE(review): the meaning of the (25, 6) arguments is not visible here --
# presumably slot count and slot width; confirm against Memory.
memory = Memory(25, 6)
memory.add_head(NTMReadHead, shifts=[-1, 0, 1])
memory.add_head(NTMReadHead, shifts=[-1, 0, 1])
memory.add_head(NTMWriteHead, shifts=[-1, 0, 1])

# NOTE(review): `input` shadows the Python builtin of the same name.
# The last dimension is INPUT_SIZE + 2; the two leading dimensions are dynamic.
input = tf.placeholder(tf.float32, shape=(None, None, INPUT_SIZE+2))
#lstm = tf.nn.rnn_cell.MultiRNNCell([LSTMCell(256) for i in range(3)])
lstm = LSTMCell(100)
net = DNC(input, memory, INPUT_SIZE+2, controller = lstm, log_memory=True)

# Targets and mask share the last-dimension size of the input placeholder.
targets = tf.placeholder(dtype=tf.float32, shape=[None, None, INPUT_SIZE+2])
mask = tf.placeholder(dtype=tf.float32, shape=[None, None, INPUT_SIZE+2])

# The first element of the DNC result is used as the logits.
output = net[0]
# Mask-weighted sigmoid cross-entropy; this is the quantity that is optimised.
loss = tf.losses.sigmoid_cross_entropy(logits=output, weights=mask, multi_class_labels=targets)
# Masked cost summed over all elements and divided by BATCH_SIZE.
# NOTE(review): `cost` is not referenced by the train op in the visible part
# of this script.
cost = tf.reduce_sum( mask*((1 - targets * (1 - tf.exp(-output))) * tf.sigmoid(output)) ) / BATCH_SIZE

opt = tf.train.RMSPropOptimizer(1e-4, momentum=0.9)
train = minimize_and_clip(opt, loss)