def testOps(self):
    '''
    Verify that each of the operations (convolution, gating, etc.) is
    correct. Only compare the output from a single batch element and a
    single time slice.
    '''
    mem_size = (self.N, self.M)
    initial_memory = self.initial_state[0:-2]
    np_initial_read_address = self.initial_state[-2]
    np_initial_write_address = self.initial_state[-1]

    tf_mem_prev = tf.stack(initial_memory, axis=1)
    np_mem_prev = np.stack(initial_memory, axis=1)

    # Only want the first batch element and first time slice from the
    # controller output to produce the read and write head values from a
    # single timestep.
    np_read_head, np_write_head = head_pieces(
        self.controller_output[0, 0, :], mem_size, self.S)

    np_read_ops_out = generate_address(
        np_read_head, np_initial_read_address[0, :],
        np_mem_prev[0, :, :], self.N, self.S)

    np_write_ops_out = generate_address(
        np_write_head[0:-2], np_initial_write_address[0, :],
        np_mem_prev[0, :, :], self.N, self.S)

    with self.test_session() as session:
        # The TF head_pieces method takes in a single time slice from an
        # entire batch of controller data and spits out the read/write
        # head values for all batch items at that time slice.
        tf_write_head, tf_read_head = NTMCell.head_pieces(
            self.controller_output[:, 0, :], mem_size, self.S)

        tf_read_ops_out = address_regression(
            tf_read_head, self.initial_state[-2], tf_mem_prev,
            self.N, self.S)

        tf_write_ops_out = address_regression(
            tf_write_head[0:-2], self.initial_state[-1], tf_mem_prev,
            self.N, self.S)

        tf_write_ops_out = session.run(tf_write_ops_out)
        tf_read_ops_out = session.run(tf_read_ops_out)

        self.assertEqual(len(tf_read_ops_out), len(np_read_ops_out))
        self.assertEqual(len(tf_write_ops_out), len(np_write_ops_out))

        for i in range(len(np_read_ops_out)):
            self.assertArrayNear(tf_read_ops_out[i][0],
                                 np_read_ops_out[i], err=1e-8)
            self.assertArrayNear(tf_write_ops_out[i][0],
                                 np_write_ops_out[i], err=1e-8)
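
# A minimal NumPy sketch of the addressing pipeline exercised by testOps:
# content-based focusing, interpolation with the previous weighting, a
# circular-convolution shift, and sharpening, as described in the NTM paper.
# This is illustrative only; the tested reference lives in generate_address(),
# and the piece ordering (key, beta, gate, shift, gamma) and the mapping of
# shift-vector indices to offsets are assumptions made for this sketch.
def _example_addressing(key, beta, gate, shift, gamma, w_prev, memory):
    eps = 1e-8

    # Content-based focusing: softmax over beta-scaled cosine similarities.
    sim = memory.dot(key) / (np.linalg.norm(memory, axis=1) *
                             np.linalg.norm(key) + eps)
    w_c = np.exp(beta * sim)
    w_c /= np.sum(w_c)

    # Interpolate between the content weighting and the previous weighting.
    w_g = gate * w_c + (1.0 - gate) * w_prev

    # Circular convolution with the shift distribution (assumed centered,
    # e.g. shift_range=3 corresponds to offsets [-1, 0, 1]).
    n = w_g.shape[0]
    offsets = np.arange(len(shift)) - len(shift) // 2
    w_s = np.zeros(n)
    for i in range(n):
        for s, off in zip(shift, offsets):
            w_s[i] += w_g[(i - off) % n] * s

    # Sharpen and renormalize.
    w = w_s ** gamma
    return w / np.sum(w)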
def testHeadPieces(self):
    '''
    Show that the values extracted from the controller (key, gate, shift,
    etc.) are correct.
    '''
    mem_size = (self.N, self.M)

    np_read_head, np_write_head = head_pieces(
        self.controller_output, mem_size, self.S)

    with self.test_session() as session:
        tf_write_head, tf_read_head = NTMCell.head_pieces(
            self.controller_output, mem_size, self.S, axis=2)
        tf_write_head, tf_read_head = session.run(
            [tf_write_head, tf_read_head])

        # Make sure we got the same number of items from the read and
        # write heads.
        self.assertEqual(len(tf_write_head), len(np_write_head))
        self.assertEqual(len(tf_read_head), len(np_read_head))

        # Verify that the NP and TF read heads have approximately the same
        # values.
        for i in range(len(np_read_head)):
            for j in range(np_read_head[i].shape[0]):
                for k in range(np_read_head[i].shape[1]):
                    self.assertArrayNear(np_read_head[i][j, k, :],
                                         tf_read_head[i][j, k, :],
                                         err=1e-8)

        # Verify that the NP and TF write heads have approximately the
        # same values.
        for i in range(len(np_write_head)):
            for j in range(np_write_head[i].shape[0]):
                for k in range(np_write_head[i].shape[1]):
                    self.assertArrayNear(np_write_head[i][j, k, :],
                                         tf_write_head[i][j, k, :],
                                         err=1e-8)
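
# A hedged sketch of how the 4*M + 2*S + 6 controller outputs compared above
# might be split into head pieces: a read head of size M + S + 3 and a write
# head of size 3*M + S + 3 (the extra erase and add vectors). The ordering and
# the squashing functions here are assumptions for illustration; the real
# split is whatever head_pieces() and NTMCell.head_pieces() implement.
def _example_split_heads(raw, M, S):
    def softplus(x):
        return np.log1p(np.exp(x))

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def softmax(x):
        e = np.exp(x - np.max(x))
        return e / np.sum(e)

    read_raw, write_raw = raw[:M + S + 3], raw[M + S + 3:]

    # Read head: key (M), beta (1), gate (1), shift (S), gamma (1).
    read_head = (read_raw[:M],                          # key
                 softplus(read_raw[M]),                 # beta
                 sigmoid(read_raw[M + 1]),              # gate
                 softmax(read_raw[M + 2:M + 2 + S]),    # shift
                 1.0 + softplus(read_raw[M + 2 + S]))   # gamma >= 1

    # Write head: same pieces plus erase (M) and add (M) vectors.
    write_head = (write_raw[:M],
                  softplus(write_raw[M]),
                  sigmoid(write_raw[M + 1]),
                  softmax(write_raw[M + 2:M + 2 + S]),
                  1.0 + softplus(write_raw[M + 2 + S]),
                  sigmoid(write_raw[M + 3 + S:2*M + 3 + S]),  # erase
                  write_raw[2*M + 3 + S:3*M + 3 + S])         # add
    return read_head, write_head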
def __init__(self, mem_size, input_size, output_size, session,
             num_heads=1, shift_range=3, name="NTM"):
    '''
    Builds the computation graph for the Neural Turing Machine.

    The tasks from the original paper call for the NTM to take in a
    sequence of arrays and produce some output. Let B = batch size,
    T = sequence length, and L = array length; then a single input
    sequence is a matrix of size [TxL], and a batch of these input
    sequences has size [BxTxL].

    Arguments:
      mem_size - Tuple of integers corresponding to the number of storage
        locations and the dimension of each storage location (in the paper
        the memory matrix is NxM; mem_size refers to (N, M)).
      input_size - Integer number of elements in a single input vector
        (the value L).
      output_size - Integer number of elements in a single output vector.
      session - The TensorFlow session object that refers to the current
        computation graph.
      num_heads - The integer number of write heads the NTM uses (future
        feature).
      shift_range - The integer number of shift values that the read/write
        heads can perform, which corresponds to the direction and magnitude
        of the allowable shifts. Shift ranges and corresponding available
        shift directions/magnitudes:
          3 => [-1, 0, 1]
          4 => [-2, -1, 0, 1]
          5 => [-2, -1, 0, 1, 2]
      name - A string name for the variable scope, for troubleshooting.
    '''
    self.num_heads = 1
    self.sess = session
    self.S = shift_range
    self.N, self.M = mem_size
    self.in_size = input_size
    self.out_size = output_size
    num_lstm_units = 100
    self.dt = tf.float32

    dt = self.dt
    N = self.N
    M = self.M
    S = self.S
    num_heads = self.num_heads

    with tf.variable_scope(name):
        self.feed_in = tf.placeholder(dtype=dt,
                                      shape=(None, None, input_size))
        self.feed_out = tf.placeholder(dtype=dt,
                                       shape=(None, None, output_size))
        self.feed_learning_rate = tf.placeholder(dtype=dt, shape=())

        batch_size = tf.shape(self.feed_in)[0]
        seq_length = tf.shape(self.feed_in)[1]

        head_raw = self.controller(self.feed_in, batch_size, seq_length)

        self.ntm_cell = NTMCell(mem_size=(N, M), num_shifts=S)

        write_head, read_head = NTMCell.head_pieces(
            head_raw, mem_size=(N, M), num_shifts=S, axis=2)

        self.write_head, self.read_head = \
            head_pieces_tuple_to_dict(write_head, read_head)

        self.ntm_init_state = tuple(
            [tf.placeholder(dtype=dt, shape=(None, s))
             for s in self.ntm_cell.state_size])

        self.ntm_reads, self.ntm_last_state = tf.nn.dynamic_rnn(
            cell=self.ntm_cell, initial_state=self.ntm_init_state,
            inputs=head_raw, dtype=dt)

        self.w_read = self.ntm_last_state[-2]
        self.w_write = self.ntm_last_state[-1]

        ntm_reads_flat = tf.reshape(self.ntm_reads, [-1, M])

        L = tf.Variable(tf.random_normal([M, output_size]))
        b_L = tf.Variable(tf.random_normal([output_size,]))

        logits_flat = tf.matmul(ntm_reads_flat, L) + b_L
        targets_flat = tf.reshape(self.feed_out, [-1, output_size])

        self.error = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=targets_flat, logits=logits_flat))

        self.predictions = tf.sigmoid(
            tf.reshape(logits_flat, [batch_size, seq_length, output_size]))

        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=self.feed_learning_rate, momentum=0.9)

        grads_and_vars = optimizer.compute_gradients(self.error)
        capped_grads = [(tf.clip_by_value(grad, -10., 10.), var)
                        for grad, var in grads_and_vars]

        self.train_op = optimizer.apply_gradients(capped_grads)
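
# A hedged usage sketch for the constructor above, assuming the enclosing
# class is named NTM and that feeding a zero NTM state for every state piece
# is acceptable as the initial state. It builds the graph and runs one
# training step on a random copy-task style batch.
def _example_train_step():
    B, T, L_in = 8, 10, 9

    sess = tf.Session()
    ntm = NTM(mem_size=(32, 12), input_size=L_in, output_size=L_in,
              session=sess, shift_range=3)
    sess.run(tf.global_variables_initializer())

    batch_in = np.random.randint(0, 2, size=(B, T, L_in)).astype(np.float32)
    feeds = {ntm.feed_in: batch_in,
             ntm.feed_out: batch_in,
             ntm.feed_learning_rate: 1e-4}

    # One zero array per piece of the NTM cell state, matching the
    # placeholder shapes built from ntm_cell.state_size.
    for placeholder, size in zip(ntm.ntm_init_state, ntm.ntm_cell.state_size):
        feeds[placeholder] = np.zeros((B, size), dtype=np.float32)

    error, _ = sess.run([ntm.error, ntm.train_op], feeds)
    return error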
class NTMRegression(object):
    '''
    A class that makes regression testing on the NTMCell easier.
    '''

    def __init__(self, mem_size, session, num_heads=1, shift_range=3,
                 name="NTM"):
        '''
        Just sets up an NTM without the controller, so all this will do is
        apply the NTM operations to some set of fake input.
        '''
        self.mem_size = mem_size
        self.shift_range = shift_range
        self.sess = session
        self.num_heads = num_heads
        (_, num_bits) = self.mem_size
        dt = tf.float32
        head_size = 4*num_bits + 2*self.shift_range + 6

        with tf.variable_scope(name):
            self.ntm_cell = NTMCell(mem_size=self.mem_size,
                                    num_shifts=self.shift_range)

            # [batch_size, sequence_length, 4*M + 2*S + 6]
            self.feed_controller_input = tf.placeholder(
                dtype=dt, shape=(None, None, head_size))

            # ([batch_size, ntm_cell.state_size[0]], ...)
            self.feed_initial_state = tuple(
                [tf.placeholder(dtype=dt, shape=(None, s))
                 for s in self.ntm_cell.state_size])

            self.ntm_reads, self.ntm_last_state = tf.nn.dynamic_rnn(
                cell=self.ntm_cell,
                initial_state=self.feed_initial_state,
                inputs=self.feed_controller_input, dtype=dt)

            self.write_head, self.read_head = self.ntm_cell.head_pieces(
                self.feed_controller_input, mem_size=self.mem_size,
                num_shifts=self.shift_range, axis=2)

    def run(self, controller_input, initial_state):
        '''
        Takes some controller input and initial state and spits out the
        read/write addresses and the values that are read from a memory
        matrix.
        '''
        (_, seq_length, _) = controller_input.shape
        sequences = np.split(controller_input, seq_length, axis=1)
        init_state = initial_state
        read_addresses = []
        write_addresses = []
        sequence_reads = []

        for seq in sequences:
            fetches = [self.ntm_reads, self.ntm_last_state]
            feeds = {self.feed_controller_input: seq}

            for i in range(len(init_state)):
                feeds[self.feed_initial_state[i]] = init_state[i]

            reads, last_state = self.sess.run(fetches, feeds)

            sequence_reads.append(reads)
            read_addresses.append(last_state[-2])
            write_addresses.append(last_state[-1])

            init_state = last_state

        read_addresses = \
            np.transpose(np.squeeze(np.array(read_addresses)), [1, 0, 2])
        write_addresses = \
            np.transpose(np.squeeze(np.array(write_addresses)), [1, 0, 2])
        sequence_reads = \
            np.transpose(np.squeeze(np.array(sequence_reads)), [1, 0, 2])

        return read_addresses, write_addresses, sequence_reads
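
# A hedged usage sketch for NTMRegression.run, assuming a zero initial state
# is acceptable and that ntm_cell.state_size yields per-piece vector lengths,
# as the placeholder construction in __init__ implies.
def _example_regression_run():
    N, M, S = 32, 12, 3
    batch_size, seq_length = 4, 10

    sess = tf.Session()
    ntm_reg = NTMRegression(mem_size=(N, M), session=sess, shift_range=S)
    sess.run(tf.global_variables_initializer())

    # Fake controller output: [batch, time, 4*M + 2*S + 6].
    controller_input = np.random.randn(
        batch_size, seq_length, 4*M + 2*S + 6).astype(np.float32)
    initial_state = [np.zeros((batch_size, s), dtype=np.float32)
                     for s in ntm_reg.ntm_cell.state_size]

    reads, writes, values = ntm_reg.run(controller_input, initial_state)
    # reads/writes have shape [batch, time, N]; values has shape
    # [batch, time, M].
    return reads, writes, values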
def __init__(self, mem_size, input_size, output_size, session,
             num_heads=1, shift_range=3, name="NTM"):
    self.num_heads = 1
    self.sess = session
    self.S = shift_range
    self.N, self.M = mem_size
    self.in_size = input_size
    self.out_size = output_size
    num_lstm_units = 100
    self.dt = tf.float32
    self.pi = 64

    pi = self.pi
    dt = self.dt
    N = self.N
    M = self.M
    S = self.S
    num_heads = self.num_heads

    with tf.variable_scope(name):
        self.feed_in = tf.placeholder(dtype=dt,
                                      shape=(None, None, input_size))
        self.feed_out = tf.placeholder(dtype=dt,
                                       shape=(None, None, output_size))
        self.feed_learning_rate = tf.placeholder(dtype=dt, shape=())

        batch_size = tf.shape(self.feed_in)[0]
        seq_length = tf.shape(self.feed_in)[1]

        head_raw = self.controller(self.feed_in, batch_size, seq_length)

        self.ntm_cell = NTMCell(mem_size=(N, M), shift_range=S)

        self.write_head, self.read_head = NTMCell.head_pieces(
            head_raw, mem_size=(N, M), shift_range=S, axis=2,
            style='dict')

        self.ntm_init_state = tuple(
            [tf.placeholder(dtype=dt, shape=(None, s))
             for s in self.ntm_cell.state_size])

        self.ntm_reads, self.ntm_last_state = tf.nn.dynamic_rnn(
            cell=self.ntm_cell, initial_state=self.ntm_init_state,
            inputs=head_raw, dtype=dt, parallel_iterations=pi)

        # Started conversion to the multi-head output here; still lots to
        # do. The last state holds the N memory rows followed by the read
        # and write addresses for each head.
        self.w_read = self.ntm_last_state[N:N + num_heads]
        self.w_write = self.ntm_last_state[N + num_heads:N + 2*num_heads]

        ntm_reads_flat = [tf.reshape(r, [-1, M]) for r in self.ntm_reads]

        L = tf.Variable(tf.random_normal([M, output_size]))
        b_L = tf.Variable(tf.random_normal([output_size,]))

        # With a single read head, ntm_reads_flat is a list containing one
        # [batch*time, M] tensor; use that tensor for the output logits
        # until the multi-head conversion is finished.
        logits_flat = tf.matmul(ntm_reads_flat[0], L) + b_L
        targets_flat = tf.reshape(self.feed_out, [-1, output_size])

        self.error = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=targets_flat, logits=logits_flat))

        self.predictions = tf.sigmoid(
            tf.reshape(logits_flat, [batch_size, seq_length, output_size]))

        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=self.feed_learning_rate, momentum=0.9)

        grads_and_vars = optimizer.compute_gradients(self.error)
        capped_grads = [(tf.clip_by_value(grad, -10., 10.), var)
                        for grad, var in grads_and_vars]

        self.train_op = optimizer.apply_gradients(capped_grads)
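
# A hedged sketch of one way to finish the multi-head conversion noted above:
# concatenate the per-head reads along the feature axis and widen the output
# projection to match. This is an illustrative alternative to the single-head
# assumption used for logits_flat, not the author's final design.
def _example_multihead_logits(ntm_reads, M, num_heads, output_size):
    # ntm_reads: list of num_heads tensors shaped [batch, time, M].
    reads_flat = [tf.reshape(r, [-1, M]) for r in ntm_reads]
    combined = tf.concat(reads_flat, axis=1)  # [batch*time, num_heads*M]

    L = tf.Variable(tf.random_normal([num_heads*M, output_size]))
    b_L = tf.Variable(tf.random_normal([output_size]))

    return tf.matmul(combined, L) + b_L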