  def testOps(self):
    '''
    Verify that each of the operations (convolution, gating, etc.) is
    correct.
    Only compare the output from a single batch element and single time
    slice.
    '''

    mem_size = (self.N, self.M)
    initial_memory = self.initial_state[0:-2]
    np_initial_read_address = self.initial_state[-2]
    np_initial_write_address = self.initial_state[-1]
    tf_mem_prev = tf.stack(initial_memory, axis=1)
    np_mem_prev = np.stack(initial_memory, axis=1)
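    # initial_memory holds the N memory-row vectors, each [batch, M]; stacking
    # them along axis 1 rebuilds the [batch, N, M] memory matrix.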
    # Only want the first batch element and first time slice from the
    # controller output to produce the read and write head values from a
    # single timestep.
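    # head_pieces splits the flat controller output into the NTM addressing
    # pieces (key, key strength, interpolation gate, shift weights, sharpening
    # factor); the write head additionally carries the erase and add vectors.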
    np_read_head, np_write_head = head_pieces(self.controller_output[0, 0, :],
                                              mem_size, self.S)

    np_read_ops_out = generate_address(np_read_head,
                                       np_initial_read_address[0, :],
                                       np_mem_prev[0, :, :],
                                       self.N,
                                       self.S)
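    # The last two write-head pieces are the erase and add vectors; they take
    # no part in addressing, so they are sliced off before address generation.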
    np_write_ops_out = generate_address(np_write_head[0:-2],
                                        np_initial_write_address[0, :],
                                        np_mem_prev[0, :, :],
                                        self.N,
                                        self.S)

    with self.test_session() as session:
      # The TF head pieces method takes in a single time slice from an entire
      # batch of controller data and spits out the read/write head values for
      # all batch items at that time slice.
      tf_write_head, tf_read_head = \
        NTMCell.head_pieces(self.controller_output[:, 0, :], mem_size, self.S)
      tf_read_ops_out = address_regression(tf_read_head,
                                           self.initial_state[-2],
                                           tf_mem_prev,
                                           self.N,
                                           self.S)
      tf_write_ops_out = address_regression(tf_write_head[0:-2],
                                            self.initial_state[-1],
                                            tf_mem_prev,
                                            self.N,
                                            self.S)

      tf_write_ops_out = session.run(tf_write_ops_out)
      tf_read_ops_out = session.run(tf_read_ops_out)

      self.assertEqual(len(tf_read_ops_out), len(np_read_ops_out))
      self.assertEqual(len(tf_write_ops_out), len(np_write_ops_out))

      for i in range(len(np_read_ops_out)):
        self.assertArrayNear(tf_read_ops_out[i][0], np_read_ops_out[i],
                             err=1e-8)
        self.assertArrayNear(tf_write_ops_out[i][0], np_write_ops_out[i],
                             err=1e-8)

  def testHeadPieces(self):
    '''
    Show that the values extracted from the controller (key, gate, shift, etc.)
    are correct.
    '''

    mem_size = (self.N, self.M)
    np_read_head, np_write_head = head_pieces(self.controller_output,
                                              mem_size,
                                              self.S)

    with self.test_session() as session:
      tf_write_head, tf_read_head = NTMCell.head_pieces(self.controller_output,
                                                        mem_size,
                                                        self.S,
                                                        axis=2)
      tf_write_head, tf_read_head = session.run([tf_write_head, tf_read_head])

      # Make sure we got the same number of items from the read and write
      # heads.
      self.assertEqual(len(tf_write_head), len(np_write_head))
      self.assertEqual(len(tf_read_head), len(np_read_head))

      # Verify that the NP and TF read heads have approximately the same
      # values.
      for i in range(len(np_read_head)):
        for j in range(np_read_head[i].shape[0]):
          for k in range(np_read_head[i].shape[1]):
            self.assertArrayNear(np_read_head[i][j, k, :],
                                 tf_read_head[i][j, k, :],
                                 err=1e-8)

      # Verify that the NP and TF write heads have approximately the same
      # values.
      for i in range(len(np_write_head)):
        for j in range(np_write_head[i].shape[0]):
          for k in range(np_write_head[i].shape[1]):
            self.assertArrayNear(np_write_head[i][j, k, :],
                                 tf_write_head[i][j, k, :],
                                 err=1e-8)
Example #3
    def __init__(self, mem_size, input_size, output_size, session,
                 num_heads=1, shift_range=3, name="NTM"):
        '''
        Builds the computation graph for the Neural Turing Machine.
        The tasks from the original paper call for the NTM to take in a
        sequence of arrays, and produce some output.
        Let B = batch size, T = sequence length, and L = array length, then
        a single input sequence is a matrix of size [TxL]. A batch of these
        input sequences has size [BxTxL].

        Arguments:
          mem_size - Tuple of integers corresponding to the number of storage
            locations and the dimension of each storage location (in the paper
            the memory matrix is NxM, mem_size refers to (N, M)).
          input_size - Integer number of elements in a single input vector
            (the value L).
          output_size - Integer number of elements in a single output vector.
          session - The TensorFlow session object that refers to the current
            computation graph.
          num_heads - The integer number of write heads the NTM uses (future
            feature).
          shift_range - Integer number of shift values that the read/write
            heads can apply, which determines the direction and magnitude
            of the allowable shifts. Shift ranges and the corresponding
            available shift offsets:
              3 => [-1, 0, 1]
              4 => [-2, -1, 0, 1]
              5 => [-2, -1, 0, 1, 2]
          name - A string name for the variable scope, for troubleshooting.
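
        Example (illustrative, not from the original code; assumes an active
        tf.Session named `sess` and that this constructor belongs to a class
        named NTM):
          ntm = NTM(mem_size=(128, 20), input_size=8, output_size=8,
                    session=sess, shift_range=3)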
        '''

        self.num_heads = 1  # num_heads argument is a future feature; fixed at one head
        self.sess = session
        self.S = shift_range
        self.N, self.M = mem_size
        self.in_size = input_size
        self.out_size = output_size

        num_lstm_units = 100
        self.dt = tf.float32

        dt = self.dt
        N = self.N
        M = self.M
        S = self.S
        num_heads = self.num_heads

        with tf.variable_scope(name):
            self.feed_in = tf.placeholder(dtype=dt,
                shape=(None, None, input_size))

            self.feed_out = tf.placeholder(dtype=dt,
                shape=(None, None, output_size))

            self.feed_learning_rate = tf.placeholder(dtype=dt, shape=())

            batch_size = tf.shape(self.feed_in)[0]
            seq_length = tf.shape(self.feed_in)[1]

            head_raw = self.controller(self.feed_in, batch_size, seq_length)

            self.ntm_cell = NTMCell(mem_size=(N, M), num_shifts=S)

            write_head, read_head = NTMCell.head_pieces(
                head_raw, mem_size=(N, M), num_shifts=S, axis=2)

            self.write_head, self.read_head = \
                head_pieces_tuple_to_dict(write_head, read_head)

            self.ntm_init_state = tuple(
                [tf.placeholder(dtype=dt, shape=(None, s)) \
                for s in self.ntm_cell.state_size])
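            # state_size lists the N memory rows (each of length M) followed
            # by the read and write address vectors (each of length N).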

            self.ntm_reads, self.ntm_last_state = tf.nn.dynamic_rnn(
                cell=self.ntm_cell, initial_state=self.ntm_init_state,
                inputs=head_raw, dtype=dt)

            self.w_read = self.ntm_last_state[-2]
            self.w_write = self.ntm_last_state[-1]

            ntm_reads_flat = tf.reshape(self.ntm_reads, [-1, M])

            L = tf.Variable(tf.random_normal([M, output_size]))
            b_L = tf.Variable(tf.random_normal([output_size,]))

            logits_flat = tf.matmul(ntm_reads_flat, L) + b_L
            targets_flat = tf.reshape(self.feed_out, [-1, output_size])

            self.error = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=targets_flat, logits=logits_flat))
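            # Sigmoid cross-entropy scores each output element as an
            # independent binary target, matching the binary array sequences
            # used in the paper's tasks.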

            self.predictions = tf.sigmoid(
                tf.reshape(logits_flat, [batch_size, seq_length, output_size]))

            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.feed_learning_rate, momentum=0.9)

            grads_and_vars = optimizer.compute_gradients(self.error)
            capped_grads = [(tf.clip_by_value(grad, -10., 10.), var) \
                for grad, var in grads_and_vars]
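            # Clipping gradient components to [-10, 10] mirrors the gradient
            # clipping described in the NTM paper.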

            self.train_op = optimizer.apply_gradients(capped_grads)
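
# Illustrative training step (a sketch, not part of the original code; assumes
# `ntm` is an instance of the class above and `x`, `y` are numpy arrays of
# shape [B, T, L]). The NTM initial state must also be fed; zeros are shown
# here, though a small-constant memory initialization may behave better:
#   feeds = {ntm.feed_in: x, ntm.feed_out: y, ntm.feed_learning_rate: 1e-4}
#   for ph, size in zip(ntm.ntm_init_state, ntm.ntm_cell.state_size):
#       feeds[ph] = np.zeros((x.shape[0], size))
#   error, _ = ntm.sess.run([ntm.error, ntm.train_op], feeds)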


class NTMRegression(object):
  '''
  A class that makes regression testing on the NTMCell easier.
  '''
  def __init__(self, mem_size, session, num_heads=1, shift_range=3,
               name="NTM"):
    '''
    Sets up an NTM without the controller; the graph simply applies the NTM
    memory operations to externally supplied (fake) controller input.
    '''

    self.mem_size = mem_size
    self.shift_range = shift_range
    self.sess = session
    self.num_heads = num_heads

    (_, num_bits) = self.mem_size
    dt = tf.float32

    head_size = 4*num_bits + 2*self.shift_range + 6
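    # Head layout per the NTM paper: each head emits a key (M), key strength
    # (1), interpolation gate (1), shift weights (S), and sharpening factor
    # (1); the write head adds erase (M) and add (M) vectors, so
    # (M + S + 3) + (3M + S + 3) = 4M + 2S + 6.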

    with tf.variable_scope(name):

      self.ntm_cell = NTMCell(mem_size=self.mem_size,
                              num_shifts=self.shift_range)

      # [batch_size, sequence_length, 4*M + 2*S + 6]
      self.feed_controller_input = \
        tf.placeholder(dtype=dt,
                       shape=(None, None, head_size))

      # ([batch_size, ntm_cell.state_size[0]], ...)
      self.feed_initial_state = \
        tuple([tf.placeholder(dtype=dt, shape=(None, s))
               for s in self.ntm_cell.state_size])

      self.ntm_reads, self.ntm_last_state = \
        tf.nn.dynamic_rnn(cell=self.ntm_cell,
                          initial_state=self.feed_initial_state,
                          inputs=self.feed_controller_input, dtype=dt)

      self.write_head, self.read_head = \
        self.ntm_cell.head_pieces(self.feed_controller_input,
                                  mem_size=self.mem_size,
                                  num_shifts=self.shift_range, axis=2)


  def run(self, controller_input, initial_state):
    '''
    Takes controller input and an initial state, and returns the read/write
    addresses and the values read from the memory matrix at each time step.
    '''

    (_, seq_length, _) = controller_input.shape
    sequences = np.split(controller_input, seq_length, axis=1)
    init_state = initial_state

    read_addresses = []
    write_addresses = []
    sequence_reads = []

    for seq in sequences:
      fetches = [self.ntm_reads, self.ntm_last_state]
      feeds = {self.feed_controller_input: seq}

      for i in range(len(init_state)):
        feeds[self.feed_initial_state[i]] = init_state[i]

      reads, last_state = self.sess.run(fetches, feeds)

      sequence_reads.append(reads)
      read_addresses.append(last_state[-2])
      write_addresses.append(last_state[-1])

      init_state = last_state

    read_addresses = \
      np.transpose(np.squeeze(np.array(read_addresses)), [1, 0, 2])
    write_addresses = \
      np.transpose(np.squeeze(np.array(write_addresses)), [1, 0, 2])
    sequence_reads = \
      np.transpose(np.squeeze(np.array(sequence_reads)), [1, 0, 2])

    return read_addresses, write_addresses, sequence_reads
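
  # Illustrative call (a sketch; `ntm_reg` is a hypothetical instance):
  #   controller_input: numpy array of shape [B, T, 4*M + 2*S + 6]
  #   initial_state: tuple matching ntm_cell.state_size, each entry [B, s]
  #   read_addr, write_addr, reads = ntm_reg.run(controller_input,
  #                                              initial_state)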
    def __init__(self,
                 mem_size,
                 input_size,
                 output_size,
                 session,
                 num_heads=1,
                 shift_range=3,
                 name="NTM"):

        self.num_heads = num_heads  # multi-head conversion in progress; see note below
        self.sess = session
        self.S = shift_range
        self.N, self.M = mem_size
        self.in_size = input_size
        self.out_size = output_size

        num_lstm_units = 100
        self.dt = tf.float32
        self.pi = 64  # parallel_iterations for tf.nn.dynamic_rnn

        pi = self.pi
        dt = self.dt
        N = self.N
        M = self.M
        S = self.S
        num_heads = self.num_heads

        with tf.variable_scope(name):
            self.feed_in = tf.placeholder(dtype=dt,
                                          shape=(None, None, input_size))

            self.feed_out = tf.placeholder(dtype=dt,
                                           shape=(None, None, output_size))

            self.feed_learning_rate = tf.placeholder(dtype=dt, shape=())

            batch_size = tf.shape(self.feed_in)[0]
            seq_length = tf.shape(self.feed_in)[1]

            head_raw = self.controller(self.feed_in, batch_size, seq_length)

            self.ntm_cell = NTMCell(mem_size=(N, M), shift_range=S)

            self.write_head, self.read_head = NTMCell.head_pieces(
                head_raw, mem_size=(N, M), shift_range=S, axis=2, style='dict')

            self.ntm_init_state = tuple(
                [tf.placeholder(dtype=dt, shape=(None, s)) \
                for s in self.ntm_cell.state_size])

            self.ntm_reads, self.ntm_last_state = tf.nn.dynamic_rnn(
                cell=self.ntm_cell,
                initial_state=self.ntm_init_state,
                inputs=head_raw,
                dtype=dt,
                parallel_iterations=pi)

            # Started conversion to the multi-head output here, still have
            # lots to do.
            self.w_read = self.ntm_last_state[N:N + num_heads]
            self.w_write = self.ntm_last_state[N + num_heads:N + 2 * num_heads]
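            # The cell state is laid out as N memory rows followed by the read
            # address(es) and then the write address(es), so slicing from index
            # N selects the per-head address vectors.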

            # Concatenate the per-head read vectors so one output layer maps
            # them to logits; with a single head this is just a [?, M] tensor.
            ntm_reads_flat = tf.concat(
                [tf.reshape(r, [-1, M]) for r in self.ntm_reads], axis=1)

            L = tf.Variable(tf.random_normal([num_heads * M, output_size]))
            b_L = tf.Variable(tf.random_normal([output_size,]))

            logits_flat = tf.matmul(ntm_reads_flat, L) + b_L
            targets_flat = tf.reshape(self.feed_out, [-1, output_size])

            self.error = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_flat,
                                                        logits=logits_flat))

            self.predictions = tf.sigmoid(
                tf.reshape(logits_flat, [batch_size, seq_length, output_size]))

            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.feed_learning_rate, momentum=0.9)

            grads_and_vars = optimizer.compute_gradients(self.error)
            capped_grads = [(tf.clip_by_value(grad, -10., 10.), var) \
                for grad, var in grads_and_vars]

            self.train_op = optimizer.apply_gradients(capped_grads)