Example #1
def create_ntm(config, sess, **ntm_args):
    if config.rand_hyper:
        hyper_params = {}
        if config.is_test:
            hyper_params = load_hyperparamters(config)
        else:
            hyper_params = generate_hyperparams(config)
        print(" [*] Hyperparameters: {}".format(hyper_params))
        cell = NTMCell(input_dim=config.input_dim,
                       output_dim=config.output_dim,
                       controller_layer_size=hyper_params["c_layer"],
                       controller_dim=hyper_params["c_dim"],
                       mem_size=hyper_params["mem_size"],
                       write_head_size=config.write_head_size,
                       read_head_size=config.read_head_size,
                       is_LSTM_mode=config.is_LSTM_mode)
        scope = ntm_args.pop('scope', 'NTM-%s' % config.task)

        # Description + query + plan + answer
        min_length = (config.min_size -
                      1) + 1 + config.plan_length + (config.min_size - 1)
        max_length = int(((config.max_size * (config.max_size - 1) / 2) + 1 +
                          config.plan_length + (config.max_size - 1)))
        ntm = NTM(cell,
                  sess,
                  min_length,
                  max_length,
                  config.min_size,
                  config.max_size,
                  scope=scope,
                  **ntm_args,
                  lr=hyper_params["lr"],
                  momentum=hyper_params["momentum"],
                  decay=hyper_params["decay"],
                  beta=hyper_params["l2"])

    else:
        cell = NTMCell(input_dim=config.input_dim,
                       output_dim=config.output_dim,
                       controller_layer_size=config.controller_layer_size,
                       controller_dim=config.controller_dim,
                       write_head_size=config.write_head_size,
                       read_head_size=config.read_head_size,
                       is_LSTM_mode=config.is_LSTM_mode)
        scope = ntm_args.pop('scope', 'NTM-%s' % config.task)

        # Description + query + plan + answer
        min_length = (config.min_size -
                      1) + 1 + config.plan_length + (config.min_size - 1)
        max_length = int(((config.max_size * (config.max_size - 1) / 2) + 1 +
                          config.plan_length + (config.max_size - 1)))
        ntm = NTM(cell,
                  sess,
                  min_length,
                  max_length,
                  config.min_size,
                  config.max_size,
                  scope=scope,
                  **ntm_args)
    return cell, ntm
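
To make the "description + query + plan + answer" length arithmetic above concrete, here is a small worked example; the values of min_size, max_size and plan_length are illustrative assumptions, not taken from the source.

# Worked example of the sequence-length bounds computed in create_ntm,
# using hypothetical sizes.
min_size, max_size, plan_length = 3, 5, 4

# description (min_size - 1) + query (1) + plan + answer (min_size - 1)
min_length = (min_size - 1) + 1 + plan_length + (min_size - 1)  # 2 + 1 + 4 + 2 = 9

# the description term grows to max_size * (max_size - 1) / 2 entries for the largest instance
max_length = int((max_size * (max_size - 1) / 2) + 1 + plan_length + (max_size - 1))  # 10 + 1 + 4 + 4 = 19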
Example #2
def inference(images_t, last_labels_t):
    _, time_steps, width = images_t.get_shape().as_list()
    _, _, num_labels = last_labels_t.get_shape().as_list()
    with tf.variable_scope("rnn"):
        images_t = tf.reshape(images_t, (-1, time_steps, width))
        rnn_inputs_t = tf.concat((images_t, last_labels_t), 2)
        #keep_prob=tf.placeholder(tf.float32)
        #rnn_inputs_t = tf.nn.dropout(rnn_inputs, keep_prob)
        if CELL_TYPE == 'lstm':
            rnn_cell = tf.contrib.rnn.LSTMCell(LSTM_STATE_SIZE,
                                               activation=tf.nn.tanh)
        elif CELL_TYPE == 'ntm':
            print('ntm')
            rnn_cell = NTMCell(memory_slots=128,
                               memory_width=40,
                               controller_size=LSTM_STATE_SIZE)
        rnn_output_t, rnn_final_state_t = tf.nn.dynamic_rnn(rnn_cell,
                                                            rnn_inputs_t,
                                                            time_major=False,
                                                            dtype=tf.float32,
                                                            swap_memory=False)
        # dynamic_rnn automatically unrolls the recurrent cell over the time dimension
    rnn_output_size = rnn_output_t.get_shape().as_list()[-1]
    W_t = tf.get_variable("W", (rnn_output_size, num_labels),
                          initializer=tf.random_normal_initializer(stddev=0.1))
    b_t = tf.get_variable("b", (num_labels,),
                          initializer=tf.constant_initializer(0.0))
    logits_t = tf.matmul(tf.reshape(rnn_output_t,
                                    (-1, rnn_output_size)), W_t) + b_t
    logits_t = tf.reshape(logits_t, (-1, time_steps, num_labels))

    return logits_t
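
A minimal usage sketch for the inference graph above; the batch size, sequence length, image width and label count are illustrative, and CELL_TYPE / LSTM_STATE_SIZE are assumed to be module-level constants as in the snippet.

images_ph = tf.placeholder(tf.float32, shape=(16, 50, 400))     # (batch, time_steps, width)
last_labels_ph = tf.placeholder(tf.float32, shape=(16, 50, 5))   # (batch, time_steps, num_labels)
logits_t = inference(images_ph, last_labels_ph)                  # -> (batch, time_steps, num_labels)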
Example #3
def inference(images_t, last_labels_t):
    _, time_steps, height, width = images_t.get_shape().as_list()
    _, _, num_labels = last_labels_t.get_shape().as_list()

    with tf.variable_scope("rnn"):
        images_t = tf.reshape(images_t, (-1, time_steps, height * width))
        rnn_inputs_t = tf.concat((images_t, last_labels_t), 2)
        if CELL_TYPE == 'lstm':
            rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(LSTM_STATE_SIZE)
        elif CELL_TYPE == 'ntm':
            print('ntm')
            rnn_cell = NTMCell(memory_slots=128,
                               memory_width=40,
                               controller_size=LSTM_STATE_SIZE)
        rnn_output_t, rnn_final_state_t = tf.nn.dynamic_rnn(rnn_cell,
                                                            rnn_inputs_t,
                                                            time_major=False,
                                                            dtype=tf.float32,
                                                            swap_memory=False)

    with tf.variable_scope("fcout"):
        rnn_output_size = rnn_output_t.get_shape().as_list()[-1]
        W_t = tf.get_variable(
            "W", (rnn_output_size, num_labels),
            initializer=tf.random_normal_initializer(stddev=0.1))
        b_t = tf.get_variable("b", (num_labels,),
                              initializer=tf.constant_initializer(0.0))
        logits_t = tf.matmul(tf.reshape(rnn_output_t,
                                        (-1, rnn_output_size)), W_t) + b_t
        logits_t = tf.reshape(logits_t, (-1, time_steps, num_labels))

    return logits_t
Example #4
def predict_train(config, sess):
    """Train an NTM for the copy task given a TensorFlow session, which is a
    connection to the C++ backend"""

    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # one-hot delimiter vectors marking the start and end of a sequence
    # (these appear in the figure examples in the README)
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    # initialize the NTM cell (which holds the neural-net controller) and the NTM graph
    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length*3)

    print(" [*] Initialize all variables")
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    for idx in xrange(config.epoch):
        # generate a sequence of random length
        seq_length = randint(config.min_length, config.max_length) * 4
        inc_seq, comp_seq = generate_predict_sequence(seq_length, config.input_dim - 2)

        # map the NTM's input and target placeholders to the generated sequence vectors
        feed_dict = {input_:vec for vec, input_ in zip(inc_seq, ntm.inputs)}
        feed_dict.update(
            {true_output:vec for vec, true_output in zip(comp_seq, ntm.true_outputs)}
        )
        feed_dict.update({
            ntm.start_symbol: start_symbol,
            ntm.end_symbol: end_symbol
        })

        # run one optimization step and fetch the current loss and global step
        _, cost, step = sess.run([ntm.optims[seq_length],
                                  ntm.get_loss(seq_length),
                                  ntm.global_step], feed_dict=feed_dict)

        # periodically save a checkpoint
        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)

        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)" \
                % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
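
A hedged sketch of how predict_train might be driven; the config fields mirror the attributes the function reads, but their values (and the use of argparse.Namespace as a stand-in config object) are assumptions, not from the source. print_interval is assumed to be defined at module level.

from argparse import Namespace

config = Namespace(checkpoint_dir='checkpoint',
                   input_dim=10, output_dim=10,
                   controller_layer_size=1,
                   write_head_size=1, read_head_size=1,
                   min_length=1, max_length=5,
                   epoch=100000)

with tf.Session() as sess:
    cell, ntm = predict_train(config, sess)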
Example #5
def create_ntm(FLAGS, sess, **ntm_args):
    cell = NTMCell(
        input_dim=FLAGS.input_dim,
        output_dim=FLAGS.output_dim,
        controller_layer_size=FLAGS.controller_layer_size,
        write_head_size=FLAGS.write_head_size,
        read_head_size=FLAGS.read_head_size)
    ntm = NTM(
        cell, sess, FLAGS.min_length, FLAGS.max_length,
        test_max_length=FLAGS.test_max_length, scope='NTM-%s' % FLAGS.task, **ntm_args)
    return cell, ntm
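
The FLAGS object here is typically defined with tf.app.flags; below is a hedged sketch of flag definitions matching the attributes create_ntm accesses (the default values and help strings are made up).

flags = tf.app.flags
flags.DEFINE_string('task', 'copy', 'name of the task, used in the variable scope')
flags.DEFINE_integer('input_dim', 10, 'dimension of each input vector')
flags.DEFINE_integer('output_dim', 10, 'dimension of each output vector')
flags.DEFINE_integer('controller_layer_size', 1, 'number of controller layers')
flags.DEFINE_integer('write_head_size', 1, 'number of write heads')
flags.DEFINE_integer('read_head_size', 1, 'number of read heads')
flags.DEFINE_integer('min_length', 1, 'minimum training sequence length')
flags.DEFINE_integer('max_length', 10, 'maximum training sequence length')
flags.DEFINE_integer('test_max_length', 120, 'maximum sequence length at test time')
FLAGS = flags.FLAGS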
Example #6
def copy_train(config):
    sess = config.sess

    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # delimiter flag for start and end
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length)

    print(" [*] Initialize all variables")
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    for idx in xrange(config.epoch):
        seq_length = randint(config.min_length, config.max_length)
        seq = generate_copy_sequence(seq_length, config.input_dim - 2)

        feed_dict = {input_: vec for vec, input_ in zip(seq, ntm.inputs)}
        feed_dict.update({
            true_output: vec
            for vec, true_output in zip(seq, ntm.true_outputs)
        })
        feed_dict.update({
            ntm.start_symbol: start_symbol,
            ntm.end_symbol: end_symbol
        })

        _, cost, step = sess.run(
            [ntm.optims[seq_length],
             ntm.get_loss(seq_length),
             ntm.global_step],
            feed_dict=feed_dict)

        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)

        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)" \
                % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
Example #7
def create_ntm(config, sess, **ntm_args):
    cell = NTMCell(
        input_dim=config.input_dim,
        output_dim=config.output_dim,
        controller_layer_size=config.controller_layer_size,
        controller_dim=config.controller_dim,
        write_head_size=config.write_head_size,
        read_head_size=config.read_head_size)
    scope = ntm_args.pop('scope', 'NTM-%s' % config.task)
    ntm = NTM(
        cell, sess, config.min_length, config.max_length,
        test_max_length=config.test_max_length, scope=scope, **ntm_args)
    return cell, ntm
Example #8
  def setUp(self):
    '''
    Define the parameters used to build the NumPy (NP) reference forward
    pass, then run that forward pass.
    '''

    # Parameter definitions
    min_addresses = 5
    max_addresses = 10
    min_bits_per_address = 6
    max_bits_per_address = 12
    max_batch_size = 32
    min_batch_size = 10

    self.N = np.random.randint(low=min_addresses, high=max_addresses + 1)
    self.M = np.random.randint(low=min_bits_per_address,
                               high=max_bits_per_address + 1)
    #self.N, self.M = (10, 9)
    self.mem_size = (self.N, self.M)

    min_shifts = 3
    max_shifts = self.N - 1

    self.S = np.random.randint(low=min_shifts, high=max_shifts + 1)
    self.shift_range = self.S
    self.batch_size = np.random.randint(low=min_batch_size,
                                        high=max_batch_size)
    self.sequence_length = np.random.randint(low=3, high=max_addresses)
    #self.S, self.batch_size, self.sequence_length = (3, 12, 15)

    self.initial_state = NTMCell(self.mem_size,
                                 self.shift_range).bias_state(self.batch_size)

    self.controller_output = 10*np.random.rand(self.batch_size,
                                               self.sequence_length,
                                               4*self.M + 2*self.S + 6) - 5

    # Get the reference NP output for a single sequence (only one of the
    # batch items gets processed to completion).
    seq_initial_state = tuple([x[0, :] for x in self.initial_state])

    self.np_read_addresses, self.np_write_addresses, self.np_reads = \
      numpy_forward_pass(self.N,
                         self.M,
                         self.S,
                         seq_initial_state,
                         self.controller_output[0, :, :])
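
The 4*self.M + 2*self.S + 6 width of the fake controller output above corresponds to the per-step parameters of one write head plus one read head; the breakdown below follows the standard NTM head parameterization and is an assumption about this NTMCell's layout, not taken from the source.

# Hypothetical head layout giving 4*M + 2*S + 6 values per time step:
#   write head: key (M) + key strength (1) + gate (1) + shift (S)
#               + sharpening (1) + erase vector (M) + add vector (M) = 3*M + S + 3
#   read head:  key (M) + key strength (1) + gate (1) + shift (S)
#               + sharpening (1)                                     = M + S + 3
M, S = 9, 3
head_size = 4*M + 2*S + 6   # 48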
Example #9
  def __init__(self, mem_size, session, num_heads=1, shift_range=3,
               name="NTM"):
    '''
    Sets up an NTM without a controller, so all it does is apply the NTM
    memory operations to externally supplied (fake) controller input.
    '''

    self.mem_size = mem_size
    self.shift_range = shift_range
    self.sess = session
    self.num_heads = num_heads

    (_, num_bits) = self.mem_size
    dt = tf.float32

    head_size = 4*num_bits + 2*self.shift_range + 6

    with tf.variable_scope(name):

      self.ntm_cell = NTMCell(mem_size=self.mem_size,
                              num_shifts=self.shift_range)

      # [batch_size, sequence_length, 4*M + 2*S + 6]
      self.feed_controller_input = \
        tf.placeholder(dtype=dt,
                       shape=(None, None, head_size))

      # ([batch_size, ntm_cell.state_size[0]], ...)
      self.feed_initial_state = \
        tuple([tf.placeholder(dtype=dt, shape=(None, s))
               for s in self.ntm_cell.state_size])

      self.ntm_reads, self.ntm_last_state = \
        tf.nn.dynamic_rnn(cell=self.ntm_cell,
                          initial_state=self.feed_initial_state,
                          inputs=self.feed_controller_input, dtype=dt)

      self.write_head, self.read_head = \
        self.ntm_cell.head_pieces(self.feed_controller_input,
                                  mem_size=self.mem_size,
                                  num_shifts=self.shift_range, axis=2)
Example #10
    def _initialize(self, observation_t):
        image_t, last_label_t, _ = observation_t
        self.batch_size_t = tf.unpack(tf.shape(image_t))[0]
        _, self.image_height, self.image_width = image_t.get_shape().as_list()
        _, self.num_actions = last_label_t.get_shape().as_list()
        self.num_actions += 1  # for "pay for label"

        with tf.variable_scope("rnn"):
            if CELL_TYPE == 'lstm':
                self.rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(LSTM_STATE_SIZE)
            elif CELL_TYPE == 'ntm':
                print('ntm')
                self.rnn_cell = NTMCell(memory_slots=128,
                                        memory_width=40,
                                        controller_size=LSTM_STATE_SIZE)
            #self.rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(LSTM_STATE_SIZE, state_is_tuple=True)
            self.rnn_state_t = self.rnn_cell.zero_state(
                self.batch_size_t, tf.float32)

        self.q_t = self._Q(observation_t)
        self.a_t = None
        self.initialized = True
Example #11
    def __init__(self, mem_size, input_size, output_size, session,
                 num_heads=1, shift_range=3, name="NTM"):
        '''
        Builds the computation graph for the Neural Turing Machine.
        The tasks from the original paper call for the NTM to take in a
        sequence of arrays, and produce some output.
        Let B = batch size, T = sequence length, and L = array length, then
        a single input sequence is a matrix of size [TxL]. A batch of these
        input sequences has size [BxTxL].

        Arguments:
          mem_size - Tuple of integers corresponding to the number of storage
            locations and the dimension of each storage location (in the paper
            the memory matrix is NxM, mem_size refers to (N, M)).
          input_size - Integer number of elements in a single input vector
            (the value L).
          output_size - Integer number of elements in a single output vector.
          session - The TensorFlow session object that refers to the current
            computation graph.
          num_heads - The integer number of write heads the NTM uses (future
            feature).
          shift_range - The integer number of shift values that the read/write 
            heads can perform, which corresponds to the direction and magnitude
            of the allowable shifts.
            Shift ranges and corresponding available shift
            directions/magnitudes:
              3 => [-1, 0, 1]
              4 => [-2, -1, 0, 1] 
              5 => [-2, -1, 0, 1, 2]
          name - A string name for the variable scope, for troubleshooting.
        '''

        self.num_heads = 1  # multi-head operation is a future feature; force a single head
        self.sess = session
        self.S = shift_range
        self.N, self.M = mem_size
        self.in_size = input_size
        self.out_size = output_size

        num_lstm_units = 100
        self.dt = tf.float32

        dt = self.dt
        N = self.N
        M = self.M
        S = self.S
        num_heads = self.num_heads

        with tf.variable_scope(name):
            self.feed_in = tf.placeholder(dtype=dt,
                shape=(None, None, input_size))

            self.feed_out = tf.placeholder(dtype=dt,
                shape=(None, None, output_size))

            self.feed_learning_rate = tf.placeholder(dtype=dt, shape=())

            batch_size = tf.shape(self.feed_in)[0]
            seq_length = tf.shape(self.feed_in)[1]

            head_raw = self.controller(self.feed_in, batch_size, seq_length)

            self.ntm_cell = NTMCell(mem_size=(N, M), num_shifts=S)

            write_head, read_head = NTMCell.head_pieces(
                head_raw, mem_size=(N, M), num_shifts=S, axis=2)

            self.write_head, self.read_head = \
                head_pieces_tuple_to_dict(write_head, read_head)

            self.ntm_init_state = tuple(
                [tf.placeholder(dtype=dt, shape=(None, s)) \
                for s in self.ntm_cell.state_size])

            self.ntm_reads, self.ntm_last_state = tf.nn.dynamic_rnn(
                cell=self.ntm_cell, initial_state=self.ntm_init_state,
                inputs=head_raw, dtype=dt)

            self.w_read = self.ntm_last_state[-2]
            self.w_write = self.ntm_last_state[-1]

            ntm_reads_flat = tf.reshape(self.ntm_reads, [-1, M])

            L = tf.Variable(tf.random_normal([M, output_size]))
            b_L = tf.Variable(tf.random_normal([output_size,]))

            logits_flat = tf.matmul(ntm_reads_flat, L) + b_L
            targets_flat = tf.reshape(self.feed_out, [-1, output_size])

            self.error = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=targets_flat, logits=logits_flat))

            self.predictions = tf.sigmoid(
                tf.reshape(logits_flat, [batch_size, seq_length, output_size]))

            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.feed_learning_rate, momentum=0.9)

            grads_and_vars = optimizer.compute_gradients(self.error)
            capped_grads = [(tf.clip_by_value(grad, -10., 10.), var) \
                for grad, var in grads_and_vars]

            self.train_op = optimizer.apply_gradients(capped_grads)
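
The shift_range-to-offsets convention described in the docstring can be written as a one-liner; this is only a sketch of that mapping, not code from the NTMCell implementation itself.

def shift_offsets(shift_range):
    # 3 -> [-1, 0, 1], 4 -> [-2, -1, 0, 1], 5 -> [-2, -1, 0, 1, 2]
    return list(range(-(shift_range // 2), shift_range - shift_range // 2))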
Example #12
    def __init__(self,
                 mem_size,
                 input_size,
                 output_size,
                 session,
                 num_heads=1,
                 shift_range=3,
                 name="NTM"):

        self.num_heads = 1
        self.sess = session
        self.S = shift_range
        self.N, self.M = mem_size
        self.in_size = input_size
        self.out_size = output_size

        num_lstm_units = 100
        self.dt = tf.float32
        self.pi = 64

        pi = self.pi
        dt = self.dt
        N = self.N
        M = self.M
        S = self.S
        num_heads = self.num_heads

        with tf.variable_scope(name):
            self.feed_in = tf.placeholder(dtype=dt,
                                          shape=(None, None, input_size))

            self.feed_out = tf.placeholder(dtype=dt,
                                           shape=(None, None, output_size))

            self.feed_learning_rate = tf.placeholder(dtype=dt, shape=())

            batch_size = tf.shape(self.feed_in)[0]
            seq_length = tf.shape(self.feed_in)[1]

            head_raw = self.controller(self.feed_in, batch_size, seq_length)

            self.ntm_cell = NTMCell(mem_size=(N, M), shift_range=S)

            self.write_head, self.read_head = NTMCell.head_pieces(
                head_raw, mem_size=(N, M), shift_range=S, axis=2, style='dict')

            self.ntm_init_state = tuple(
                [tf.placeholder(dtype=dt, shape=(None, s)) \
                for s in self.ntm_cell.state_size])

            self.ntm_reads, self.ntm_last_state = tf.nn.dynamic_rnn(
                cell=self.ntm_cell,
                initial_state=self.ntm_init_state,
                inputs=head_raw,
                dtype=dt,
                parallel_iterations=pi)

            # Started conversion to the multi-head output here, still have
            # lots to do.
            self.w_read = self.ntm_last_state[N:N + num_heads]
            self.w_write = self.ntm_last_state[N + num_heads:N + 2 * num_heads]

            ntm_reads_flat = [tf.reshape(r, [-1, M]) for r in self.ntm_reads]

            L = tf.Variable(tf.random_normal([M, output_size]))
            b_L = tf.Variable(tf.random_normal([output_size,]))

            logits_flat = tf.matmul(ntm_reads_flat, L) + b_L
            targets_flat = tf.reshape(self.feed_out, [-1, output_size])

            self.error = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_flat,
                                                        logits=logits_flat))

            self.predictions = tf.sigmoid(
                tf.reshape(logits_flat, [batch_size, seq_length, output_size]))

            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.feed_learning_rate, momentum=0.9)

            grads_and_vars = optimizer.compute_gradients(self.error)
            capped_grads = [(tf.clip_by_value(grad, -10., 10.), var) \
                for grad, var in grads_and_vars]

            self.train_op = optimizer.apply_gradients(capped_grads)
Example #13
config = {
    'epoch': 100000,
    'input_dim': 7,
    'output_dim': 7,
    'length': 5,
    'controller_layer_size': 1,
    'write_head_size': 1,
    'read_head_size': 1,
    'checkpoint_dir': 'checkpoint'
}

if __name__ == "__main__":
    with tf.device('/cpu:0'), tf.Session() as sess:
        cell = NTMCell(input_dim=config['input_dim'],
                       output_dim=config['output_dim'],
                       controller_layer_size=config['controller_layer_size'],
                       write_head_size=config['write_head_size'],
                       read_head_size=config['read_head_size'],
                       controller_dim=32)
        ntm = NTM(cell, sess, config['length'] * 2 + 2)

        if not os.path.isdir(config['checkpoint_dir'] + '/copy_' +
                             str(config['length'] * 2 + 2)):
            print(" [*] Initialize all variables")
            tf.global_variables_initializer().run()
            print(" [*] Initialization finished")
        else:
            ntm.load(config['checkpoint_dir'], 'copy')

        start_time = time.time()
        print('')
        for idx in range(config['epoch']):