def create_ntm(config, sess, **ntm_args):
    if config.rand_hyper:
        # Sample the hyperparameters (or reload them at test time).
        if config.is_test:
            hyper_params = load_hyperparamters(config)
        else:
            hyper_params = generate_hyperparams(config)
        print(" [*] Hyperparameters: {}".format(hyper_params))

        cell = NTMCell(input_dim=config.input_dim,
                       output_dim=config.output_dim,
                       controller_layer_size=hyper_params["c_layer"],
                       controller_dim=hyper_params["c_dim"],
                       mem_size=hyper_params["mem_size"],
                       write_head_size=config.write_head_size,
                       read_head_size=config.read_head_size,
                       is_LSTM_mode=config.is_LSTM_mode)
        scope = ntm_args.pop('scope', 'NTM-%s' % config.task)

        # Sequence length = description + query + plan + answer.
        min_length = (config.min_size - 1) + 1 + config.plan_length + (config.min_size - 1)
        max_length = int((config.max_size * (config.max_size - 1) / 2) + 1
                         + config.plan_length + (config.max_size - 1))

        ntm = NTM(cell, sess, min_length, max_length,
                  config.min_size, config.max_size, scope=scope,
                  lr=hyper_params["lr"], momentum=hyper_params["momentum"],
                  decay=hyper_params["decay"], beta=hyper_params["l2"],
                  **ntm_args)
    else:
        cell = NTMCell(input_dim=config.input_dim,
                       output_dim=config.output_dim,
                       controller_layer_size=config.controller_layer_size,
                       controller_dim=config.controller_dim,
                       write_head_size=config.write_head_size,
                       read_head_size=config.read_head_size,
                       is_LSTM_mode=config.is_LSTM_mode)
        scope = ntm_args.pop('scope', 'NTM-%s' % config.task)

        # Sequence length = description + query + plan + answer.
        min_length = (config.min_size - 1) + 1 + config.plan_length + (config.min_size - 1)
        max_length = int((config.max_size * (config.max_size - 1) / 2) + 1
                         + config.plan_length + (config.max_size - 1))

        ntm = NTM(cell, sess, min_length, max_length,
                  config.min_size, config.max_size, scope=scope, **ntm_args)
    return cell, ntm
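# A minimal sketch of the sequence-length arithmetic used in create_ntm above,
# with assumed example values (min_size=3, max_size=5, plan_length=10 are
# illustrative only, not taken from any config here). The shortest description
# is min_size - 1 steps and the longest is max_size*(max_size - 1)/2 steps;
# both are followed by one query step, plan_length plan steps, and up to
# (size - 1) answer steps.
def _example_sequence_lengths(min_size=3, max_size=5, plan_length=10):
    min_length = (min_size - 1) + 1 + plan_length + (min_size - 1)
    max_length = int((max_size * (max_size - 1) / 2) + 1
                     + plan_length + (max_size - 1))
    return min_length, max_length  # (15, 25) for the assumed values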
def inference(images_t, last_labels_t):
    _, time_steps, width = images_t.get_shape().as_list()
    _, _, num_labels = last_labels_t.get_shape().as_list()

    with tf.variable_scope("rnn"):
        images_t = tf.reshape(images_t, (-1, time_steps, width))
        rnn_inputs_t = tf.concat((images_t, last_labels_t), 2)
        #keep_prob = tf.placeholder(tf.float32)
        #rnn_inputs_t = tf.nn.dropout(rnn_inputs, keep_prob)

        if CELL_TYPE == 'lstm':
            rnn_cell = tf.contrib.rnn.LSTMCell(LSTM_STATE_SIZE, activation=tf.nn.tanh)
        elif CELL_TYPE == 'ntm':
            print('ntm')
            rnn_cell = NTMCell(memory_slots=128, memory_width=40,
                               controller_size=LSTM_STATE_SIZE)

        # dynamic_rnn automatically unrolls the cell over the time dimension.
        rnn_output_t, rnn_final_state_t = tf.nn.dynamic_rnn(
            rnn_cell, rnn_inputs_t, time_major=False, dtype=tf.float32,
            swap_memory=False)

        rnn_output_size = rnn_output_t.get_shape().as_list()[-1]
        W_t = tf.get_variable("W", (rnn_output_size, num_labels),
                              initializer=tf.random_normal_initializer(stddev=0.1))
        b_t = tf.get_variable("b", (num_labels,),
                              initializer=tf.constant_initializer(0.0))
        logits_t = tf.matmul(tf.reshape(rnn_output_t, (-1, rnn_output_size)), W_t) + b_t
        logits_t = tf.reshape(logits_t, (-1, time_steps, num_labels))

    return logits_t
def inference(images_t, last_labels_t):
    _, time_steps, height, width = images_t.get_shape().as_list()
    _, _, num_labels = last_labels_t.get_shape().as_list()

    with tf.variable_scope("rnn"):
        images_t = tf.reshape(images_t, (-1, time_steps, height * width))
        rnn_inputs_t = tf.concat(2, (images_t, last_labels_t))

        if CELL_TYPE == 'lstm':
            rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(LSTM_STATE_SIZE)
        elif CELL_TYPE == 'ntm':
            print('ntm')
            rnn_cell = NTMCell(memory_slots=128, memory_width=40,
                               controller_size=LSTM_STATE_SIZE)

        rnn_output_t, rnn_final_state_t = tf.nn.dynamic_rnn(
            rnn_cell, rnn_inputs_t, time_major=False, dtype=tf.float32,
            swap_memory=False)

    with tf.variable_scope("fcout"):
        rnn_output_size = rnn_output_t.get_shape().as_list()[-1]
        W_t = tf.get_variable(
            "W", (rnn_output_size, num_labels),
            initializer=tf.random_normal_initializer(stddev=0.1))
        b_t = tf.get_variable("b", (num_labels,),
                              initializer=tf.constant_initializer(0.0))
        logits_t = tf.matmul(tf.reshape(rnn_output_t, (-1, rnn_output_size)), W_t) + b_t
        logits_t = tf.reshape(logits_t, (-1, time_steps, num_labels))

    return logits_t
def predict_train(config, sess):
    """Train an NTM on the copy task, given a TensorFlow session (the
    connection to the C++ backend)."""
    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # Delimiter flag vectors marking the start and end of a sequence;
    # these are the markers visible in the figure examples in the README.
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    # Initialise the NTM cell (memory + heads) and its controller network.
    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length * 3)

    print(" [*] Initialize all variables")
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    for idx in xrange(config.epoch):
        # Generate a sequence of random length.
        seq_length = randint(config.min_length, config.max_length) * 4
        inc_seq, comp_seq = generate_predict_sequence(seq_length, config.input_dim - 2)

        # Bind the desired inputs and outputs to the NTM's placeholders.
        feed_dict = {input_: vec for vec, input_ in zip(inc_seq, ntm.inputs)}
        feed_dict.update(
            {true_output: vec for vec, true_output in zip(comp_seq, ntm.true_outputs)})
        feed_dict.update({
            ntm.start_symbol: start_symbol,
            ntm.end_symbol: end_symbol
        })

        # Run one training step and fetch the current loss and global step.
        _, cost, step = sess.run([ntm.optims[seq_length],
                                  ntm.get_loss(seq_length),
                                  ntm.global_step], feed_dict=feed_dict)

        # Periodically write a checkpoint that can later be restored with ntm.load().
        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)
        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)"
                  % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
def create_ntm(FLAGS, sess, **ntm_args):
    cell = NTMCell(
        input_dim=FLAGS.input_dim,
        output_dim=FLAGS.output_dim,
        controller_layer_size=FLAGS.controller_layer_size,
        write_head_size=FLAGS.write_head_size,
        read_head_size=FLAGS.read_head_size)
    ntm = NTM(
        cell, sess,
        FLAGS.min_length, FLAGS.max_length,
        test_max_length=FLAGS.test_max_length,
        scope='NTM-%s' % FLAGS.task, **ntm_args)
    return cell, ntm
def copy_train(config):
    sess = config.sess

    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # Delimiter flags for the start and end of a sequence.
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length)

    print(" [*] Initialize all variables")
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    for idx in xrange(config.epoch):
        seq_length = randint(config.min_length, config.max_length)
        seq = generate_copy_sequence(seq_length, config.input_dim - 2)

        feed_dict = {input_: vec for vec, input_ in zip(seq, ntm.inputs)}
        feed_dict.update({
            true_output: vec
            for vec, true_output in zip(seq, ntm.true_outputs)
        })
        feed_dict.update({
            ntm.start_symbol: start_symbol,
            ntm.end_symbol: end_symbol
        })

        _, cost, step = sess.run([
            ntm.optims[seq_length],
            ntm.get_loss(seq_length),
            ntm.global_step
        ], feed_dict=feed_dict)

        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)
        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)"
                  % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
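# A hedged sketch of what generate_copy_sequence() above plausibly produces,
# inferred only from how copy_train() consumes it: seq_length random binary
# vectors of width (input_dim - 2), padded with two extra dimensions left at
# zero so the start/end delimiter flags have dedicated slots. The helper name
# and the exact column layout are assumptions, not the repo's implementation;
# numpy is assumed to be imported as np, as elsewhere in this file.
def example_generate_copy_sequence(seq_length, bits):
    payload = np.random.randint(2, size=(seq_length, bits)).astype(np.float32)
    flags = np.zeros((seq_length, 2), dtype=np.float32)  # reserved for delimiters
    return list(np.concatenate([flags, payload], axis=1))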
def create_ntm(config, sess, **ntm_args):
    cell = NTMCell(
        input_dim=config.input_dim,
        output_dim=config.output_dim,
        controller_layer_size=config.controller_layer_size,
        controller_dim=config.controller_dim,
        write_head_size=config.write_head_size,
        read_head_size=config.read_head_size)
    scope = ntm_args.pop('scope', 'NTM-%s' % config.task)
    ntm = NTM(
        cell, sess,
        config.min_length, config.max_length,
        test_max_length=config.test_max_length,
        scope=scope, **ntm_args)
    return cell, ntm
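# Illustrative only: a minimal sketch of how create_ntm above might be driven.
# ExampleConfig and its concrete values are assumptions chosen to cover the
# attributes the function reads; they are not a configuration shipped with
# this code, and the TF1-style Session usage matches the rest of the file.
import tensorflow as tf
from collections import namedtuple

ExampleConfig = namedtuple("ExampleConfig", [
    "input_dim", "output_dim", "controller_layer_size", "controller_dim",
    "write_head_size", "read_head_size", "min_length", "max_length",
    "test_max_length", "task"])

def example_create_ntm_usage():
    # Hypothetical values for a small copy task.
    config = ExampleConfig(
        input_dim=10, output_dim=10, controller_layer_size=1,
        controller_dim=100, write_head_size=1, read_head_size=1,
        min_length=1, max_length=10, test_max_length=120, task="copy")
    with tf.Session() as sess:
        cell, ntm = create_ntm(config, sess)
        return cell, ntm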
def setUp(self):
    '''
    Define the parameters that will be used to create the NumPy forward
    pass, then perform the NumPy forward pass.
    '''
    # Parameter definitions
    min_addresses = 5
    max_addresses = 10
    min_bits_per_address = 6
    max_bits_per_address = 12
    max_batch_size = 32
    min_batch_size = 10

    self.N = np.random.randint(low=min_addresses, high=max_addresses + 1)
    self.M = np.random.randint(low=min_bits_per_address,
                               high=max_bits_per_address + 1)
    #self.N, self.M = (10, 9)
    self.mem_size = (self.N, self.M)

    min_shifts = 3
    max_shifts = self.N - 1
    self.S = np.random.randint(low=min_shifts, high=max_shifts + 1)
    self.shift_range = self.S
    self.batch_size = np.random.randint(low=min_batch_size, high=max_batch_size)
    self.sequence_length = np.random.randint(low=3, high=max_addresses)
    #self.S, self.batch_size, self.sequence_length = (3, 12, 15)

    self.initial_state = NTMCell(self.mem_size,
                                 self.shift_range).bias_state(self.batch_size)
    self.controller_output = 10 * np.random.rand(
        self.batch_size, self.sequence_length,
        4 * self.M + 2 * self.S + 6) - 5

    # Get the reference NumPy output for a single sequence (only one of the
    # batch items gets processed to completion).
    seq_initial_state = tuple([x[0, :] for x in self.initial_state])
    self.np_read_addresses, self.np_write_addresses, self.np_reads = \
        numpy_forward_pass(self.N, self.M, self.S, seq_initial_state,
                           self.controller_output[0, :, :])
def __init__(self, mem_size, session, num_heads=1, shift_range=3, name="NTM"):
    '''
    Sets up an NTM without the controller, so all this does is apply the
    NTM memory operations to some set of fake input.
    '''
    self.mem_size = mem_size
    self.shift_range = shift_range
    self.sess = session
    self.num_heads = num_heads

    (_, num_bits) = self.mem_size
    dt = tf.float32
    head_size = 4 * num_bits + 2 * self.shift_range + 6

    with tf.variable_scope(name):
        self.ntm_cell = NTMCell(mem_size=self.mem_size,
                                num_shifts=self.shift_range)

        # [batch_size, sequence_length, 4*M + 2*S + 6]
        self.feed_controller_input = \
            tf.placeholder(dtype=dt, shape=(None, None, head_size))

        # ([batch_size, ntm_cell.state_size[0]], ...)
        self.feed_initial_state = \
            tuple([tf.placeholder(dtype=dt, shape=(None, s))
                   for s in self.ntm_cell.state_size])

        self.ntm_reads, self.ntm_last_state = \
            tf.nn.dynamic_rnn(cell=self.ntm_cell,
                              initial_state=self.feed_initial_state,
                              inputs=self.feed_controller_input,
                              dtype=dt)

        self.write_head, self.read_head = \
            self.ntm_cell.head_pieces(self.feed_controller_input,
                                      mem_size=self.mem_size,
                                      num_shifts=self.shift_range,
                                      axis=2)
def _initialize(self, observation_t):
    image_t, last_label_t, _ = observation_t
    self.batch_size_t = tf.unpack(tf.shape(image_t))[0]
    _, self.image_height, self.image_width = image_t.get_shape().as_list()
    _, self.num_actions = last_label_t.get_shape().as_list()
    self.num_actions += 1  # extra action for "pay for label"

    with tf.variable_scope("rnn"):
        if CELL_TYPE == 'lstm':
            self.rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(LSTM_STATE_SIZE)
        elif CELL_TYPE == 'ntm':
            print('ntm')
            self.rnn_cell = NTMCell(memory_slots=128, memory_width=40,
                                    controller_size=LSTM_STATE_SIZE)
        #self.rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(LSTM_STATE_SIZE, state_is_tuple=True)
        self.rnn_state_t = self.rnn_cell.zero_state(self.batch_size_t, tf.float32)

    self.q_t = self._Q(observation_t)
    self.a_t = None
    self.initialized = True
def __init__(self, mem_size, input_size, output_size, session,
             num_heads=1, shift_range=3, name="NTM"):
    '''
    Builds the computation graph for the Neural Turing Machine.

    The tasks from the original paper call for the NTM to take in a
    sequence of arrays and produce some output. Let B = batch size,
    T = sequence length, and L = array length; then a single input
    sequence is a matrix of size [TxL], and a batch of these input
    sequences has size [BxTxL].

    Arguments:
      mem_size - Tuple of integers corresponding to the number of storage
        locations and the dimension of each storage location (in the paper
        the memory matrix is NxM; mem_size refers to (N, M)).
      input_size - Integer number of elements in a single input vector
        (the value L).
      output_size - Integer number of elements in a single output vector.
      session - The TensorFlow session object that refers to the current
        computation graph.
      num_heads - The integer number of write heads the NTM uses (future
        feature).
      shift_range - The integer number of shift values that the read/write
        heads can perform, which corresponds to the direction and magnitude
        of the allowable shifts. Shift ranges and corresponding available
        shift directions/magnitudes:
          3 => [-1, 0, 1]
          4 => [-2, -1, 0, 1]
          5 => [-2, -1, 0, 1, 2]
      name - A string name for the variable scope, for troubleshooting.
    '''
    self.num_heads = 1
    self.sess = session
    self.S = shift_range
    self.N, self.M = mem_size
    self.in_size = input_size
    self.out_size = output_size
    num_lstm_units = 100
    self.dt = tf.float32

    dt = self.dt
    N = self.N
    M = self.M
    S = self.S
    num_heads = self.num_heads

    with tf.variable_scope(name):
        self.feed_in = tf.placeholder(dtype=dt, shape=(None, None, input_size))
        self.feed_out = tf.placeholder(dtype=dt, shape=(None, None, output_size))
        self.feed_learning_rate = tf.placeholder(dtype=dt, shape=())

        batch_size = tf.shape(self.feed_in)[0]
        seq_length = tf.shape(self.feed_in)[1]

        # The controller produces the raw head parameters from the input.
        head_raw = self.controller(self.feed_in, batch_size, seq_length)

        self.ntm_cell = NTMCell(mem_size=(N, M), num_shifts=S)

        write_head, read_head = NTMCell.head_pieces(
            head_raw, mem_size=(N, M), num_shifts=S, axis=2)

        self.write_head, self.read_head = \
            head_pieces_tuple_to_dict(write_head, read_head)

        self.ntm_init_state = tuple(
            [tf.placeholder(dtype=dt, shape=(None, s))
             for s in self.ntm_cell.state_size])

        self.ntm_reads, self.ntm_last_state = tf.nn.dynamic_rnn(
            cell=self.ntm_cell, initial_state=self.ntm_init_state,
            inputs=head_raw, dtype=dt)

        self.w_read = self.ntm_last_state[-2]
        self.w_write = self.ntm_last_state[-1]

        # Linear readout from the memory reads to the output logits.
        ntm_reads_flat = tf.reshape(self.ntm_reads, [-1, M])
        L = tf.Variable(tf.random_normal([M, output_size]))
        b_L = tf.Variable(tf.random_normal([output_size, ]))

        logits_flat = tf.matmul(ntm_reads_flat, L) + b_L
        targets_flat = tf.reshape(self.feed_out, [-1, output_size])

        self.error = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=targets_flat, logits=logits_flat))

        self.predictions = tf.sigmoid(
            tf.reshape(logits_flat, [batch_size, seq_length, output_size]))

        # RMSProp with gradients clipped to [-10, 10].
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=self.feed_learning_rate, momentum=0.9)

        grads_and_vars = optimizer.compute_gradients(self.error)
        capped_grads = [(tf.clip_by_value(grad, -10., 10.), var)
                        for grad, var in grads_and_vars]
        self.train_op = optimizer.apply_gradients(capped_grads)
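# Illustrative only: a minimal sketch of one training step against an instance
# of the class defined above. The batch and sequence sizes are assumptions,
# the copy-style target is just an example, and the feedable initial state is
# built with NTMCell.bias_state(), mirroring the unit-test setUp() elsewhere
# in this collection rather than a documented API. numpy is assumed imported
# as np, as in the rest of the file.
def example_train_step(sess, ntm, batch_size=8, seq_length=12):
    inputs = np.random.randint(2, size=(batch_size, seq_length, ntm.in_size))
    targets = inputs.copy()  # e.g. a copy task: reproduce the input

    feed_dict = {
        ntm.feed_in: inputs.astype(np.float32),
        ntm.feed_out: targets.astype(np.float32),
        ntm.feed_learning_rate: 1e-4,
    }
    # Feed a default starting state for every placeholder in the state tuple,
    # built the same way the tests build it.
    init_state = ntm.ntm_cell.bias_state(batch_size)
    feed_dict.update(dict(zip(ntm.ntm_init_state, init_state)))

    error, _ = sess.run([ntm.error, ntm.train_op], feed_dict=feed_dict)
    return error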
def __init__(self, mem_size, input_size, output_size, session,
             num_heads=1, shift_range=3, name="NTM"):
    self.num_heads = 1
    self.sess = session
    self.S = shift_range
    self.N, self.M = mem_size
    self.in_size = input_size
    self.out_size = output_size
    num_lstm_units = 100
    self.dt = tf.float32
    self.pi = 64

    pi = self.pi
    dt = self.dt
    N = self.N
    M = self.M
    S = self.S
    num_heads = self.num_heads

    with tf.variable_scope(name):
        self.feed_in = tf.placeholder(dtype=dt, shape=(None, None, input_size))
        self.feed_out = tf.placeholder(dtype=dt, shape=(None, None, output_size))
        self.feed_learning_rate = tf.placeholder(dtype=dt, shape=())

        batch_size = tf.shape(self.feed_in)[0]
        seq_length = tf.shape(self.feed_in)[1]

        head_raw = self.controller(self.feed_in, batch_size, seq_length)

        self.ntm_cell = NTMCell(mem_size=(N, M), shift_range=S)

        self.write_head, self.read_head = NTMCell.head_pieces(
            head_raw, mem_size=(N, M), shift_range=S, axis=2, style='dict')

        self.ntm_init_state = tuple(
            [tf.placeholder(dtype=dt, shape=(None, s))
             for s in self.ntm_cell.state_size])

        self.ntm_reads, self.ntm_last_state = tf.nn.dynamic_rnn(
            cell=self.ntm_cell, initial_state=self.ntm_init_state,
            inputs=head_raw, dtype=dt, parallel_iterations=pi)

        # Started conversion to the multi-head output here, still have
        # lots to do (ntm_reads_flat below is a list of per-head tensors,
        # so the readout is not yet wired up for multiple heads).
        self.w_read = self.ntm_last_state[N:N + num_heads]
        self.w_write = self.ntm_last_state[N + num_heads:N + 2 * num_heads]

        ntm_reads_flat = [tf.reshape(r, [-1, M]) for r in self.ntm_reads]

        L = tf.Variable(tf.random_normal([M, output_size]))
        b_L = tf.Variable(tf.random_normal([output_size, ]))

        logits_flat = tf.matmul(ntm_reads_flat, L) + b_L
        targets_flat = tf.reshape(self.feed_out, [-1, output_size])

        self.error = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_flat,
                                                    logits=logits_flat))

        self.predictions = tf.sigmoid(
            tf.reshape(logits_flat, [batch_size, seq_length, output_size]))

        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=self.feed_learning_rate, momentum=0.9)

        grads_and_vars = optimizer.compute_gradients(self.error)
        capped_grads = [(tf.clip_by_value(grad, -10., 10.), var)
                        for grad, var in grads_and_vars]
        self.train_op = optimizer.apply_gradients(capped_grads)
config = {
    'epoch': 100000,
    'input_dim': 7,
    'output_dim': 7,
    'length': 5,
    'controller_layer_size': 1,
    'write_head_size': 1,
    'read_head_size': 1,
    'checkpoint_dir': 'checkpoint'
}

if __name__ == "__main__":
    with tf.device('/cpu:0'), tf.Session() as sess:
        cell = NTMCell(input_dim=config['input_dim'],
                       output_dim=config['output_dim'],
                       controller_layer_size=config['controller_layer_size'],
                       write_head_size=config['write_head_size'],
                       read_head_size=config['read_head_size'],
                       controller_dim=32)
        ntm = NTM(cell, sess, config['length'] * 2 + 2)

        if not os.path.isdir(config['checkpoint_dir'] + '/copy_'
                             + str(config['length'] * 2 + 2)):
            print(" [*] Initialize all variables")
            tf.global_variables_initializer().run()
            print(" [*] Initialization finished")
        else:
            ntm.load(config['checkpoint_dir'], 'copy')

        start_time = time.time()
        print('')
        for idx in range(config['epoch']):
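            # NOTE: the original file is truncated at this loop. The body
            # below is a hedged reconstruction modelled on copy_train()
            # earlier in this collection; every attribute it touches
            # (ntm.inputs, ntm.true_outputs, ntm.optims, ntm.get_loss,
            # ntm.global_step, ntm.save) is borrowed from that function and
            # is an assumption about this NTM variant, not the missing code.
            seq_length = config['length']
            seq = generate_copy_sequence(seq_length, config['input_dim'] - 2)

            feed_dict = {input_: vec for vec, input_ in zip(seq, ntm.inputs)}
            feed_dict.update({true_output: vec
                              for vec, true_output in zip(seq, ntm.true_outputs)})

            _, cost, step = sess.run([ntm.optims[seq_length],
                                      ntm.get_loss(seq_length),
                                      ntm.global_step], feed_dict=feed_dict)

            if idx % 100 == 0:
                ntm.save(config['checkpoint_dir'], 'copy', step)
                print("[%5d] %2d: %.2f (%.1fs)"
                      % (idx, seq_length, cost, time.time() - start_time))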