def predict_train(config, sess):
    """Train an NTM on the predict (sequence-completion) task.

    Args:
        config: settings object; reads checkpoint_dir, input_dim, output_dim,
            controller_layer_size, write_head_size, read_head_size,
            min_length, max_length and epoch.
        sess: an open TensorFlow session (the connection to the C++ backend).

    Returns:
        (cell, ntm): the NTMCell controller and the NTM wrapper, trained
        in place for config.epoch iterations.

    Raises:
        Exception: if config.checkpoint_dir does not exist.
    """
    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # Delimiter flag vectors marking the start and end of a sequence:
    # one-hot in the first two input dimensions.
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    # Initialise the NTM and its neural-net controller.
    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    # BUG FIX: sequence lengths below are drawn up to config.max_length * 4,
    # but the graph was previously only built for lengths up to
    # config.max_length * 3, so ntm.optims[seq_length] could miss the longest
    # lengths. Build the graph to max_length * 4 so every drawn length exists.
    ntm = NTM(cell, sess, config.min_length, config.max_length * 4)

    print(" [*] Initialize all variables")
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    for idx in xrange(config.epoch):
        # Generate a sequence of random length (scaled by 4 for this task).
        seq_length = randint(config.min_length, config.max_length) * 4
        inc_seq, comp_seq = generate_predict_sequence(
            seq_length, config.input_dim - 2)

        # Bind the incomplete sequence to the NTM inputs and the completed
        # sequence to the training targets.
        feed_dict = {input_: vec for vec, input_ in zip(inc_seq, ntm.inputs)}
        feed_dict.update(
            {true_output: vec
             for vec, true_output in zip(comp_seq, ntm.true_outputs)})
        feed_dict.update({
            ntm.start_symbol: start_symbol,
            ntm.end_symbol: end_symbol,
        })

        # One optimization step for this sequence length; also fetch the
        # current loss and the global step counter.
        _, cost, step = sess.run([ntm.optims[seq_length],
                                  ntm.get_loss(seq_length),
                                  ntm.global_step], feed_dict=feed_dict)

        # Checkpoint every 100 iterations.
        # NOTE(review): the task name saved here is 'copy' even though this is
        # the predict task — confirm whether that is intentional before
        # changing it (existing checkpoints may depend on it).
        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)
        # print_interval is presumably a module-level constant — not visible
        # in this chunk; TODO confirm.
        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)"
                  % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
# NOTE(review): this definition of copy_train is immediately shadowed by a
# near-identical redefinition later in this file, so this version is dead
# code. Confirm which copy is intended and remove the other.
def copy_train(config):
    """Train an NTM on the copy task using the session held by config.

    Returns the (NTMCell, NTM) pair after config.epoch training iterations.
    Raises Exception if config.checkpoint_dir does not exist.
    """
    sess = config.sess
    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # delimiter flag for start and end
    # (one-hot in the first two input dimensions)
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length)

    print(" [*] Initialize all variables")
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    for idx in xrange(config.epoch):
        seq_length = randint(config.min_length, config.max_length)
        seq = generate_copy_sequence(seq_length, config.input_dim - 2)

        # Copy task: the same sequence is both input and training target.
        feed_dict = {input_: vec for vec, input_ in zip(seq, ntm.inputs)}
        feed_dict.update({
            true_output: vec for vec, true_output in zip(seq, ntm.true_outputs)
        })
        feed_dict.update({
            ntm.start_symbol: start_symbol,
            ntm.end_symbol: end_symbol
        })

        # One optimization step; also fetch loss and the global step counter.
        _, cost, step = sess.run([
            ntm.optims[seq_length],
            ntm.get_loss(seq_length),
            ntm.global_step
        ], feed_dict=feed_dict)

        # checkpoint every 100 iterations
        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)
        # print_interval is presumably a module-level constant — TODO confirm
        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)" \
                % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
def copy_train(config):
    """Train an NTM on the copy task.

    Builds an NTM whose graph covers sequence lengths from config.min_length
    to config.max_length, then repeatedly feeds it a random-length sequence
    as both input and target, checkpointing every 100 iterations.

    Returns the (NTMCell, NTM) pair after config.epoch iterations.
    Raises Exception if config.checkpoint_dir does not exist.
    """
    sess = config.sess
    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # One-hot delimiter vectors: dimension 0 flags sequence start,
    # dimension 1 flags sequence end.
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol[1] = 1

    ntm_cell = NTMCell(input_dim=config.input_dim,
                       output_dim=config.output_dim,
                       controller_layer_size=config.controller_layer_size,
                       write_head_size=config.write_head_size,
                       read_head_size=config.read_head_size)
    ntm = NTM(ntm_cell, sess, config.min_length, config.max_length)

    print(" [*] Initialize all variables")
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    for iteration in xrange(config.epoch):
        length = randint(config.min_length, config.max_length)
        seq = generate_copy_sequence(length, config.input_dim - 2)

        # The copy task trains the network to echo its input: the same
        # sequence is fed as both input and target.
        feed_dict = dict(zip(ntm.inputs, seq))
        feed_dict.update(zip(ntm.true_outputs, seq))
        feed_dict[ntm.start_symbol] = start_symbol
        feed_dict[ntm.end_symbol] = end_symbol

        # One optimization step for this length; also fetch the loss and the
        # global step counter.
        fetches = [ntm.optims[length], ntm.get_loss(length), ntm.global_step]
        _, cost, step = sess.run(fetches, feed_dict=feed_dict)

        # Checkpoint every 100 iterations.
        if iteration % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)
        if iteration % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)"
                  % (iteration, length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return ntm_cell, ntm