Example #1
0
def predict_train(config, sess):
    """Train an NTM on sequences produced by ``generate_predict_sequence``.

    Note that checkpoints are still written under the name ``'copy'`` and the
    closing log line still mentions the copy task; both are left unchanged.

    Progress is printed every ``print_interval`` iterations; that name is not
    defined in this function and must be available in the enclosing scope.

    Args:
        config: Settings object. Reads ``checkpoint_dir``, ``input_dim``,
            ``output_dim``, ``controller_layer_size``, ``write_head_size``,
            ``read_head_size``, ``min_length``, ``max_length`` and ``epoch``.
        sess: TensorFlow session handed to ``NTM`` and used to run each step.

    Returns:
        The ``(cell, ntm)`` pair that was built and trained.

    Raises:
        Exception: If ``config.checkpoint_dir`` is not an existing directory.
    """
    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # Delimiter vectors fed alongside each sequence: index 0 is set for the
    # start marker and index 1 for the end marker.
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    # NOTE(review): the NTM is built with max_length * 3, yet seq_length below
    # can reach max_length * 4 -- confirm ntm.optims covers every drawn length.
    ntm = NTM(cell, sess, config.min_length, config.max_length * 3)

    print(" [*] Initialize all variables")
    # .run() with no session argument relies on a default session being
    # active -- presumably `sess`; confirm with the caller.
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    # `range` rather than the Python 2-only `xrange`, so the loop also runs
    # on Python 3.
    for idx in range(config.epoch):
        # Draw a random length, always a multiple of 4.
        seq_length = randint(config.min_length, config.max_length) * 4
        inc_seq, comp_seq = generate_predict_sequence(seq_length,
                                                      config.input_dim - 2)

        # Pair each input / target placeholder with its vector; zip stops at
        # the shorter side, so unused placeholders are simply not fed.
        feed_dict = dict(zip(ntm.inputs, inc_seq))
        feed_dict.update(zip(ntm.true_outputs, comp_seq))
        feed_dict[ntm.start_symbol] = start_symbol
        feed_dict[ntm.end_symbol] = end_symbol

        # One optimisation step for this length; also fetch the loss and the
        # global step (the latter is used to label checkpoints).
        _, cost, step = sess.run([ntm.optims[seq_length],
                                  ntm.get_loss(seq_length),
                                  ntm.global_step], feed_dict=feed_dict)

        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)

        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)" \
                % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
Example #2
0
def copy_train(config):
    """Train an NTM on the copy task and return the trained objects.

    Each iteration draws a random sequence length, generates a sequence with
    ``generate_copy_sequence``, and feeds the same sequence as both the input
    and the target. A checkpoint named ``'copy'`` is saved every 100
    iterations, and progress is printed every ``print_interval`` iterations
    (a name that must be available in the enclosing scope).

    Args:
        config: Settings object. Reads ``sess``, ``checkpoint_dir``,
            ``input_dim``, ``output_dim``, ``controller_layer_size``,
            ``write_head_size``, ``read_head_size``, ``min_length``,
            ``max_length`` and ``epoch``.

    Returns:
        The ``(cell, ntm)`` pair that was built and trained.

    Raises:
        Exception: If ``config.checkpoint_dir`` is not an existing directory.
    """
    sess = config.sess

    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # Delimiter vectors: index 0 is set for the start marker, index 1 for
    # the end marker.
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length)

    print(" [*] Initialize all variables")
    # .run() with no session argument relies on a default session being
    # active -- presumably `config.sess`; confirm with the caller.
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    # `range` rather than the Python 2-only `xrange`, so the loop also runs
    # on Python 3.
    for idx in range(config.epoch):
        seq_length = randint(config.min_length, config.max_length)
        seq = generate_copy_sequence(seq_length, config.input_dim - 2)

        # The target equals the input for the copy task, so the same vectors
        # are bound to both the input and the true-output placeholders.
        feed_dict = dict(zip(ntm.inputs, seq))
        feed_dict.update(zip(ntm.true_outputs, seq))
        feed_dict[ntm.start_symbol] = start_symbol
        feed_dict[ntm.end_symbol] = end_symbol

        # One optimisation step; the global step labels the checkpoint.
        fetches = [
            ntm.optims[seq_length],
            ntm.get_loss(seq_length),
            ntm.global_step,
        ]
        _, cost, step = sess.run(fetches, feed_dict=feed_dict)

        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)

        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)" \
                % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
Example #3
0
def copy_train(config):
    """Build an NTM, train it on the copy task, and return it.

    For ``config.epoch`` iterations a sequence of random length is produced
    by ``generate_copy_sequence`` and used as both input and expected output.
    Every 100th iteration a checkpoint named ``'copy'`` is written to
    ``config.checkpoint_dir``; every ``print_interval``-th iteration a
    progress line is printed (``print_interval`` is expected to exist in the
    enclosing scope).

    Args:
        config: Settings object. Reads ``sess``, ``checkpoint_dir``,
            ``input_dim``, ``output_dim``, ``controller_layer_size``,
            ``write_head_size``, ``read_head_size``, ``min_length``,
            ``max_length`` and ``epoch``.

    Returns:
        Tuple ``(cell, ntm)`` of the constructed cell and the trained model.

    Raises:
        Exception: If ``config.checkpoint_dir`` is not an existing directory.
    """
    sess = config.sess

    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # Start / end delimiter flags: position 0 marks the start vector and
    # position 1 marks the end vector.
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length)

    print(" [*] Initialize all variables")
    # No session is passed to .run(), so a default session must be active --
    # presumably `config.sess`; confirm with the caller.
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    # `xrange` exists only on Python 2; `range` keeps the loop working on
    # both Python 2 and Python 3.
    for idx in range(config.epoch):
        seq_length = randint(config.min_length, config.max_length)
        seq = generate_copy_sequence(seq_length, config.input_dim - 2)

        # Bind placeholders to vectors. zip truncates to the shorter side, so
        # only the first len(seq) placeholders receive a value.
        feed_dict = {}
        feed_dict.update(zip(ntm.inputs, seq))
        feed_dict.update(zip(ntm.true_outputs, seq))
        feed_dict.update({
            ntm.start_symbol: start_symbol,
            ntm.end_symbol: end_symbol,
        })

        # Run the optimiser for this length and read back loss + global step.
        _, cost, step = sess.run(
            [ntm.optims[seq_length],
             ntm.get_loss(seq_length),
             ntm.global_step],
            feed_dict=feed_dict)

        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)

        if idx % print_interval == 0:
            elapsed = time.time() - start_time
            print("[%5d] %2d: %.2f (%.1fs)" \
                % (idx, seq_length, cost, elapsed))

    print("Training Copy task finished")
    return cell, ntm
Example #4
0
                            turing_machine.sequence_length:
                            2 * random_length + 2
                        })

                    #  TODO: This works for batch size = 1
                    seq_out = np.round(np.reshape(temp_output,
                                                  (1, -1))).tolist()[0]
                    seq_target = np.reshape(target_output, (1, -1)).tolist()[0]
                    dist = hamming_distance(seq_out, seq_target)
                    val = tf.compat.v1.Summary.Value(tag="Hamming_%",
                                                     simple_value=dist)
                    summary2 = tf.compat.v1.Summary(value=[val])

                    summarizer.add_summary(summary, i)
                    summarizer.add_summary(summary2, i)

                    cycle_time = time.perf_counter() - last_time
                    print(f"Iteration: {i}/{iterations}")
                    print("Avg. Logistic Loss: %.4f" %
                          (np.mean(last_100_losses) / batch_size))
                    print(
                        "Time needed: %.4f s, sample/sec: %d" %
                        (cycle_time, log_frequency * batch_size / cycle_time))
                    last_100_losses = []
                    last_time = time.perf_counter()

                if take_checkpoint:
                    llprint("\nSaving Checkpoint ... "),
                    turing_machine.save(session, ckpts_dir, 'step-%d' % (i))
                    llprint("Done!\n")
Example #5
0
            print(" [*] Initialization finished")
        else:
            ntm.load(config['checkpoint_dir'], 'copy')

        start_time = time.time()
        print('')
        for idx in range(config['epoch']):
            seq_length = np.random.randint(2, config['length'] + 1)
            X, Y, masks = build_seq_batch(seq_length, config['length'],
                                          config['input_dim'] - 2)

            feed_dict = {ntm.inputs: X, ntm.true_outputs: Y, ntm.masks: masks}

            if idx % print_interval != 0:
                _, cost, step = sess.run(
                    [ntm.optims, ntm.losses, ntm.global_step],
                    feed_dict=feed_dict)
            else:
                _, cost, step, Y_pre = sess.run(
                    [ntm.optims, ntm.losses, ntm.global_step, ntm.outputs],
                    feed_dict=feed_dict)
                print("[%5d] %2d: %.4f (%.1fs)" \
                      % (idx, seq_length, cost, time.time() - start_time))
                Y_pre = np.array(Y_pre)
                mask_id = masks.reshape(-1).astype(bool)
                print(np.argmax(Y, axis=1)[mask_id])
                print(np.argmax(Y_pre, axis=1)[mask_id])

        print("Training Copy task finished")
        ntm.save(config['checkpoint_dir'], 'n_copy', idx)