Example #1
    with graph.as_default():
        with tf.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  momentum=momentum)
            summarizer = tf.train.SummaryWriter(tb_logs_dir, session.graph)

            ncomputer = DNC(FeedforwardController, input_size, output_size,
                            2 * sequence_max_length + 1, words_count,
                            word_size, read_heads, batch_size)

            # squash the DNC output between 0 and 1
            output, _ = ncomputer.get_outputs()
            squashed_output = tf.clip_by_value(tf.sigmoid(output), 1e-6,
                                               1. - 1e-6)

            loss = binary_cross_entropy(squashed_output,
                                        ncomputer.target_output)

            gradients = optimizer.compute_gradients(loss)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    #with tf.control_dependencies([tf.Print(tf.zeros(1), [var.name, tf.is_nan(grad)])]):
                    gradients[i] = (tf.clip_by_value(grad, -10, 10), var)

            apply_gradients = optimizer.apply_gradients(gradients)

            summarize_loss = tf.scalar_summary("Loss", loss)
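
Note: the snippet above relies on two project helpers that are not shown, binary_cross_entropy and llprint. A minimal sketch of what such helpers usually look like in this kind of DNC training script (an assumption, not the repository's exact code):

import sys
import tensorflow as tf

def binary_cross_entropy(predictions, targets):
    # Elementwise binary cross-entropy averaged over all entries; the caller
    # clips predictions to [1e-6, 1 - 1e-6], so the logs stay finite.
    # tf.log is the pre-2.x name; newer TensorFlow uses tf.math.log.
    return tf.reduce_mean(-(targets * tf.log(predictions)
                            + (1. - targets) * tf.log(1. - predictions)))

def llprint(message):
    # Print without a trailing newline and flush immediately, so progress
    # messages such as "Building Computational Graph ... " show up right away.
    sys.stdout.write(message)
    sys.stdout.flush()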
Example #2
    edges = data_dict["edge"]
    metro_graph = data_dict["graph"]

    with graph.as_default():
        with tf.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  momentum=momentum)

            ncomputer = DNC(RecurrentController, input_size, output_size,
                            sequence_max_length, words_count, word_size,
                            read_heads, batch_size)

            output, memory_views = ncomputer.get_outputs()
            loss = None
            for _k in range(9):
                tmp_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=output[:, :, _k * 10:(_k + 1) * 10],
                        labels=ncomputer.target_output[:, :,
                                                       _k * 10:(_k + 1) * 10],
                        name="categorical_loss_" + str(_k + 1)))
                if loss is None:
                    loss = tmp_loss
                else:
                    loss = loss + tmp_loss
            loss = loss / 9.0
            # print(loss)
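
The loop above accumulates nine separate 10-way softmax cross-entropy terms over consecutive slices of the output and averages them. Assuming the last output dimension is exactly 9 * 10, an equivalent reshape-based formulation would be (a sketch, not the original code):

# Reshape so each group of 10 logits becomes its own softmax problem, then
# average all per-group cross-entropies; this equals the loop above.
reshaped_logits = tf.reshape(output, [batch_size, -1, 9, 10])
reshaped_labels = tf.reshape(ncomputer.target_output, [batch_size, -1, 9, 10])
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=reshaped_logits,
                                               labels=reshaped_labels))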
Example #3
def main():
    """
    Runs an interactive shell where the user can submit input with their chosen delimiter and see the output of the
    DNC's latest checkpoint.
    :return: None
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    ckpts_dir = os.path.join(dir_path, 'checkpoints')
    lexicon_dictionary = load(
        os.path.join(dir_path, 'data', 'encoded', 'lexicon-dict.pkl'))
    target_code = lexicon_dictionary["#"]

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            ncomputer = DNC(
                RecurrentController,
                input_size=len(lexicon_dictionary),
                output_size=len(lexicon_dictionary),
                max_sequence_length=100,
                memory_words_num=256,
                memory_word_size=64,
                memory_read_heads=4,
            )

            ncomputer.restore(session, ckpts_dir, 'step-100001')

            outputs, _ = ncomputer.get_outputs()
            softmaxed = tf.nn.softmax(outputs)

            print(
                "This is an interactive shell script. Here a user may test a trained neural network by passing it "
                "custom inputs and seeing if they elicid the desired output. \n Please note that a user may only "
                "test inputs that consists of words in the neural network's lexicon. If the user would like to quit"
                " the program, they can type ':q!' when prompted for an input. \n If the user would like to see the"
                " network's lexicon, they can type ':dict' when prompted for an input. Otherwise, the user may "
                "simply type the sequence of inputs that they would like to use and then hit the enter key. \n "
                "They will then be asked to specify the deliminator that distinguishes one word from another word."
                " The input will then be split using that deliminator. \n If all resulting inputs are in the "
                "network's lexicon, the network will then be fed these inputs and its output will be printed for "
                "the user along with its expected output.")

            my_input = input("Input:")
            while my_input != ":q!":
                if my_input == ":dict":
                    print(
                        "The neural network has been trained to recognize the following words:"
                    )
                    print(lexicon_dictionary)
                    my_input = input("Input:")
                    continue
                delimiter = input("Delimiter:")
                story = my_input.split(delimiter)
                if not set(story).issubset(lexicon_dictionary):
                    print("You may only test key in the lexicon dictionary.")
                    my_input = input("Input:")
                    continue

                desired_answers = get_solution(story)
                encoded_story = []
                encoded_answers = []
                for an_input in story:
                    encoded_story.append(lexicon_dictionary[an_input])
                for an_output in desired_answers:
                    encoded_answers.append(lexicon_dictionary[an_output])
                input_vec, _, seq_len, _ = prepare_sample(
                    [encoded_story], encoded_answers, target_code,
                    len(lexicon_dictionary))
                softmax_output = session.run(softmaxed,
                                             feed_dict={
                                                 ncomputer.input_data:
                                                 input_vec,
                                                 ncomputer.sequence_length:
                                                 seq_len
                                             })

                softmax_output = np.squeeze(softmax_output, axis=0)
                given_answers = np.argmax(
                    softmax_output[:len(desired_answers)], axis=1)

                print("Output: ", [
                    list(lexicon_dictionary.keys())[list(
                        lexicon_dictionary.values()).index(an_answer)]
                    for an_answer in given_answers
                ])
                is_correct = True
                if len(given_answers) != len(encoded_answers):
                    is_correct = False
                else:
                    for i in range(len(given_answers)):
                        if given_answers[i] != encoded_answers[i]:
                            is_correct = False
                if is_correct:
                    print("Correct!")
                else:
                    print("Expected: ", desired_answers)

                my_input = input("Input:")
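
prepare_sample here comes from the accompanying training code and is not shown. A hypothetical sketch of the interface the shell relies on, assuming the story tokens are one-hot encoded and followed by one target-code marker per expected answer (the real implementation may differ):

import numpy as np

def prepare_sample(stories, encoded_answers, target_code, word_space_size):
    # Hypothetical sketch for a single story: one-hot encode the input tokens,
    # then append one target_code marker per expected answer so the network
    # knows how many answer steps to produce.
    sequence = list(stories[0]) + [target_code] * len(encoded_answers)
    seq_len = len(sequence)
    input_vec = np.zeros((1, seq_len, word_space_size), dtype=np.float32)
    input_vec[0, np.arange(seq_len), sequence] = 1.0
    # the training version also returns a target tensor and loss weights;
    # the shell above ignores those two return values
    return input_vec, None, seq_len, None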
Example #4
def main():
    """
    Train the DNC to answer questions from the DREAM dataset.
    :return: None.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    dirname = os.path.dirname(__file__)
    ckpts_dir = os.path.join(dirname, 'checkpoints/')
    data_dir = os.path.join(dirname, 'data', 'encoded')
    tb_logs_dir = os.path.join(dirname, 'logs')

    llprint("Loading Data ... ")
    lexicon_dict = load(os.path.join(data_dir, 'lexicon-dict.pkl'))
    data_files = os.listdir(os.path.join(data_dir, 'train'))
    llprint("Done!\n")

    batch_size = 1
    input_size = output_size = len(lexicon_dict)
    sequence_max_length = 100
    word_space_size = len(lexicon_dict)
    words_count = 256
    word_size = 64
    read_heads = 4

    learning_rate = 1e-4
    momentum = 0.9

    from_checkpoint = None
    iterations = 100000

    start_step = 0

    options, _ = getopt.getopt(sys.argv[1:], '',
                               ['checkpoint=', 'iterations=', 'start='])

    for opt in options:
        if opt[0] == '--checkpoint':
            from_checkpoint = opt[1]
            print("Checkpoint found")
        elif opt[0] == '--iterations':
            iterations = int(opt[1])
        elif opt[0] == '--start':
            start_step = int(opt[1])

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate,
                                                            momentum=momentum)
            summarizer = tf.compat.v1.summary.FileWriter(
                tb_logs_dir, session.graph)

            ncomputer = DNC(RecurrentController, input_size, output_size,
                            sequence_max_length, words_count, word_size,
                            read_heads, batch_size)

            output, _ = ncomputer.get_outputs()
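            # Softmax of the raw output, used in the training loop below to
            # print readable predictions for debugging; building it once here
            # keeps new ops from being added to the graph on every iteration.
            softmaxed = tf.nn.softmax(output)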

            loss_weights = tf.compat.v1.placeholder(tf.float32,
                                                    [batch_size, None, 1])
            loss = tf.reduce_mean(
                loss_weights * tf.nn.softmax_cross_entropy_with_logits(
                    logits=output, labels=ncomputer.target_output))

            summaries = []

            gradients = optimizer.compute_gradients(loss)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
            for (grad, var) in gradients:
                if grad is not None:
                    summaries.append(
                        tf.compat.v1.summary.histogram(var.name + '/grad',
                                                       grad))

            apply_gradients = optimizer.apply_gradients(gradients)

            summaries.append(tf.compat.v1.summary.scalar("Loss", loss))

            summarize_op = tf.compat.v1.summary.merge(summaries)
            no_summarize = tf.no_op()

            llprint("Done!\n")

            llprint("Initializing Variables ... ")
            session.run(tf.compat.v1.global_variables_initializer())
            llprint("Done!\n")

            if from_checkpoint is not None:
                llprint("Restoring Checkpoint %s ... " % from_checkpoint)
                ncomputer.restore(session, ckpts_dir, from_checkpoint)
                llprint("Done!\n")
            elif os.path.exists(ckpts_dir):
                checkpoints = os.listdir(ckpts_dir)
                if len(checkpoints) != 0 and any("step-" in s
                                                 for s in checkpoints):
                    checkpoint_numbers = [
                        int(checkpoint[checkpoint.find("-") + 1:])
                        for checkpoint in checkpoints
                        if checkpoint[checkpoint.find("-") + 1:].isnumeric()
                    ]
                    checkpoint_numbers.sort()
                    ncomputer.restore(session, ckpts_dir,
                                      f"step-{checkpoint_numbers[-1]}")
                    start = checkpoint_numbers[-1]
                    end = 100000

            last_100_losses = []

            if 'start' not in locals():
                start = 0
                end = 100000
            if from_checkpoint is not None:
                start = int(from_checkpoint[from_checkpoint.find("-") + 1:])

            start_time_100 = time.time()
            end_time_100 = None
            avg_100_time = 0.
            avg_counter = 0

            for i in range(start, end + 1):
                try:
                    llprint("\rIteration %d/%d" % (i, end))

                    sample = np.random.choice(data_files, 1)
                    with open(os.path.join(data_dir, 'train', sample[0])) as f:
                        sample = json.load(f)
                    input_data, target_output, seq_len, weights = prepare_sample(
                        sample, lexicon_dict['='], word_space_size,
                        lexicon_dict)

                    summarize = (i % 100 == 0)
                    take_checkpoint = (i != 0) and (i % 200 == 0)
                    # softmaxed (built above, outside the loop) is fetched
                    # below to decode the network's predictions for debugging

                    loss_value, _, summary, softmax_output = session.run(
                        [
                            loss, apply_gradients,
                            summarize_op if summarize else no_summarize,
                            softmaxed
                        ],
                        feed_dict={
                            ncomputer.input_data: input_data,
                            ncomputer.target_output: target_output,
                            ncomputer.sequence_length: seq_len,
                            loss_weights: weights
                        })
                    softmax_output = np.squeeze(softmax_output, axis=0)
                    given_answers = np.argmax(softmax_output, axis=1)

                    words = []
                    for an_array in target_output[0]:
                        # each target step is one-hot; recover the word at the
                        # index of the hot entry
                        hot_index = int(np.argmax(an_array))
                        words.append(list(lexicon_dict.keys())[hot_index])

                    last_100_losses.append(loss_value)
                    if summarize:
                        print("\n\tLoss value: ", loss_value)
                        print("\tTarget output: ", words)
                        print("\tOutput: ", [
                            list(lexicon_dict.keys())[num]
                            for num in given_answers
                        ])
                        summarizer.add_summary(summary, i)
                        llprint("\tAvg. Cross-Entropy: %.7f\n" %
                                (np.mean(last_100_losses)))

                        end_time_100 = time.time()
                        elapsed_time = (end_time_100 - start_time_100) / 60
                        avg_counter += 1
                        avg_100_time += (1. / avg_counter) * (elapsed_time -
                                                              avg_100_time)
                        estimated_time = (avg_100_time *
                                          ((end - i) / 100.)) / 60.

                        print("\tAvg. 100 iterations time: %.2f minutes" %
                              avg_100_time)
                        print("\tApprox. time to completion: %.2f hours\n" %
                              estimated_time)

                        start_time_100 = time.time()
                        last_100_losses = []

                    if take_checkpoint:
                        llprint("\nSaving Checkpoint ... line 237 "),
                        ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                        llprint("Done!\n")

                except KeyboardInterrupt:

                    llprint("\nSaving Checkpoint ... "),
                    ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                    llprint("Done!\n")
                    sys.exit(0)
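
The loss_weights placeholder in the example above masks the per-timestep cross-entropy so that only the steps expected to carry an answer contribute to the loss. A minimal sketch of how such a mask could be built (the helper name and the convention for marking answer steps are assumptions, not the repository's prepare_sample):

import numpy as np

def make_loss_weights(seq_len, answer_positions, batch_size=1):
    # Hypothetical helper: weight 1.0 on time steps where the network should
    # emit an answer token and 0.0 everywhere else, shaped to match the
    # [batch_size, None, 1] loss_weights placeholder above.
    weights = np.zeros((batch_size, seq_len, 1), dtype=np.float32)
    for pos in answer_positions:
        weights[:, pos, 0] = 1.0
    return weights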
Example #5
    with graph.as_default():
        with tf.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            ncomputer = DNC(FeedforwardController, input_size, output_size,
                            sequence_max_length, words_count, word_size,
                            read_heads, batch_size)

            if LOG_GRAPH_WITHOUT_OPTIMIZER:
                summarizer = tf.train.SummaryWriter(tb_logs_dir, session.graph)
                session.run(tf.initialize_all_variables())
                exit()

            # squash the DNC output between 0 and 1
            output, packed_memory_view = ncomputer.get_outputs()
            squashed_output = tf.clip_by_value(tf.sigmoid(output), 1e-6,
                                               1. - 1e-6)

            loss = binary_cross_entropy(squashed_output,
                                        ncomputer.target_output)

            summaries = []

            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  momentum=momentum)
            gradients = optimizer.compute_gradients(loss)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    summaries.append(
                        tf.histogram_summary(var.name + '/grad', grad))
Example #6
def main():
    """
    Train the DNC to take a word and list the vowels it contains in order of occurrence.
    :return: None.
    """
    dirname = os.path.dirname(__file__)
    ckpts_dir = os.path.join(dirname, 'checkpoints')
    data_dir = os.path.join(dirname, 'data', 'encoded')
    tb_logs_dir = os.path.join(dirname, 'logs')

    llprint("Loading Data ... ")
    lexicon_dict = load(os.path.join(data_dir, 'lexicon-dict.pkl'))
    data = load(os.path.join(data_dir, 'train', 'train.pkl'))
    llprint("Done!\n")

    batch_size = 1
    input_size = output_size = len(lexicon_dict)
    sequence_max_length = 100
    dict_size = len(lexicon_dict)
    words_count = 256
    word_size = 64
    read_heads = 4

    learning_rate = 1e-4
    momentum = 0.9

    from_checkpoint = None
    iterations = 100000
    start_step = 0

    options, _ = getopt.getopt(sys.argv[1:], '',
                               ['checkpoint=', 'iterations=', 'start='])

    for opt in options:
        if opt[0] == '--checkpoint':
            from_checkpoint = opt[1]
        elif opt[0] == '--iterations':
            iterations = int(opt[1])
        elif opt[0] == '--start':
            start_step = int(opt[1])

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate,
                                                            momentum=momentum)
            summarizer = tf.compat.v1.summary.FileWriter(
                tb_logs_dir, session.graph)

            ncomputer = DNC(RecurrentController, input_size, output_size,
                            sequence_max_length, words_count, word_size,
                            read_heads, batch_size)

            output, _ = ncomputer.get_outputs()

            loss_weights = tf.compat.v1.placeholder(tf.float32,
                                                    [batch_size, None, 1])

            loss = tf.reduce_mean(
                loss_weights * tf.nn.softmax_cross_entropy_with_logits(
                    logits=output, labels=ncomputer.target_output))

            summaries = []

            gradients = optimizer.compute_gradients(loss)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
            for (grad, var) in gradients:
                if grad is not None:
                    summaries.append(
                        tf.compat.v1.summary.histogram(var.name + '/grad',
                                                       grad))

            apply_gradients = optimizer.apply_gradients(gradients)

            summaries.append(tf.compat.v1.summary.scalar("Loss", loss))
            summarize_op = tf.compat.v1.summary.merge(summaries)
            no_summarize = tf.no_op()

            llprint("Done!\n")

            llprint("Initializing Variables ... ")
            session.run(tf.compat.v1.global_variables_initializer())
            llprint("Done!\n")

            if from_checkpoint is not None:
                llprint("Restoring Checkpoint %s ... " % from_checkpoint)
                ncomputer.restore(session, ckpts_dir, from_checkpoint)
                llprint("Done!\n")

            last_100_losses = []

            start = 0 if start_step == 0 else start_step + 1
            end = start_step + iterations + 1

            start_time_100 = time.time()
            avg_100_time = 0.
            avg_counter = 0

            for i in range(start, end + 1):
                try:
                    llprint("\rIteration %d/%d" % (i, end))

                    sample = np.random.choice(data, 1)
                    input_data, target_output, seq_len, weights = prepare_sample(
                        sample, lexicon_dict['#'], dict_size)

                    summarize = (i % 100 == 0)
                    take_checkpoint = (i != 0) and (i % end == 0)

                    loss_value, _, summary = session.run(
                        [
                            loss, apply_gradients,
                            summarize_op if summarize else no_summarize
                        ],
                        feed_dict={
                            ncomputer.input_data: input_data,
                            ncomputer.target_output: target_output,
                            ncomputer.sequence_length: seq_len,
                            loss_weights: weights
                        })

                    last_100_losses.append(loss_value)
                    if summarize:
                        summarizer.add_summary(summary, i)
                        llprint("\n\tAvg. Cross-Entropy: %.7f\n" %
                                (np.mean(last_100_losses)))

                        end_time_100 = time.time()
                        elapsed_time = (end_time_100 - start_time_100) / 60
                        avg_counter += 1
                        avg_100_time += (1. / avg_counter) * (elapsed_time -
                                                              avg_100_time)
                        estimated_time = (avg_100_time *
                                          ((end - i) / 100.)) / 60.

                        print("\tAvg. 100 iterations time: %.2f minutes" %
                              avg_100_time)
                        print("\tApprox. time to completion: %.2f hours" %
                              estimated_time)

                        start_time_100 = time.time()
                        last_100_losses = []

                    if take_checkpoint:
                        llprint("\nSaving Checkpoint ... "),
                        ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                        llprint("Done!\n")

                except KeyboardInterrupt:

                    llprint("\nSaving Checkpoint ... "),
                    ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                    llprint("Done!\n")
                    sys.exit(0)
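
For reference on the start/end arithmetic above: with the default --start of 0, the loop runs iterations 0 through iterations + 1; when resuming, for example with --start=200 --iterations=1000 (hypothetical values), it runs 201 through 1201 inclusive:

start_step, iterations = 200, 1000                 # hypothetical CLI values
start = 0 if start_step == 0 else start_step + 1   # 201
end = start_step + iterations + 1                  # 1201
assert (start, end) == (201, 1201)
# the training loop is `for i in range(start, end + 1)`, i.e. i = 201 .. 1201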
Example #7
    def test_call(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                computer = DNC(DummyController,
                               10,
                               20,
                               10,
                               10,
                               64,
                               2,
                               batch_size=3)
                rcomputer = DNC(DummyRecurrentController,
                                10,
                                20,
                                10,
                                10,
                                64,
                                2,
                                batch_size=3)
                input_batches = np.random.uniform(0, 1, (3, 5, 10)).astype(
                    np.float32)

                session.run(tf.initialize_all_variables())
                out_view, M, L, u, p, r, wr, ww = session.run(
                    [
                        computer.get_outputs(), computer.memory.memory_matrix,
                        computer.memory.link_matrix,
                        computer.memory.usage_vector,
                        computer.memory.precedence_vector,
                        computer.memory.read_vectors,
                        computer.memory.read_weightings,
                        computer.memory.write_weighting
                    ],
                    feed_dict={
                        computer.input_data: input_batches,
                        computer.sequence_length: 5
                    })
                out, view = out_view

                rout_rview, rM, rL, ru, rp, rr, rwr, rww, ro, rs = session.run(
                    [
                        rcomputer.get_outputs(),
                        rcomputer.memory.memory_matrix,
                        rcomputer.memory.link_matrix,
                        rcomputer.memory.usage_vector,
                        rcomputer.memory.precedence_vector,
                        rcomputer.memory.read_vectors,
                        rcomputer.memory.read_weightings,
                        rcomputer.memory.write_weighting,
                        rcomputer.controller.get_state()[0],
                        rcomputer.controller.get_state()[1]
                    ],
                    feed_dict={
                        rcomputer.input_data: input_batches,
                        rcomputer.sequence_length: 5
                    })
                rout, rview = rout_rview

                self.assertEqual(out.shape, (3, 5, 20))
                self.assertEqual(view['free_gates'].shape, (3, 5, 2))
                self.assertEqual(view['allocation_gates'].shape, (3, 5, 1))
                self.assertEqual(view['write_gates'].shape, (3, 5, 1))
                self.assertEqual(view['read_weightings'].shape, (3, 5, 10, 2))
                self.assertEqual(view['write_weightings'].shape, (3, 5, 10))

                self.assertFalse(
                    np.array_equal(M, np.zeros((3, 10, 64), dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(L, np.zeros((3, 10, 10), dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(u, np.zeros((3, 10), dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(p, np.zeros((3, 10), dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(r, np.zeros((3, 64, 2), dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(wr, np.zeros((3, 10, 2), dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(ww, np.zeros((3, 10), dtype=np.float32)))

                self.assertEqual(rout.shape, (3, 5, 20))
                self.assertEqual(rview['free_gates'].shape, (3, 5, 2))
                self.assertEqual(rview['allocation_gates'].shape, (3, 5, 1))
                self.assertEqual(rview['write_gates'].shape, (3, 5, 1))
                self.assertEqual(rview['read_weightings'].shape, (3, 5, 10, 2))
                self.assertEqual(rview['write_weightings'].shape, (3, 5, 10))

                self.assertFalse(
                    np.array_equal(rM, np.zeros((3, 10, 64),
                                                dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(rL, np.zeros((3, 10, 10),
                                                dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(ru, np.zeros((3, 10), dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(rp, np.zeros((3, 10), dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(rr, np.zeros((3, 64, 2), dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(rwr, np.zeros((3, 10, 2),
                                                 dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(rww, np.zeros((3, 10), dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(ro, np.zeros((3, 64), dtype=np.float32)))
                self.assertFalse(
                    np.array_equal(rs, np.zeros((3, 64), dtype=np.float32)))