with graph.as_default():
    with tf.compat.v1.Session(graph=graph) as session:

        llprint("Building Computational Graph ... ")

        optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate,
                                                        momentum=momentum)
        summarizer = tf.compat.v1.summary.FileWriter(tb_logs_dir, session.graph)

        ncomputer = DNC(FeedforwardController, input_size, output_size,
                        2 * sequence_max_length + 1, words_count, word_size,
                        read_heads, batch_size)

        # squash the DNC output between 0 and 1
        output, _ = ncomputer.get_outputs()
        squashed_output = tf.clip_by_value(tf.sigmoid(output), 1e-6, 1. - 1e-6)

        loss = binary_cross_entropy(squashed_output, ncomputer.target_output)

        # clip each gradient element-wise to [-10, 10] before applying
        gradients = optimizer.compute_gradients(loss)
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (tf.clip_by_value(grad, -10, 10), var)

        apply_gradients = optimizer.apply_gradients(gradients)

        summarize_loss = tf.compat.v1.summary.scalar("Loss", loss)
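# Minimal sketches of two helpers the scripts in this repo call but this
# excerpt does not define: llprint (print without a newline and flush
# immediately, so the "\r" progress lines update in place) and
# binary_cross_entropy. These are plausible reconstructions under those
# assumptions, not necessarily the repository's exact code.
import sys
import tensorflow as tf


def llprint(message):
    sys.stdout.write(message)
    sys.stdout.flush()


def binary_cross_entropy(predictions, targets):
    # predictions are sigmoid outputs already clipped into
    # (1e-6, 1 - 1e-6) by the caller, so the logs are numerically safe
    return tf.reduce_mean(
        -1 * (targets * tf.math.log(predictions)
              + (1 - targets) * tf.math.log(1 - predictions)))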
edges = data_dict["edge"]
metro_graph = data_dict["graph"]

with graph.as_default():
    with tf.compat.v1.Session(graph=graph) as session:

        llprint("Building Computational Graph ... ")

        optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate,
                                                        momentum=momentum)

        ncomputer = DNC(RecurrentController, input_size, output_size,
                        sequence_max_length, words_count, word_size,
                        read_heads, batch_size)

        output, memory_views = ncomputer.get_outputs()

        # the output is nine independent 10-way categorical slots; average
        # the softmax cross-entropy across the slots
        loss = None
        for _k in range(9):
            tmp_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=output[:, :, _k * 10:(_k + 1) * 10],
                    labels=ncomputer.target_output[:, :, _k * 10:(_k + 1) * 10],
                    name="categorical_loss_" + str(_k + 1)))
            loss = tmp_loss if loss is None else loss + tmp_loss
        loss = loss / 9.0
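# The slot-wise loss above can be written more compactly. The sketch below
# is behavior-equivalent under the same assumption of nine slots of ten
# logits each (90 output channels in total); it reuses the `output` and
# `ncomputer` names defined above.
slot_losses = [
    tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=output[:, :, k * 10:(k + 1) * 10],
            labels=ncomputer.target_output[:, :, k * 10:(k + 1) * 10]))
    for k in range(9)
]
loss = tf.add_n(slot_losses) / 9.0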
def main():
    """
    Runs an interactive shell where the user can submit input with their
    chosen delimiter and see the output of the DNC's latest checkpoint.

    :return: None
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    ckpts_dir = os.path.join(dir_path, 'checkpoints')
    lexicon_dictionary = load(
        os.path.join(dir_path, 'data', 'encoded', 'lexicon-dict.pkl'))
    target_code = lexicon_dictionary["#"]
    graph = tf.Graph()

    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:
            ncomputer = DNC(
                RecurrentController,
                input_size=len(lexicon_dictionary),
                output_size=len(lexicon_dictionary),
                max_sequence_length=100,
                memory_words_num=256,
                memory_word_size=64,
                memory_read_heads=4,
            )
            ncomputer.restore(session, ckpts_dir, 'step-100001')

            outputs, _ = ncomputer.get_outputs()
            softmaxed = tf.nn.softmax(outputs)

            print(
                "This is an interactive shell script. Here a user may test a trained neural network by passing it "
                "custom inputs and seeing if they elicit the desired output. \n Please note that a user may only "
                "test inputs that consist of words in the neural network's lexicon. If the user would like to quit"
                " the program, they can type ':q!' when prompted for an input. \n If the user would like to see the"
                " network's lexicon, they can type ':dict' when prompted for an input. Otherwise, the user may "
                "simply type the sequence of inputs that they would like to use and then hit the enter key. \n "
                "They will then be asked to specify the delimiter that distinguishes one word from another word."
                " The input will then be split using that delimiter. \n If all resulting inputs are in the "
                "network's lexicon, the network will then be fed these inputs and its output will be printed for "
                "the user along with its expected output.")

            my_input = input("Input:")
            while my_input != ":q!":
                if my_input == ":dict":
                    print("The neural network has been trained to recognize "
                          "the following words:")
                    print(lexicon_dictionary)
                    my_input = input("Input:")
                    continue

                delimiter = input("Delimiter:")
                story = my_input.split(delimiter)
                if not set(story).issubset(lexicon_dictionary):
                    print("You may only test words that are in the lexicon "
                          "dictionary.")
                    my_input = input("Input:")
                    continue

                desired_answers = get_solution(story)
                encoded_story = []
                encoded_answers = []
                for an_input in story:
                    encoded_story.append(lexicon_dictionary[an_input])
                for an_output in desired_answers:
                    encoded_answers.append(lexicon_dictionary[an_output])

                input_vec, _, seq_len, _ = prepare_sample(
                    [encoded_story], encoded_answers, target_code,
                    len(lexicon_dictionary))

                softmax_output = session.run(
                    softmaxed,
                    feed_dict={
                        ncomputer.input_data: input_vec,
                        ncomputer.sequence_length: seq_len
                    })
                softmax_output = np.squeeze(softmax_output, axis=0)
                given_answers = np.argmax(
                    softmax_output[:len(desired_answers)], axis=1)

                print("Output: ", [
                    list(lexicon_dictionary.keys())[list(
                        lexicon_dictionary.values()).index(an_answer)]
                    for an_answer in given_answers
                ])

                is_correct = (
                    len(given_answers) == len(encoded_answers)
                    and all(g == e
                            for g, e in zip(given_answers, encoded_answers)))
                if is_correct:
                    print("Correct!")
                else:
                    print("Expected: ", desired_answers)

                my_input = input("Input:")
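# Hypothetical helper (not part of the original script): the shell above
# decodes each predicted code with a linear search through
# lexicon_dictionary.values(). Inverting the lexicon once makes decoding
# O(1) per token; `lexicon_dictionary` maps word -> integer code.
def make_decoder(lexicon_dictionary):
    index_to_word = {code: word for word, code in lexicon_dictionary.items()}
    return lambda codes: [index_to_word[int(code)] for code in codes]

# usage sketch:
#   decode = make_decoder(lexicon_dictionary)
#   print("Output: ", decode(given_answers))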
def main():
    """
    Train the DNC to answer questions from the DREAM dataset.

    :return: None.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    dirname = os.path.dirname(__file__)
    ckpts_dir = os.path.join(dirname, 'checkpoints/')
    data_dir = os.path.join(dirname, 'data', 'encoded')
    tb_logs_dir = os.path.join(dirname, 'logs')

    llprint("Loading Data ... ")
    lexicon_dict = load(os.path.join(data_dir, 'lexicon-dict.pkl'))
    data_files = os.listdir(os.path.join(data_dir, 'train'))
    llprint("Done!\n")

    batch_size = 1
    input_size = output_size = len(lexicon_dict)
    sequence_max_length = 100
    word_space_size = len(lexicon_dict)
    words_count = 256
    word_size = 64
    read_heads = 4

    learning_rate = 1e-4
    momentum = 0.9

    from_checkpoint = None
    iterations = 100000
    start_step = 0

    options, _ = getopt.getopt(sys.argv[1:], '',
                               ['checkpoint=', 'iterations=', 'start='])
    for opt in options:
        if opt[0] == '--checkpoint':
            from_checkpoint = opt[1]
            print("Checkpoint found")
        elif opt[0] == '--iterations':
            iterations = int(opt[1])
        elif opt[0] == '--start':
            start_step = int(opt[1])

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.compat.v1.train.RMSPropOptimizer(
                learning_rate, momentum=momentum)
            summarizer = tf.compat.v1.summary.FileWriter(
                tb_logs_dir, session.graph)

            ncomputer = DNC(RecurrentController, input_size, output_size,
                            sequence_max_length, words_count, word_size,
                            read_heads, batch_size)

            output, _ = ncomputer.get_outputs()

            # per-timestep weights let the loss ignore the question portion
            # of each sequence and train only on the answer steps
            loss_weights = tf.compat.v1.placeholder(tf.float32,
                                                    [batch_size, None, 1])
            loss = tf.reduce_mean(
                loss_weights * tf.nn.softmax_cross_entropy_with_logits(
                    logits=output, labels=ncomputer.target_output))

            summaries = []

            gradients = optimizer.compute_gradients(loss)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
            for (grad, var) in gradients:
                if grad is not None:
                    summaries.append(
                        tf.compat.v1.summary.histogram(var.name + '/grad',
                                                       grad))

            apply_gradients = optimizer.apply_gradients(gradients)

            summaries.append(tf.compat.v1.summary.scalar("Loss", loss))
            summarize_op = tf.compat.v1.summary.merge(summaries)
            no_summarize = tf.no_op()

            llprint("Done!\n")

            llprint("Initializing Variables ... ")
            session.run(tf.compat.v1.global_variables_initializer())
            llprint("Done!\n")

            # default training window; restoring a checkpoint moves the
            # start forward
            start = 0
            end = 100000
            last_100_losses = []

            if from_checkpoint is not None:
                llprint("Restoring Checkpoint %s ... " % from_checkpoint)
                ncomputer.restore(session, ckpts_dir, from_checkpoint)
                llprint("Done!\n")
                start = int(from_checkpoint[from_checkpoint.find("-") + 1:])
            elif os.path.exists(ckpts_dir):
                # resume from the newest "step-<n>" checkpoint, if any
                checkpoints = os.listdir(ckpts_dir)
                if len(checkpoints) != 0 and any("step-" in s
                                                 for s in checkpoints):
                    checkpoint_numbers = sorted(
                        int(c[c.find("-") + 1:]) for c in checkpoints
                        if c[c.find("-") + 1:].isnumeric())
                    ncomputer.restore(session, ckpts_dir,
                                      f"step-{checkpoint_numbers[-1]}")
                    start = checkpoint_numbers[-1]

            start_time_100 = time.time()
            end_time_100 = None
            avg_100_time = 0.
            avg_counter = 0

            # build the debugging softmax once, outside the training loop,
            # so each iteration does not add new ops to the graph
            outputs, _ = ncomputer.get_outputs()
            softmaxed = tf.nn.softmax(outputs)

            for i in range(start, end + 1):
                try:
                    llprint("\rIteration %d/%d" % (i, end))

                    sample = np.random.choice(data_files, 1)
                    with open(os.path.join(data_dir, 'train',
                                           sample[0])) as f:
                        sample = json.load(f)
                    input_data, target_output, seq_len, weights = prepare_sample(
                        sample, lexicon_dict['='], word_space_size,
                        lexicon_dict)

                    summarize = (i % 100 == 0)
                    take_checkpoint = (i != 0) and (i % 200 == 0)

                    loss_value, _, summary, softmax_output = session.run(
                        [
                            loss, apply_gradients,
                            summarize_op if summarize else no_summarize,
                            softmaxed
                        ],
                        feed_dict={
                            ncomputer.input_data: input_data,
                            ncomputer.target_output: target_output,
                            ncomputer.sequence_length: seq_len,
                            loss_weights: weights
                        })

                    softmax_output = np.squeeze(softmax_output, axis=0)
                    given_answers = np.argmax(softmax_output, axis=1)

                    # decode the one-hot target rows back into words
                    words = []
                    for an_array in target_output[0]:
                        hot_indices = np.where(an_array == 1)[0]
                        if hot_indices.size > 0:
                            words.append(
                                list(lexicon_dict.keys())[hot_indices[0]])

                    last_100_losses.append(loss_value)

                    if summarize:
                        print("\n\tLoss value: ", loss_value)
                        print("\tTarget output: ", words)
                        print("\tOutput: ", [
                            list(lexicon_dict.keys())[num]
                            for num in given_answers
                        ])
                        summarizer.add_summary(summary, i)
                        llprint("\tAvg. Cross-Entropy: %.7f\n" %
                                (np.mean(last_100_losses)))

                        end_time_100 = time.time()
                        elapsed_time = (end_time_100 - start_time_100) / 60
                        avg_counter += 1
                        avg_100_time += (1. / avg_counter) * (elapsed_time -
                                                              avg_100_time)
                        estimated_time = (avg_100_time *
                                          ((end - i) / 100.)) / 60.

                        print("\tAvg. 100 iterations time: %.2f minutes" %
                              avg_100_time)
                        print("\tApprox. time to completion: %.2f hours\n" %
                              estimated_time)

                        start_time_100 = time.time()
                        last_100_losses = []

                    if take_checkpoint:
                        llprint("\nSaving Checkpoint ... ")
                        ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                        llprint("Done!\n")

                except KeyboardInterrupt:
                    llprint("\nSaving Checkpoint ... ")
                    ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                    llprint("Done!\n")
                    sys.exit(0)
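# The timing code above maintains a running mean with the standard
# incremental update: after n samples, avg_n = avg_{n-1} + (x_n - avg_{n-1}) / n,
# which equals the arithmetic mean without storing all samples. A tiny
# self-contained check (the values are arbitrary):
samples = [2.0, 4.0, 9.0]
avg, n = 0.0, 0
for x in samples:
    n += 1
    avg += (1.0 / n) * (x - avg)
assert abs(avg - sum(samples) / len(samples)) < 1e-12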
with graph.as_default():
    with tf.compat.v1.Session(graph=graph) as session:

        llprint("Building Computational Graph ... ")

        ncomputer = DNC(FeedforwardController, input_size, output_size,
                        sequence_max_length, words_count, word_size,
                        read_heads, batch_size)

        if LOG_GRAPH_WITHOUT_OPTIMIZER:
            summarizer = tf.compat.v1.summary.FileWriter(tb_logs_dir,
                                                         session.graph)
            session.run(tf.compat.v1.global_variables_initializer())
            exit()

        # squash the DNC output between 0 and 1
        output, packed_memory_view = ncomputer.get_outputs()
        squashed_output = tf.clip_by_value(tf.sigmoid(output), 1e-6, 1. - 1e-6)

        loss = binary_cross_entropy(squashed_output, ncomputer.target_output)

        summaries = []

        optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate,
                                                        momentum=momentum)
        gradients = optimizer.compute_gradients(loss)
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                summaries.append(
                    tf.compat.v1.summary.histogram(var.name + '/grad', grad))
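# The training scripts in this repo clip gradients element-wise with
# tf.clip_by_value. A common alternative (a sketch, not what the original
# code does) is global-norm clipping, which rescales the whole gradient
# vector at once and so preserves its direction:
grads, variables = zip(*optimizer.compute_gradients(loss))
clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=10.0)
apply_gradients = optimizer.apply_gradients(zip(clipped_grads, variables))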
def main():
    """
    Train the DNC to take a word and list its instances of vowels in order
    of occurrence.

    :return: None.
    """
    dirname = os.path.dirname(__file__)
    ckpts_dir = os.path.join(dirname, 'checkpoints')
    data_dir = os.path.join(dirname, 'data', 'encoded')
    tb_logs_dir = os.path.join(dirname, 'logs')

    llprint("Loading Data ... ")
    lexicon_dict = load(os.path.join(data_dir, 'lexicon-dict.pkl'))
    data = load(os.path.join(data_dir, 'train', 'train.pkl'))
    llprint("Done!\n")

    batch_size = 1
    input_size = output_size = len(lexicon_dict)
    sequence_max_length = 100
    dict_size = len(lexicon_dict)
    words_count = 256
    word_size = 64
    read_heads = 4

    learning_rate = 1e-4
    momentum = 0.9

    from_checkpoint = None
    iterations = 100000
    start_step = 0

    options, _ = getopt.getopt(sys.argv[1:], '',
                               ['checkpoint=', 'iterations=', 'start='])
    for opt in options:
        if opt[0] == '--checkpoint':
            from_checkpoint = opt[1]
        elif opt[0] == '--iterations':
            iterations = int(opt[1])
        elif opt[0] == '--start':
            start_step = int(opt[1])

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.compat.v1.train.RMSPropOptimizer(
                learning_rate, momentum=momentum)
            summarizer = tf.compat.v1.summary.FileWriter(
                tb_logs_dir, session.graph)

            ncomputer = DNC(RecurrentController, input_size, output_size,
                            sequence_max_length, words_count, word_size,
                            read_heads, batch_size)

            output, _ = ncomputer.get_outputs()

            loss_weights = tf.compat.v1.placeholder(tf.float32,
                                                    [batch_size, None, 1])
            loss = tf.reduce_mean(
                loss_weights * tf.nn.softmax_cross_entropy_with_logits(
                    logits=output, labels=ncomputer.target_output))

            summaries = []

            gradients = optimizer.compute_gradients(loss)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
            for (grad, var) in gradients:
                if grad is not None:
                    summaries.append(
                        tf.compat.v1.summary.histogram(var.name + '/grad',
                                                       grad))

            apply_gradients = optimizer.apply_gradients(gradients)

            summaries.append(tf.compat.v1.summary.scalar("Loss", loss))
            summarize_op = tf.compat.v1.summary.merge(summaries)
            no_summarize = tf.no_op()

            llprint("Done!\n")

            llprint("Initializing Variables ... ")
            session.run(tf.compat.v1.global_variables_initializer())
            llprint("Done!\n")

            if from_checkpoint is not None:
                llprint("Restoring Checkpoint %s ... " % from_checkpoint)
                ncomputer.restore(session, ckpts_dir, from_checkpoint)
                llprint("Done!\n")

            last_100_losses = []

            start = 0 if start_step == 0 else start_step + 1
            end = start_step + iterations + 1

            start_time_100 = time.time()
            avg_100_time = 0.
            avg_counter = 0

            for i in range(start, end + 1):
                try:
                    llprint("\rIteration %d/%d" % (i, end))

                    sample = np.random.choice(data, 1)
                    input_data, target_output, seq_len, weights = prepare_sample(
                        sample, lexicon_dict['#'], dict_size)

                    summarize = (i % 100 == 0)
                    take_checkpoint = (i != 0) and (i % end == 0)

                    loss_value, _, summary = session.run(
                        [
                            loss, apply_gradients,
                            summarize_op if summarize else no_summarize
                        ],
                        feed_dict={
                            ncomputer.input_data: input_data,
                            ncomputer.target_output: target_output,
                            ncomputer.sequence_length: seq_len,
                            loss_weights: weights
                        })

                    last_100_losses.append(loss_value)

                    if summarize:
                        summarizer.add_summary(summary, i)
                        llprint("\n\tAvg. Cross-Entropy: %.7f\n" %
                                (np.mean(last_100_losses)))

                        end_time_100 = time.time()
                        elapsed_time = (end_time_100 - start_time_100) / 60
                        avg_counter += 1
                        avg_100_time += (1. / avg_counter) * (elapsed_time -
                                                              avg_100_time)
                        estimated_time = (avg_100_time *
                                          ((end - i) / 100.)) / 60.
print("\tAvg. 100 iterations time: %.2f minutes" % avg_100_time) print("\tApprox. time to completion: %.2f hours" % estimated_time) start_time_100 = time.time() last_100_losses = [] if take_checkpoint: llprint("\nSaving Checkpoint ... "), ncomputer.save(session, ckpts_dir, 'step-%d' % i) llprint("Done!\n") except KeyboardInterrupt: llprint("\nSaving Checkpoint ... "), ncomputer.save(session, ckpts_dir, 'step-%d' % i) llprint("Done!\n") sys.exit(0)
def test_call(self):
    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:
            # feedforward- and recurrent-controller DNCs with identical
            # memory configurations
            computer = DNC(DummyController, 10, 20, 10, 10, 64, 2,
                           batch_size=3)
            rcomputer = DNC(DummyRecurrentController, 10, 20, 10, 10, 64, 2,
                            batch_size=3)
            input_batches = np.random.uniform(0, 1, (3, 5, 10)).astype(
                np.float32)

            session.run(tf.compat.v1.global_variables_initializer())

            out_view, M, L, u, p, r, wr, ww = session.run(
                [
                    computer.get_outputs(),
                    computer.memory.memory_matrix,
                    computer.memory.link_matrix,
                    computer.memory.usage_vector,
                    computer.memory.precedence_vector,
                    computer.memory.read_vectors,
                    computer.memory.read_weightings,
                    computer.memory.write_weighting
                ],
                feed_dict={
                    computer.input_data: input_batches,
                    computer.sequence_length: 5
                })
            out, view = out_view

            rout_rview, rM, rL, ru, rp, rr, rwr, rww, ro, rs = session.run(
                [
                    rcomputer.get_outputs(),
                    rcomputer.memory.memory_matrix,
                    rcomputer.memory.link_matrix,
                    rcomputer.memory.usage_vector,
                    rcomputer.memory.precedence_vector,
                    rcomputer.memory.read_vectors,
                    rcomputer.memory.read_weightings,
                    rcomputer.memory.write_weighting,
                    rcomputer.controller.get_state()[0],
                    rcomputer.controller.get_state()[1]
                ],
                feed_dict={
                    rcomputer.input_data: input_batches,
                    rcomputer.sequence_length: 5
                })
            rout, rview = rout_rview

            self.assertEqual(out.shape, (3, 5, 20))
            self.assertEqual(view['free_gates'].shape, (3, 5, 2))
            self.assertEqual(view['allocation_gates'].shape, (3, 5, 1))
            self.assertEqual(view['write_gates'].shape, (3, 5, 1))
            self.assertEqual(view['read_weightings'].shape, (3, 5, 10, 2))
            self.assertEqual(view['write_weightings'].shape, (3, 5, 10))

            # after a forward pass, none of the memory structures should
            # still be all-zero
            self.assertFalse(
                np.array_equal(M, np.zeros((3, 10, 64), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(L, np.zeros((3, 10, 10), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(u, np.zeros((3, 10), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(p, np.zeros((3, 10), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(r, np.zeros((3, 64, 2), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(wr, np.zeros((3, 10, 2), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(ww, np.zeros((3, 10), dtype=np.float32)))

            self.assertEqual(rout.shape, (3, 5, 20))
            self.assertEqual(rview['free_gates'].shape, (3, 5, 2))
            self.assertEqual(rview['allocation_gates'].shape, (3, 5, 1))
            self.assertEqual(rview['write_gates'].shape, (3, 5, 1))
            self.assertEqual(rview['read_weightings'].shape, (3, 5, 10, 2))
            self.assertEqual(rview['write_weightings'].shape, (3, 5, 10))

            self.assertFalse(
                np.array_equal(rM, np.zeros((3, 10, 64), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(rL, np.zeros((3, 10, 10), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(ru, np.zeros((3, 10), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(rp, np.zeros((3, 10), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(rr, np.zeros((3, 64, 2), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(rwr, np.zeros((3, 10, 2), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(rww, np.zeros((3, 10), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(ro, np.zeros((3, 64), dtype=np.float32)))
            self.assertFalse(
                np.array_equal(rs, np.zeros((3, 64), dtype=np.float32)))
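# Legend for the shape assertions above (inferred from the positional
# constructor call DNC(controller, input_size=10, output_size=20,
# max_sequence_length=10, words_count=10, word_size=64, read_heads=2,
# batch_size=3) and the test's sequence length of 5; an assumption, since
# the DNC signature is not shown in this excerpt):
expected_shapes = {
    'output': (3, 5, 20),              # (batch, time, output_size)
    'memory_matrix': (3, 10, 64),      # (batch, words_count, word_size)
    'link_matrix': (3, 10, 10),        # (batch, words_count, words_count)
    'read_weightings': (3, 5, 10, 2),  # (batch, time, words_count, read_heads)
}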