def test_restore(self):
    """Check that controller weights survive a save/restore round trip.

    A DNC is built and initialized in one graph/session, five controller
    tensors are read out, and the model is saved under the name
    'test-restore' in the 'checkpoints' directory next to this file.  A
    second DNC with the same constructor arguments is then built in a fresh
    graph, initialized, restored from that checkpoint, and the same five
    tensors are compared element-wise with the values read before saving.
    """
    ckpts_dir = os.path.join(os.path.dirname(__file__), 'checkpoints')

    def build_computer():
        # Same constructor arguments for the saved and the restored model.
        return DNC(DummyController, 10, 20, 10, 10, 64, 2, batch_size=2)

    def controller_weights(computer):
        # The tensors whose values are compared across the round trip.
        controller = computer.controller
        return [
            controller.nn_output_weights,
            controller.interface_weights,
            controller.mem_output_weights,
            controller.W,
            controller.b,
        ]

    graph1 = tf.Graph()
    with graph1.as_default():
        with tf.Session(graph=graph1) as session1:
            computer = build_computer()
            session1.run(tf.initialize_all_variables())
            saved_weights = session1.run(controller_weights(computer))
            computer.save(session1, ckpts_dir, 'test-restore')

    graph2 = tf.Graph()
    with graph2.as_default():
        with tf.Session(graph=graph2) as session2:
            computer = build_computer()
            # Re-initialize first so that equality below can only come from
            # the restore call, not from leftover values.
            session2.run(tf.initialize_all_variables())
            computer.restore(session2, ckpts_dir, 'test-restore')
            restored_weights = session2.run(controller_weights(computer))

    # One assertion per tensor (instead of a product of booleans) so that a
    # failure points at the tensor that differs.
    self.assertEqual(len(restored_weights), len(saved_weights))
    for restored, saved in zip(restored_weights, saved_weights):
        self.assertTrue(np.array_equal(restored, saved))
apply_gradients = optimizer.apply_gradients(gradients) summeries.append(tf.summary.scalar("Loss", loss)) summerize_op = tf.summary.merge(summeries) no_summerize = tf.no_op() llprint("Done!\n") llprint("Initializing Variables ... ") session.run(tf.global_variables_initializer()) llprint("Done!\n") if from_checkpoint is not None: llprint("Restoring Checkpoint %s ... " % (from_checkpoint)) ncomputer.restore(session, ckpts_dir, from_checkpoint) llprint("Done!\n") last_100_losses = [] start = 0 if start_step == 0 else start_step + 1 end = start_step + iterations + 1 start_time_100 = time.time() end_time_100 = None avg_100_time = 0. avg_counter = 0 for i in range(start, end + 1): try:
graph = tf.Graph() with graph.as_default(): with tf.Session(graph=graph) as session: ncomputer = DNC( RecurrentController, input_size=len(lexicon_dictionary), output_size=len(lexicon_dictionary), max_sequence_length=100, memory_words_num=256, memory_word_size=64, memory_read_heads=4, ) ncomputer.restore(session, ckpts_dir, 'step-30001') outputs, _ = ncomputer.get_outputs() softmaxed = tf.nn.softmax(outputs) tasks_results = {} tasks_names = {} for test_file in test_files: test_data = load(test_file) task_regexp = r'qa([0-9]{1,2})_([a-z\-]*)_test.txt.pkl' task_filename = os.path.basename(test_file) task_match_obj = re.match(task_regexp, task_filename) task_number = task_match_obj.group(1) task_name = task_match_obj.group(2).replace('-', ' ') tasks_names[task_number] = task_name counter = 0
output_size=len(lexicon_dictionary), max_sequence_length=100, memory_words_num=256, memory_word_size=64, memory_read_heads=4, ) checkpoints = os.listdir(ckpts_dir) if len(checkpoints) != 0: checkpoint_numbers = [ int(checkpoint[checkpoint.find("-") + 1:]) for checkpoint in checkpoints if checkpoint[checkpoint.find("-") + 1:].isnumeric() ] checkpoint_numbers.sort() ncomputer.restore(session, ckpts_dir, f"step-{checkpoint_numbers[-1]}") else: raise FileNotFoundError("No checkpoint to test.") outputs, _ = ncomputer.get_outputs() softmaxed = tf.nn.softmax(outputs) test_names = [] test_data = [] test_data_types = [] for counter, test_file in enumerate(test_files): task_regexp = r'([0-9])+-([0-9])+_test([0-9])+.json' task_filename = os.path.basename(test_file) task_match_obj = re.match(task_regexp, task_filename) if task_match_obj: with open(test_file) as f:
graph = tf.Graph() with graph.as_default(): with tf.Session(graph=graph) as session: ncomputer = DNC( RecurrentController, input_size=len(lexicon_dictionary), output_size=len(lexicon_dictionary), max_sequence_length=100, memory_words_num=256, memory_word_size=64, memory_read_heads=4, ) ncomputer.restore(session, ckpts_dir, 'step-500005') outputs, _ = ncomputer.get_outputs() softmaxed = tf.nn.softmax(outputs) tasks_results = {} tasks_names = {} for test_file in test_files: test_data = load(test_file) task_regexp = r'qa([0-9]{1,2})_([a-z\-]*)_test.txt.pkl' task_filename = os.path.basename(test_file) task_match_obj = re.match(task_regexp, task_filename) task_number = task_match_obj.group(1) task_name = task_match_obj.group(2).replace('-', ' ') tasks_names[task_number] = task_name counter = 0
def main():
    """
    Evaluate a trained DNC checkpoint on the encoded vowels test set.

    In the vowels task the DNC is given a sequence of letters and asked to
    return any vowels contained in that sequence, in order of appearance.
    For simplicity's sake, y is not considered a vowel.

    The checkpoint named 'step-100001' in './checkpoints/' is restored and
    every regular file in './data/encoded/test/' whose name matches
    '<1-4 digits>test.txt.pkl' is evaluated; other files are ignored.  Each
    wrong answer is printed as it is found, followed at the end by the mean
    per-file error rate and the number of files whose error rate exceeds 5%.

    All paths are relative to the current working directory.

    :return: None.
    """
    ckpts_dir = './checkpoints/'
    test_dir = './data/encoded/test/'
    lexicon_dictionary = load('./data/encoded/lexicon-dict.pkl')
    target_code = lexicon_dictionary["#"]
    lexicon_size = len(lexicon_dictionary)

    # Compiled once; the first group is the task number used as result key.
    task_pattern = re.compile(r'([0-9]{1,4})test.txt.pkl')

    # Collect (task_number, path) pairs up front.  Entries whose name does
    # not match the pattern (e.g. hidden files) are skipped instead of
    # crashing later on a failed match.
    test_files = []
    for entry_name in os.listdir(test_dir):
        entry_path = os.path.join(test_dir, entry_name)
        if not os.path.isfile(entry_path):
            continue
        task_match = task_pattern.match(entry_name)
        if task_match is None:
            continue
        test_files.append((task_match.group(1), entry_path))

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:
            ncomputer = DNC(
                RecurrentController,
                input_size=lexicon_size,
                output_size=lexicon_size,
                max_sequence_length=100,
                memory_words_num=256,
                memory_word_size=64,
                memory_read_heads=4,
            )
            ncomputer.restore(session, ckpts_dir, 'step-100001')
            outputs, _ = ncomputer.get_outputs()
            softmaxed = tf.nn.softmax(outputs)

            tasks_results = {}
            for counter, (task_number, test_file) in enumerate(test_files,
                                                               start=1):
                test_data = load(test_file)
                results = []
                for story in test_data:
                    a_story = np.array(story['inputs'])
                    # Boolean mask of the positions holding the target code.
                    target_mask = (a_story == target_code)
                    # Clear the first occurrence so that only the positions
                    # after it are read as answers.  A story without any
                    # target code keeps an all-False mask and is simply
                    # scored against its expected outputs.
                    target_positions = np.flatnonzero(target_mask)
                    if target_positions.size:
                        target_mask[target_positions[0]] = False

                    desired_answers = np.array(story['outputs'])
                    input_vec, seq_len = prepare_sample([story], lexicon_size)
                    softmax_output = session.run(
                        softmaxed,
                        feed_dict={
                            ncomputer.input_data: input_vec,
                            ncomputer.sequence_length: seq_len
                        })
                    softmax_output = np.squeeze(softmax_output, axis=0)
                    given_answers = np.argmax(softmax_output[target_mask],
                                              axis=1)

                    # Equal length and equal content, in one call.
                    is_correct = np.array_equal(given_answers,
                                                desired_answers)
                    if not is_correct:
                        print("\nGiven: ", given_answers)
                        print("Expected: ", desired_answers)
                    results.append(is_correct)

                llprint("\rTests Completed ... %d/%d" %
                        (counter, len(test_files)))
                error_rate = 1. - np.mean(results)
                tasks_results[task_number] = error_rate

            print("\n")
            print(
                "-------------------------------------------------------------------"
            )
            all_tasks_results = list(tasks_results.values())
            results_mean = "%.2f%%" % (np.mean(all_tasks_results) * 100)
            failed_count = "%d" % (np.sum(np.array(all_tasks_results) > 0.05))
            print("%-27s%-27s" % ("Percent Failed", results_mean))
            print("%-27s%-27s" % ("Total Failed", failed_count))
def main():
    """
    Run an interactive shell for probing a trained DNC checkpoint.

    The checkpoint named 'step-100001' in the 'checkpoints' directory next to
    this file is restored.  The user is then repeatedly prompted for an input
    line and a deliminator; the line is split on the deliminator, encoded with
    the lexicon, fed to the network, and the decoded output is printed
    together with a correctness verdict against `get_solution`.

    Special inputs: ':q!' quits, ':dict' prints the lexicon.  An empty
    deliminator splits the input into single characters (an empty separator
    would otherwise make `str.split` raise ValueError).

    :return: None
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    ckpts_dir = os.path.join(dir_path, 'checkpoints')
    lexicon_dictionary = load(
        os.path.join(dir_path, 'data', 'encoded', 'lexicon-dict.pkl'))
    target_code = lexicon_dictionary["#"]
    lexicon_size = len(lexicon_dictionary)

    # Reverse lookup (code -> word), built once.  setdefault keeps the first
    # word for a code, matching a first-match search over the values.
    code_to_word = {}
    for word, code in lexicon_dictionary.items():
        code_to_word.setdefault(code, word)

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:
            ncomputer = DNC(
                RecurrentController,
                input_size=lexicon_size,
                output_size=lexicon_size,
                max_sequence_length=100,
                memory_words_num=256,
                memory_word_size=64,
                memory_read_heads=4,
            )
            ncomputer.restore(session, ckpts_dir, 'step-100001')
            outputs, _ = ncomputer.get_outputs()
            softmaxed = tf.nn.softmax(outputs)

            print(
                "This is an interactive shell script. Here a user may test a trained neural network by passing it "
                "custom inputs and seeing if they elicid the desired output. \n Please note that a user may only "
                "test inputs that consists of words in the neural network's lexicon. If the user would like to quit"
                " the program, they can type ':q!' when prompted for an input. \n If the user would like to see the"
                " network's lexicon, they can type ':dict' when prompted for an input. Otherwise, the user may "
                "simply type the sequence of inputs that they would like to use and then hit the enter key. \n "
                "They will then be asked to specify the deliminator that distinguishes one word from another word."
                " The input will then be split using that deliminator. \n If all resulting inputs are in the "
                "network's lexicon, the network will then be fed these inputs and its output will be printed for "
                "the user along with its expected output.")

            while True:
                my_input = input("Input:")
                if my_input == ":q!":
                    break
                if my_input == ":dict":
                    print(
                        "The neural network has been trained to recognize the following words:"
                    )
                    print(lexicon_dictionary)
                    continue

                deliminator = input("Deliminator:")
                # str.split rejects an empty separator; treat it as "every
                # character is its own word".
                if deliminator:
                    story = my_input.split(deliminator)
                else:
                    story = list(my_input)

                if not set(story).issubset(lexicon_dictionary):
                    print("You may only test key in the lexicon dictionary.")
                    continue

                desired_answers = get_solution(story)
                encoded_story = [lexicon_dictionary[an_input]
                                 for an_input in story]
                encoded_answers = [lexicon_dictionary[an_output]
                                   for an_output in desired_answers]

                input_vec, _, seq_len, _ = prepare_sample(
                    [encoded_story], encoded_answers, target_code,
                    lexicon_size)
                softmax_output = session.run(
                    softmaxed,
                    feed_dict={
                        ncomputer.input_data: input_vec,
                        ncomputer.sequence_length: seq_len
                    })
                softmax_output = np.squeeze(softmax_output, axis=0)
                # Only the first len(desired_answers) steps are read back.
                given_answers = np.argmax(
                    softmax_output[:len(desired_answers)], axis=1)

                print("Output: ", [
                    code_to_word[int(an_answer)]
                    for an_answer in given_answers
                ])

                # Equal length and equal content, in one call.
                if np.array_equal(given_answers, encoded_answers):
                    print("Correct!")
                else:
                    print("Expected: ", desired_answers)
def main():
    """
    Train the DNC to answer questions from the DREAM dataset.

    Command-line options:
        --checkpoint=NAME   restore checkpoint NAME from the checkpoints
                            directory before training; if NAME ends in
                            "-<number>", training resumes at that step.
        --iterations=N      index of the last training step (default 100000).
        --start=N           first training step, used when no step number can
                            be derived from a checkpoint (default 0).

    Without --checkpoint, the highest-numbered "step-<number>" entry in the
    checkpoints directory (if any) is restored and training resumes there.
    Every 100 steps a TensorBoard summary is written and the current target
    and output words are printed; every 200 steps a checkpoint is saved;
    Ctrl-C saves a checkpoint for the current step and exits.

    :return: None.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    dirname = os.path.dirname(__file__)
    ckpts_dir = os.path.join(dirname, 'checkpoints/')
    data_dir = os.path.join(dirname, 'data', 'encoded')
    tb_logs_dir = os.path.join(dirname, 'logs')

    llprint("Loading Data ... ")
    lexicon_dict = load(os.path.join(data_dir, 'lexicon-dict.pkl'))
    data_files = os.listdir(os.path.join(data_dir, 'train'))
    llprint("Done!\n")

    batch_size = 1
    input_size = output_size = len(lexicon_dict)
    sequence_max_length = 100
    word_space_size = len(lexicon_dict)
    words_count = 256
    word_size = 64
    read_heads = 4

    learning_rate = 1e-4
    momentum = 0.9

    from_checkpoint = None
    iterations = 100000
    start_step = 0

    options, _ = getopt.getopt(sys.argv[1:], '',
                               ['checkpoint=', 'iterations=', 'start='])
    for opt in options:
        if opt[0] == '--checkpoint':
            from_checkpoint = opt[1]
            print("Checkpoint found")
        elif opt[0] == '--iterations':
            iterations = int(opt[1])
        elif opt[0] == '--start':
            start_step = int(opt[1])

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:
            llprint("Building Computational Graph ... ")

            optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate,
                                                            momentum=momentum)
            summarizer = tf.compat.v1.summary.FileWriter(
                tb_logs_dir, session.graph)

            ncomputer = DNC(RecurrentController, input_size, output_size,
                            sequence_max_length, words_count, word_size,
                            read_heads, batch_size)

            output, _ = ncomputer.get_outputs()
            # Debug view of the predictions.  Created once here: building it
            # inside the training loop would add a new op to the graph on
            # every iteration.
            softmaxed = tf.nn.softmax(output)

            loss_weights = tf.compat.v1.placeholder(tf.float32,
                                                    [batch_size, None, 1])
            loss = tf.reduce_mean(
                loss_weights * tf.nn.softmax_cross_entropy_with_logits(
                    logits=output, labels=ncomputer.target_output))

            summaries = []

            gradients = optimizer.compute_gradients(loss)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
            for (grad, var) in gradients:
                if grad is not None:
                    summaries.append(
                        tf.compat.v1.summary.histogram(var.name + '/grad',
                                                       grad))

            apply_gradients = optimizer.apply_gradients(gradients)

            summaries.append(tf.compat.v1.summary.scalar("Loss", loss))

            summarize_op = tf.compat.v1.summary.merge(summaries)
            no_summarize = tf.no_op()

            llprint("Done!\n")

            llprint("Initializing Variables ... ")
            session.run(tf.compat.v1.global_variables_initializer())
            llprint("Done!\n")

            # Defaults; overridden below when a checkpoint supplies a step.
            start = start_step
            end = iterations

            if from_checkpoint is not None:
                llprint("Restoring Checkpoint %s ... " % from_checkpoint)
                ncomputer.restore(session, ckpts_dir, from_checkpoint)
                llprint("Done!\n")
                # Resume at the step encoded after the first "-" in the name;
                # names without a numeric suffix fall back to --start.
                suffix = from_checkpoint[from_checkpoint.find("-") + 1:]
                if suffix.isdecimal():
                    start = int(suffix)
            elif os.path.exists(ckpts_dir):
                # Auto-resume from the newest "step-<number>" entry, if any.
                prefix = "step-"
                checkpoint_numbers = sorted(
                    int(name[len(prefix):])
                    for name in os.listdir(ckpts_dir)
                    if name.startswith(prefix)
                    and name[len(prefix):].isdecimal())
                if checkpoint_numbers:
                    ncomputer.restore(session, ckpts_dir,
                                      f"step-{checkpoint_numbers[-1]}")
                    start = checkpoint_numbers[-1]

            last_100_losses = []

            start_time_100 = time.time()
            avg_100_time = 0.
            avg_counter = 0

            # Index -> word table for decoding, built once.
            lexicon_words = list(lexicon_dict.keys())

            for i in range(start, end + 1):
                try:
                    llprint("\rIteration %d/%d" % (i, end))

                    sample_file = np.random.choice(data_files, 1)[0]
                    with open(os.path.join(data_dir, 'train',
                                           sample_file)) as f:
                        sample = json.load(f)

                    input_data, target_output, seq_len, weights = prepare_sample(
                        sample, lexicon_dict['='], word_space_size,
                        lexicon_dict)

                    summarize = (i % 100 == 0)
                    take_checkpoint = (i != 0) and (i % 200 == 0)

                    loss_value, _, summary, softmax_output = session.run(
                        [
                            loss, apply_gradients,
                            summarize_op if summarize else no_summarize,
                            softmaxed
                        ],
                        feed_dict={
                            ncomputer.input_data: input_data,
                            ncomputer.target_output: target_output,
                            ncomputer.sequence_length: seq_len,
                            loss_weights: weights
                        })

                    last_100_losses.append(loss_value)

                    if summarize:
                        # Decode only when the result is actually printed.
                        softmax_output = np.squeeze(softmax_output, axis=0)
                        given_answers = np.argmax(softmax_output, axis=1)

                        # Assumes each row of target_output[0] is a 1-D
                        # one-hot vector -- TODO confirm.  Rows without a hot
                        # entry are skipped.
                        words = []
                        for an_array in target_output[0]:
                            hot_indices = np.flatnonzero(an_array == 1)
                            if hot_indices.size:
                                words.append(lexicon_words[hot_indices[0]])

                        print("\n\tLoss value: ", loss_value)
                        print("\tTarget output: ", words)
                        print("\tOutput: ",
                              [lexicon_words[num] for num in given_answers])

                        summarizer.add_summary(summary, i)
                        llprint("\tAvg. Cross-Entropy: %.7f\n" %
                                (np.mean(last_100_losses)))

                        end_time_100 = time.time()
                        elapsed_time = (end_time_100 - start_time_100) / 60
                        avg_counter += 1
                        # Running mean of the minutes spent per 100 steps.
                        avg_100_time += (1. / avg_counter) * (elapsed_time -
                                                              avg_100_time)
                        estimated_time = (avg_100_time *
                                          ((end - i) / 100.)) / 60.

                        print("\tAvg. 100 iterations time: %.2f minutes" %
                              avg_100_time)
                        print("\tApprox. time to completion: %.2f hours\n" %
                              estimated_time)

                        start_time_100 = time.time()
                        last_100_losses = []

                    if take_checkpoint:
                        llprint("\nSaving Checkpoint ... line 237 ")
                        ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                        llprint("Done!\n")

                except KeyboardInterrupt:
                    # Keep the progress made so far before leaving.
                    llprint("\nSaving Checkpoint ... ")
                    ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                    llprint("Done!\n")
                    sys.exit(0)
graph = tf.Graph() with graph.as_default(): with tf.compat.v1.Session(graph=graph) as session: ncomputer = DNC( RecurrentController, input_size=len(lexicon_dictionary), output_size=len(lexicon_dictionary), max_sequence_length=100, memory_words_num=256, memory_word_size=64, memory_read_heads=4, ) ncomputer.restore(session, ckpts_dir, 'step-228') outputs, _ = ncomputer.get_outputs() softmaxed = tf.nn.softmax(outputs) tasks_results = {} tasks_names = {} for test_file in test_files: test_data = load(test_file) task_regexp = r'qa([0-9]{1,2})_([a-z\-]*)_test.txt.pkl' task_filename = os.path.basename(test_file) task_match_obj = re.match(task_regexp, task_filename) task_number = task_match_obj.group(1) task_name = task_match_obj.group(2).replace('-', ' ') tasks_names[task_number] = task_name counter = 0
def main():
    """
    Train the DNC on the vowels task: given a word, list the vowels it
    contains in order of occurrence.

    Command-line options: --checkpoint=NAME (restore NAME before training),
    --iterations=N and --start=N (together they set the range of steps).
    A summary is written every 100 steps; a checkpoint is saved on the final
    step and when training is interrupted with Ctrl-C.

    :return: None.
    """
    base_dir = os.path.dirname(__file__)
    ckpts_dir = os.path.join(base_dir, 'checkpoints')
    data_dir = os.path.join(base_dir, 'data', 'encoded')
    tb_logs_dir = os.path.join(base_dir, 'logs')

    llprint("Loading Data ... ")
    lexicon_dict = load(os.path.join(data_dir, 'lexicon-dict.pkl'))
    data = load(os.path.join(data_dir, 'train', 'train.pkl'))
    llprint("Done!\n")

    # Model hyper-parameters.
    batch_size = 1
    dict_size = len(lexicon_dict)
    input_size = dict_size
    output_size = dict_size
    sequence_max_length = 100
    words_count = 256
    word_size = 64
    read_heads = 4

    # Optimizer hyper-parameters.
    learning_rate = 1e-4
    momentum = 0.9

    # Run configuration, overridable from the command line.
    from_checkpoint = None
    iterations = 100000
    start_step = 0

    parsed_options, _ = getopt.getopt(
        sys.argv[1:], '', ['checkpoint=', 'iterations=', 'start='])
    for flag, value in parsed_options:
        if flag == '--checkpoint':
            from_checkpoint = value
        elif flag == '--iterations':
            iterations = int(value)
        elif flag == '--start':
            start_step = int(value)

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            def save_checkpoint(step):
                # Announce, save under 'step-<step>', and confirm.
                llprint("\nSaving Checkpoint ... ")
                ncomputer.save(session, ckpts_dir, 'step-%d' % step)
                llprint("Done!\n")

            llprint("Building Computational Graph ... ")

            optimizer = tf.compat.v1.train.RMSPropOptimizer(
                learning_rate, momentum=momentum)
            summarizer = tf.compat.v1.summary.FileWriter(
                tb_logs_dir, session.graph)

            ncomputer = DNC(RecurrentController, input_size, output_size,
                            sequence_max_length, words_count, word_size,
                            read_heads, batch_size)
            output, _ = ncomputer.get_outputs()

            loss_weights = tf.compat.v1.placeholder(
                tf.float32, [batch_size, None, 1])
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=output, labels=ncomputer.target_output)
            loss = tf.reduce_mean(loss_weights * cross_entropy)

            # Clip every available gradient to [-10, 10]; variables without
            # a gradient are passed through untouched.
            clipped_gradients = [
                (tf.clip_by_value(grad, -10, 10), var)
                if grad is not None else (grad, var)
                for grad, var in optimizer.compute_gradients(loss)
            ]
            # One histogram per clipped gradient, then the scalar loss.
            summaries = [
                tf.compat.v1.summary.histogram(var.name + '/grad', grad)
                for grad, var in clipped_gradients
                if grad is not None
            ]

            apply_gradients = optimizer.apply_gradients(clipped_gradients)
            summaries.append(tf.compat.v1.summary.scalar("Loss", loss))

            summarize_op = tf.compat.v1.summary.merge(summaries)
            no_summarize = tf.no_op()
            llprint("Done!\n")

            llprint("Initializing Variables ... ")
            session.run(tf.compat.v1.global_variables_initializer())
            llprint("Done!\n")

            if from_checkpoint is not None:
                llprint("Restoring Checkpoint %s ... " % from_checkpoint)
                ncomputer.restore(session, ckpts_dir, from_checkpoint)
                llprint("Done!\n")

            if start_step == 0:
                start = 0
            else:
                start = start_step + 1
            end = start_step + iterations + 1

            recent_losses = []
            window_started = time.time()
            avg_100_time = 0.
            windows_seen = 0

            for i in range(start, end + 1):
                try:
                    llprint("\rIteration %d/%d" % (i, end))

                    sample = np.random.choice(data, 1)
                    input_data, target_output, seq_len, weights = prepare_sample(
                        sample, lexicon_dict['#'], dict_size)

                    summarize = (i % 100 == 0)
                    take_checkpoint = (i != 0) and (i % end == 0)

                    fetches = [
                        loss,
                        apply_gradients,
                        summarize_op if summarize else no_summarize,
                    ]
                    feeds = {
                        ncomputer.input_data: input_data,
                        ncomputer.target_output: target_output,
                        ncomputer.sequence_length: seq_len,
                        loss_weights: weights,
                    }
                    loss_value, _, summary = session.run(fetches,
                                                         feed_dict=feeds)

                    recent_losses.append(loss_value)

                    if summarize:
                        summarizer.add_summary(summary, i)
                        llprint("\n\tAvg. Cross-Entropy: %.7f\n" %
                                (np.mean(recent_losses)))

                        window_ended = time.time()
                        elapsed_time = (window_ended - window_started) / 60
                        windows_seen += 1
                        # Incremental mean of minutes per reporting window.
                        avg_100_time += (1. / windows_seen) * (
                            elapsed_time - avg_100_time)
                        estimated_time = (avg_100_time *
                                          ((end - i) / 100.)) / 60.

                        print("\tAvg. 100 iterations time: %.2f minutes" %
                              avg_100_time)
                        print("\tApprox. time to completion: %.2f hours" %
                              estimated_time)

                        window_started = time.time()
                        recent_losses = []

                    if take_checkpoint:
                        save_checkpoint(i)

                except KeyboardInterrupt:
                    save_checkpoint(i)
                    sys.exit(0)