def unfreeze_and_chat(frozen_model_path):
    """Summon a bot back from the dead and have a nice lil chat with it."""

    tensor_dict, graph = unfreeze_bot(frozen_model_path)
    config = io_utils.parse_config(pretrained_dir=frozen_model_path)
    word_to_idx, idx_to_word = get_frozen_vocab(config)

    def as_words(sentence):
        return " ".join([tf.compat.as_str(idx_to_word[i]) for i in sentence])

    with tf.Session(graph=graph) as sess:

        def respond_to(sentence):
            """Outputs response sentence (string) given input (string)."""
            # Convert input sentence to token-ids.
            sentence_tokens = io_utils.sentence_to_token_ids(
                tf.compat.as_bytes(sentence), word_to_idx)
            sentence_tokens = np.array([sentence_tokens[::-1]])
            # Get output sentence from the chatbot.
            fetches = tensor_dict['outputs']
            feed_dict = {tensor_dict['inputs']: sentence_tokens}
            response = sess.run(fetches=fetches, feed_dict=feed_dict)
            return as_words(response[0][:-1])

        sentence = io_utils.get_sentence()
        while sentence != 'exit':
            resp = respond_to(sentence)
            print("Robot:", resp)
            sentence = io_utils.get_sentence()
        print("Farewell, human.")
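# Usage sketch (an assumption, not part of the original source): the path
# below is hypothetical and should point at a directory that bot.freeze()
# wrote a frozen graph into.
if __name__ == '__main__':
    unfreeze_and_chat('out/frozen_model')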
def setUp(self):
    self.seq_len = 20
    self.config = io_utils.parse_config(flags=TEST_FLAGS)
    self.dataset = data.TestData(self.config['dataset_params'])
    self.batch_size = 2
    logging.basicConfig(level=logging.INFO)
    self.log = logging.getLogger('TestLegacyModels')
def main(argv):
    if FLAGS.debug:
        # Setting to '0': all tensorflow messages are logged.
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
        logging.basicConfig(level=logging.INFO)

    # Extract the merged configs/dictionaries.
    config = io_utils.parse_config(flags=FLAGS)
    if config['model_params']['decode'] and config['model_params']['reset_model']:
        print("Woops! You passed {decode: True, reset_model: True}."
              " You can't chat with a reset bot! I'll set reset to False.")
        config['model_params']['reset_model'] = False

    # If loading from pretrained, double-check that certain values are correct.
    # (This is not something a user need worry about -- done automatically.)
    if FLAGS.pretrained_dir is not None:
        assert config['model_params']['decode'] \
            and not config['model_params']['reset_model']

    # Print out any non-default parameters given by user, so as to reassure
    # them that everything is set up properly.
    io_utils.print_non_defaults(config)

    print("Setting up %s dataset." % config['dataset'])
    dataset_class = locate(config['dataset']) or getattr(data, config['dataset'])
    dataset = dataset_class(config['dataset_params'])

    print("Creating", config['model'], ". . . ")
    bot_class = locate(config['model']) or getattr(chatbot, config['model'])
    bot = bot_class(dataset, config)

    if not config['model_params']['decode']:
        start_training(dataset, bot)
    else:
        start_chatting(bot)
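# Entry-point sketch (an assumption based on the FLAGS usage above): this
# main(argv) signature matches the standard TF1 tf.app pattern, which
# parses command-line flags and then invokes main with the leftover argv.
if __name__ == '__main__':
    tf.app.run()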
def test_optimize(self):
    """Ensure the new optimize config flag works.

    Right now, 'works' means it correctly determines the true vocab
    size, updates it in the config file, and updates any associated
    file names.
    """
    config = io_utils.parse_config(flags=TEST_FLAGS)
    logging.info(config)

    # Manually set vocab size to a huge (non-optimal for TestData) value.
    config = io_utils.update_config(config=config, vocab_size=99999)
    self.assertEqual(config['dataset_params']['vocab_size'], 99999)
    self.assertEqual(config['dataset_params']['config_path'], TEST_CONFIG_PATH)

    # Instantiate a new dataset.
    # This is where the 'optimize' flag comes into play, since
    # the dataset object is responsible for things like checking
    # data file paths and unique words.
    logging.info("Setting up %s dataset.", config['dataset'])
    logging.info("Passing %r for dataset_params", config['dataset_params'])
    dataset_class = pydoc.locate(config['dataset']) \
        or getattr(data, config['dataset'])
    dataset = dataset_class(config['dataset_params'])
    self.assertIsInstance(dataset, data.TestData)
    self.assertNotEqual(dataset.vocab_size, 99999)
def test_basic(self):
    """Instantiate all supported datasets and check they satisfy basic conditions.

    THIS MAY TAKE A LONG TIME TO COMPLETE. Since we are testing that the
    supported datasets can be instantiated successfully, it necessarily
    means that the data must exist in proper format. Since the program
    will generate the proper format(s) if not found, this will take about
    15 minutes if run from a completely fresh setup. Otherwise, a few
    seconds. :)
    """
    if os.getenv('DATA') is None \
            and not os.path.exists('/home/brandon/Datasets'):
        print('To run this test, please enter the path to your datasets: ')
        data_dir = input()
    else:
        data_dir = '/home/brandon/Datasets'

    for dataset_name in self.supported_datasets:
        logging.info('Testing %s', dataset_name)

        incomplete_params = {
            'vocab_size': 40000,
            'max_seq_len': 10}
        self.assertIsNotNone(incomplete_params)
        dataset_class = getattr(data, dataset_name)
        # User must specify data_dir, which we have not done yet.
        self.assertRaises(ValueError, dataset_class, incomplete_params)

        config = io_utils.parse_config(flags=TEST_FLAGS)
        dataset_params = config.get('dataset_params')
        dataset_params['data_dir'] = os.path.join(
            data_dir, dataset_name.lower())
        dataset = dataset_class(dataset_params)

        # Ensure all params from DEFAULT_FULL_CONFIG['dataset_params']
        # are set to a value in our dataset object.
        for default_key in DEFAULT_FULL_CONFIG['dataset_params']:
            self.assertIsNotNone(getattr(dataset, default_key))

        # Check that all dataset properties exist.
        self.assertIsNotNone(dataset.name)
        self.assertIsNotNone(dataset.word_to_idx)
        self.assertIsNotNone(dataset.idx_to_word)
        self.assertIsNotNone(dataset.vocab_size)
        self.assertIsNotNone(dataset.max_seq_len)

        # Check that the properties satisfy basic expectations.
        self.assertEqual(len(dataset.word_to_idx), len(dataset.idx_to_word))
        self.assertEqual(len(dataset.word_to_idx), dataset.vocab_size)
        self.assertEqual(len(dataset.idx_to_word), dataset.vocab_size)

        incomplete_params.clear()
        dataset_params.clear()
def test_merge_params(self):
    """Checks how parameters passed to TEST_FLAGS interact with
    parameters from yaml files.

    Expected behavior is that any params in TEST_FLAGS will override
    those from files, but that all values from file will be used if
    not explicitly passed to TEST_FLAGS.
    """
    config = io_utils.parse_config(flags=TEST_FLAGS)

    # ==============================================================
    # Easy tests.
    # ==============================================================

    # Change model in test_flags and ensure merged config uses that model.
    config = update_config(config, model='ChatBot')
    self.assertEqual(config['model'], 'ChatBot')

    # Also ensure that switching back works too.
    config = update_config(config, model='DynamicBot')
    self.assertEqual(config['model'], 'DynamicBot')

    # Do the same for changing the dataset.
    config = update_config(config, dataset='TestData')
    self.assertEqual(config['dataset'], 'TestData')

    # ==============================================================
    # Medium tests.
    # ==============================================================

    # Ensure recursive merging works.
    config = update_config(
        config,
        batch_size=123,
        dropout_prob=0.8)
    logging.info(config)
    self.assertEqual(config['model'], self.test_config['model'])
    self.assertEqual(config['dataset'], self.test_config['dataset'])
    self.assertNotEqual(config['model_params'],
                        self.test_config['model_params'])
def create_bot(flags=TEST_FLAGS, return_dataset=False):
    """Chatbot factory: Creates and returns a fresh bot.

    Nice for testing specific methods quickly.
    """
    # Wipe the graph and update config if needed.
    tf.reset_default_graph()
    config = io_utils.parse_config(flags=flags)
    io_utils.print_non_defaults(config)

    # Instantiate a new dataset.
    print("Setting up", config['dataset'], "dataset.")
    dataset_class = locate(config['dataset']) \
        or getattr(data, config['dataset'])
    dataset = dataset_class(config['dataset_params'])

    # Instantiate a new chatbot.
    print("Creating", config['model'], ". . . ")
    bot_class = locate(config['model']) or getattr(chatbot, config['model'])
    bot = bot_class(dataset, config)

    if return_dataset:
        return bot, dataset
    return bot
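# Usage sketch (an assumption, kept commented out so nothing runs at import
# time): request the dataset too, so a test can tokenize its own inputs
# via dataset.word_to_idx before stepping the bot.
#
#   bot, dataset = create_bot(return_dataset=True)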
def test_manual_freeze(self):
    """Make sure we can freeze the bot, unfreeze, and still chat."""

    # ================================================
    # 1. Create & train bot.
    # ================================================
    flags = TEST_FLAGS
    flags = flags._replace(model_params=dict(
        ckpt_dir=os.path.join(TEST_DIR, 'out'),
        reset_model=True,
        steps_per_ckpt=20,
        max_steps=40))

    bot = create_bot(flags)
    self.assertEqual(bot.reset_model, True)

    # Simulate small train sesh on bot.
    bot.train()

    # ================================================
    # 2. Recreate a chattable bot.
    # ================================================
    # Recreate bot from scratch with decode set to true.
    logging.info("Resetting default graph . . . ")
    tf.reset_default_graph()

    flags = flags._replace(model_params={
        **flags.model_params,
        'reset_model': False,
        'decode': True,
        'max_steps': 100,
        'steps_per_ckpt': 50})
    self.assertTrue(flags.model_params.get('decode'))

    bot = create_bot(flags)
    self.assertTrue(bot.is_chatting)
    self.assertTrue(bot.decode)

    print("Testing quick chat sesh . . . ")
    config = io_utils.parse_config(flags=flags)
    dataset_class = pydoc.locate(config['dataset']) \
        or getattr(data, config['dataset'])
    dataset = dataset_class(config['dataset_params'])

    test_input = "How's it going?"
    encoder_inputs = io_utils.sentence_to_token_ids(
        tf.compat.as_bytes(test_input), dataset.word_to_idx)
    encoder_inputs = np.array([encoder_inputs[::-1]])
    bot.pipeline._feed_dict = {bot.pipeline.user_input: encoder_inputs}

    # Get output sentence from the chatbot.
    _, _, response = bot.step(forward_only=True)
    print("Robot:", dataset.as_words(response[0][:-1]))

    # ================================================
    # 3. Freeze the chattable bot.
    # ================================================
    logging.info("Calling bot.freeze() . . . ")
    bot.freeze()

    # ================================================
    # 4. Try to unfreeze and use it.
    # ================================================
    logging.info("Resetting default graph . . . ")
    tf.reset_default_graph()

    logging.info("Importing frozen graph into default . . . ")
    frozen_graph = bot_freezer.load_graph(bot.ckpt_dir)

    logging.info("Extracting input/output tensors.")
    tensors, frozen_graph = bot_freezer.unfreeze_bot(bot.ckpt_dir)
    self.assertIsNotNone(tensors['inputs'])
    self.assertIsNotNone(tensors['outputs'])

    with tf.Session(graph=frozen_graph) as sess:
        raw_input = "How's it going?"
        encoder_inputs = io_utils.sentence_to_token_ids(
            tf.compat.as_bytes(raw_input), dataset.word_to_idx)
        encoder_inputs = np.array([encoder_inputs[::-1]])
        feed_dict = {tensors['inputs'].name: encoder_inputs}
        response = sess.run(tensors['outputs'], feed_dict=feed_dict)
        logging.info('Response: %s', response)