def testWriterInitAndClose(self):
  logdir = self.get_temp_dir()
  with summary_ops.always_record_summaries():
    writer = summary_ops.create_file_writer(
        logdir, max_queue=100, flush_millis=1000000)
    with writer.as_default():
      summary_ops.scalar('one', 1.0, step=1)
  with self.cached_session() as sess:
    sess.run(summary_ops.summary_writer_initializer_op())
    get_total = lambda: len(summary_test_util.events_from_logdir(logdir))
    self.assertEqual(1, get_total())  # file_version Event
    # Running init() again while writer is open has no effect
    sess.run(writer.init())
    self.assertEqual(1, get_total())
    sess.run(summary_ops.all_summary_ops())
    self.assertEqual(1, get_total())
    # Running close() should do an implicit flush
    sess.run(writer.close())
    self.assertEqual(2, get_total())
    # Running init() on a closed writer should start a new file
    time.sleep(1.1)  # Ensure filename has a different timestamp
    sess.run(writer.init())
    sess.run(summary_ops.all_summary_ops())
    sess.run(writer.close())
    files = sorted(gfile.Glob(os.path.join(logdir, '*tfevents*')))
    self.assertEqual(2, len(files))
    self.assertEqual(2, len(summary_test_util.events_from_file(files[1])))
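# The assertions above read events back through summary_test_util. Below is a
# minimal sketch of what such helpers could look like; it is an illustrative
# assumption, not the actual summary_test_util implementation, and the module
# paths may differ across TensorFlow versions.
from tensorflow.core.util import event_pb2
from tensorflow.python.lib.io import tf_record


def _events_from_file_sketch(filepath):
  """Parses every tf.Event record stored in a single events file."""
  return [event_pb2.Event.FromString(record)
          for record in tf_record.tf_record_iterator(filepath)]


def _events_from_logdir_sketch(logdir):
  """Assumes the logdir holds exactly one events file and reads it."""
  files = gfile.Glob(os.path.join(logdir, '*tfevents*'))
  assert len(files) == 1, 'expected exactly one events file in %s' % logdir
  return _events_from_file_sketch(files[0])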
def testTrainWithSummary(self):
  with tf.Graph().as_default():
    images = tf.placeholder(tf.float32, image_shape(None), name='images')
    labels = tf.placeholder(tf.float32, [None, 1000], name='labels')
    tf.train.get_or_create_global_step()
    logdir = tempfile.mkdtemp()
    with tf.contrib.summary.always_record_summaries():
      with tf.contrib.summary.create_summary_file_writer(
          logdir, max_queue=0, name='t0').as_default():
        model = resnet50.ResNet50(data_format())
        logits = model(images, training=True)
        loss = tf.losses.softmax_cross_entropy(
            logits=logits, onehot_labels=labels)
        tf.contrib.summary.scalar(name='loss', tensor=loss)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
        train_op = optimizer.minimize(loss)

        init = tf.global_variables_initializer()
        self.assertEqual(321, len(tf.global_variables()))

        batch_size = 32
        with tf.Session() as sess:
          sess.run(init)
          sess.run(tf.contrib.summary.summary_writer_initializer_op())
          np_images, np_labels = random_batch(batch_size)
          sess.run([train_op, tf.contrib.summary.all_summary_ops()],
                   feed_dict={images: np_images, labels: np_labels})

          events = summary_test_util.events_from_file(logdir)
          self.assertEqual(len(events), 2)
          self.assertEqual(events[1].summary.value[0].tag, 'loss')
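# testTrainWithSummary relies on helpers defined elsewhere in the test file
# (image_shape, data_format, random_batch). Below is a minimal sketch, assuming
# standard 224x224 RGB ImageNet-style inputs and 1000 classes; the names and
# shapes are illustrative assumptions, not the canonical definitions.
import numpy as np


def data_format():
  # Assumption: channels_first when a GPU is available, channels_last otherwise.
  return 'channels_first' if tf.test.is_gpu_available() else 'channels_last'


def image_shape(batch_size):
  # Assumption: 224x224 RGB images, laid out to match data_format().
  if data_format() == 'channels_first':
    return [batch_size, 3, 224, 224]
  return [batch_size, 224, 224, 3]


def random_batch(batch_size):
  # Random images plus one-hot labels over 1000 classes, matching the
  # `images` and `labels` placeholders used in the test.
  images = np.random.rand(*image_shape(batch_size)).astype(np.float32)
  num_classes = 1000
  labels = np.eye(num_classes, dtype=np.float32)[
      np.random.randint(num_classes, size=batch_size)]
  return images, labels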
def testWriterInitAndClose(self):
  logdir = self.get_temp_dir()
  get_total = lambda: len(summary_test_util.events_from_logdir(logdir))
  with summary_ops.always_record_summaries():
    writer = summary_ops.create_file_writer(
        logdir, max_queue=100, flush_millis=1000000)
    self.assertEqual(1, get_total())  # file_version Event
    # Calling init() again while writer is open has no effect
    writer.init()
    self.assertEqual(1, get_total())
    try:
      # Not using .as_default() to avoid implicit flush when exiting
      writer.set_as_default()
      summary_ops.scalar('one', 1.0, step=1)
      self.assertEqual(1, get_total())
      # Calling .close() should do an implicit flush
      writer.close()
      self.assertEqual(2, get_total())
      # Calling init() on a closed writer should start a new file
      time.sleep(1.1)  # Ensure filename has a different timestamp
      writer.init()
      files = sorted(gfile.Glob(os.path.join(logdir, '*tfevents*')))
      self.assertEqual(2, len(files))
      get_total = lambda: len(summary_test_util.events_from_file(files[1]))
      self.assertEqual(1, get_total())  # file_version Event
      summary_ops.scalar('two', 2.0, step=2)
      writer.close()
      self.assertEqual(2, get_total())
    finally:
      # Clean up by resetting default writer
      summary_ops.create_file_writer(None).set_as_default()
def testSharedName(self):
  logdir = self.get_temp_dir()
  with summary_ops.always_record_summaries():
    # Create with default shared name (should match logdir)
    writer1 = summary_ops.create_file_writer(logdir)
    with writer1.as_default():
      summary_ops.scalar('one', 1.0, step=1)
    # Create with explicit logdir shared name (should be same resource/file)
    shared_name = 'logdir:' + logdir
    writer2 = summary_ops.create_file_writer(logdir, name=shared_name)
    with writer2.as_default():
      summary_ops.scalar('two', 2.0, step=2)
    # Create with different shared name (should be separate resource/file)
    writer3 = summary_ops.create_file_writer(logdir, name='other')
    with writer3.as_default():
      summary_ops.scalar('three', 3.0, step=3)

  with self.cached_session() as sess:
    # Run init ops across writers sequentially to avoid race condition.
    # TODO(nickfelt): fix race condition in resource manager lookup or create
    sess.run(writer1.init())
    sess.run(writer2.init())
    time.sleep(1.1)  # Ensure filename has a different timestamp
    sess.run(writer3.init())
    sess.run(summary_ops.all_summary_ops())
    sess.run([writer1.flush(), writer2.flush(), writer3.flush()])

  event_files = iter(sorted(gfile.Glob(os.path.join(logdir, '*tfevents*'))))

  # First file has tags "one" and "two"
  events = summary_test_util.events_from_file(next(event_files))
  self.assertEqual('brain.Event:2', events[0].file_version)
  tags = [e.summary.value[0].tag for e in events[1:]]
  self.assertItemsEqual(['one', 'two'], tags)

  # Second file has tag "three"
  events = summary_test_util.events_from_file(next(event_files))
  self.assertEqual('brain.Event:2', events[0].file_version)
  tags = [e.summary.value[0].tag for e in events[1:]]
  self.assertItemsEqual(['three'], tags)

  # No more files
  self.assertRaises(StopIteration, lambda: next(event_files))
def testSummaryGlobalStep(self):
  global_step = training_util.get_or_create_global_step()
  logdir = tempfile.mkdtemp()
  with summary_ops.create_summary_file_writer(
      logdir, max_queue=0,
      name='t2').as_default(), summary_ops.always_record_summaries():
    summary_ops.scalar('scalar', 2.0, global_step=global_step)

  events = summary_test_util.events_from_file(logdir)
  self.assertEqual(len(events), 2)
  self.assertEqual(events[1].summary.value[0].tag, 'scalar')
def testWriteSummaries(self):
  e = SimpleEvaluator(IdentityModel())
  e(3.0)
  e([5.0, 7.0, 9.0])
  training_util.get_or_create_global_step()
  logdir = tempfile.mkdtemp()
  e.all_metric_results(logdir)

  events = summary_test_util.events_from_file(logdir)
  self.assertEqual(len(events), 2)
  # Mean of the observed values: (3 + 5 + 7 + 9) / 4 = 6.0.
  self.assertEqual(events[1].summary.value[0].simple_value, 6.0)
def testSummaryName(self):
  training_util.get_or_create_global_step()
  logdir = tempfile.mkdtemp()
  with summary_ops.create_summary_file_writer(
      logdir, max_queue=0,
      name='t2').as_default(), summary_ops.always_record_summaries():
    summary_ops.scalar('scalar', 2.0)

  events = summary_test_util.events_from_file(logdir)
  self.assertEqual(len(events), 2)
  self.assertEqual(events[1].summary.value[0].tag, 'scalar')
def testSharedName(self):
  logdir = self.get_temp_dir()
  with summary_ops.always_record_summaries():
    # Create with default shared name (should match logdir)
    writer1 = summary_ops.create_file_writer(logdir)
    with writer1.as_default():
      summary_ops.scalar('one', 1.0, step=1)
      summary_ops.flush()
    # Create with explicit logdir shared name (should be same resource/file)
    shared_name = 'logdir:' + logdir
    writer2 = summary_ops.create_file_writer(logdir, name=shared_name)
    with writer2.as_default():
      summary_ops.scalar('two', 2.0, step=2)
      summary_ops.flush()
    # Create with different shared name (should be separate resource/file)
    time.sleep(1.1)  # Ensure filename has a different timestamp
    writer3 = summary_ops.create_file_writer(logdir, name='other')
    with writer3.as_default():
      summary_ops.scalar('three', 3.0, step=3)
      summary_ops.flush()

  event_files = iter(sorted(gfile.Glob(os.path.join(logdir, '*tfevents*'))))

  # First file has tags "one" and "two"
  events = iter(summary_test_util.events_from_file(next(event_files)))
  self.assertEqual('brain.Event:2', next(events).file_version)
  self.assertEqual('one', next(events).summary.value[0].tag)
  self.assertEqual('two', next(events).summary.value[0].tag)
  self.assertRaises(StopIteration, lambda: next(events))

  # Second file has tag "three"
  events = iter(summary_test_util.events_from_file(next(event_files)))
  self.assertEqual('brain.Event:2', next(events).file_version)
  self.assertEqual('three', next(events).summary.value[0].tag)
  self.assertRaises(StopIteration, lambda: next(events))

  # No more files
  self.assertRaises(StopIteration, lambda: next(event_files))
def testWriteSummaries(self):
  m = metrics.Mean()
  m([1, 10, 100])
  training_util.get_or_create_global_step()
  logdir = tempfile.mkdtemp()
  with summary_ops.create_summary_file_writer(
      logdir, max_queue=0,
      name="t0").as_default(), summary_ops.always_record_summaries():
    m.result()  # As a side-effect will write summaries.

  events = summary_test_util.events_from_file(logdir)
  self.assertEqual(len(events), 2)
  # Mean of the observed values: (1 + 10 + 100) / 3 = 37.0.
  self.assertEqual(events[1].summary.value[0].simple_value, 37.0)
def testTrainSpinn(self):
  """Test with fake toy SNLI data and GloVe vectors."""

  # 1. Create and load a fake SNLI data file and a fake GloVe embedding file.
  snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0")
  fake_train_file = self._create_test_data(snli_1_0_dir)

  vocab = data.load_vocabulary(self._temp_data_dir)
  word2index, embed = data.load_word_vectors(self._temp_data_dir, vocab)

  train_data = data.SnliData(fake_train_file, word2index)
  dev_data = data.SnliData(fake_train_file, word2index)
  test_data = data.SnliData(fake_train_file, word2index)

  # 2. Create a fake config.
  config = _test_spinn_config(
      data.WORD_VECTOR_LEN, 4,
      logdir=os.path.join(self._temp_data_dir, "logdir"))

  # 3. Test training of a SPINN model.
  trainer = spinn.train_or_infer_spinn(
      embed, word2index, train_data, dev_data, test_data, config)

  # 4. Load train loss values from the summary files and verify that they
  #    decrease with training.
  summary_file = glob.glob(os.path.join(config.logdir, "events.out.*"))[0]
  events = summary_test_util.events_from_file(summary_file)
  train_losses = [event.summary.value[0].simple_value for event in events
                  if event.summary.value
                  and event.summary.value[0].tag == "train/loss"]
  self.assertEqual(config.epochs, len(train_losses))

  # 5. Verify that checkpoints exist and contain all the expected variables.
  self.assertTrue(glob.glob(os.path.join(config.logdir, "ckpt*")))
  object_graph_string = checkpoint_utils.load_variable(
      config.logdir, name="_CHECKPOINTABLE_OBJECT_GRAPH")
  object_graph = checkpointable_object_graph_pb2.CheckpointableObjectGraph()
  object_graph.ParseFromString(object_graph_string)
  ckpt_variable_names = set()
  for node in object_graph.nodes:
    for attribute in node.attributes:
      ckpt_variable_names.add(attribute.full_name)
  self.assertIn("global_step", ckpt_variable_names)
  for v in trainer.variables:
    variable_name = v.name[:v.name.index(":")] if ":" in v.name else v.name
    self.assertIn(variable_name, ckpt_variable_names)
def testWriteSummariesGraph(self):
  with context.graph_mode(), ops.Graph().as_default(), self.test_session():
    e = SimpleEvaluator(IdentityModel())
    ds = dataset_ops.Dataset.from_tensor_slices([3.0, 5.0, 7.0, 9.0])
    training_util.get_or_create_global_step()
    logdir = tempfile.mkdtemp()
    init_op, call_op, results_op = e.evaluate_on_dataset(
        ds, summary_logdir=logdir)
    variables.global_variables_initializer().run()
    e.run_evaluation(init_op, call_op, results_op)

    events = summary_test_util.events_from_file(logdir)
    self.assertEqual(len(events), 2)
    self.assertEqual(events[1].summary.value[0].simple_value, 6.0)
def testDefunSummarys(self):
  training_util.get_or_create_global_step()
  logdir = tempfile.mkdtemp()
  with summary_ops.create_summary_file_writer(
      logdir, max_queue=0,
      name='t1').as_default(), summary_ops.always_record_summaries():

    @function.defun
    def write():
      summary_ops.scalar('scalar', 2.0)

    write()

  events = summary_test_util.events_from_file(logdir)
  self.assertEqual(len(events), 2)
  self.assertEqual(events[1].summary.value[0].simple_value, 2.0)
def test_train(self):
  device, data_format = device_and_data_format()
  model = resnet50.ResNet50(data_format)
  tf.train.get_or_create_global_step()
  logdir = tempfile.mkdtemp()
  with tf.contrib.summary.create_summary_file_writer(
      logdir, max_queue=0,
      name='t0').as_default(), tf.contrib.summary.always_record_summaries():
    with tf.device(device):
      optimizer = tf.train.GradientDescentOptimizer(0.1)
      images, labels = random_batch(2)
      train_one_step(model, images, labels, optimizer)
      self.assertEqual(320, len(model.variables))

  events = summary_test_util.events_from_file(logdir)
  self.assertEqual(len(events), 2)
  self.assertEqual(events[1].summary.value[0].tag, 'loss')
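# test_train calls device_and_data_format and train_one_step, which are defined
# elsewhere in the test file. Below is a minimal sketch under stated assumptions
# (one eager step that also records a 'loss' summary); it is illustrative, not
# the actual helper implementations.
def device_and_data_format():
  # Assumption: run on GPU with channels_first when available.
  return (('/gpu:0', 'channels_first') if tf.test.is_gpu_available()
          else ('/cpu:0', 'channels_last'))


def train_one_step(model, images, labels, optimizer):
  # Assumption: a single eager training step using a gradient tape, emitting
  # the loss summary the test asserts on.
  with tf.GradientTape() as tape:
    logits = model(images, training=True)
    loss = tf.losses.softmax_cross_entropy(
        logits=logits, onehot_labels=labels)
  tf.contrib.summary.scalar(name='loss', tensor=loss)
  grads = tape.gradient(loss, model.variables)
  optimizer.apply_gradients(zip(grads, model.variables))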
def testTrainSpinn(self):
  """Test with fake toy SNLI data and GloVe vectors."""

  # 1. Create and load a fake SNLI data file and a fake GloVe embedding file.
  snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0")
  fake_train_file = os.path.join(snli_1_0_dir, "snli_1.0_train.txt")
  os.makedirs(snli_1_0_dir)

  # Four sentences in total.
  with open(fake_train_file, "wt") as f:
    f.write("gold_label\tsentence1_binary_parse\tsentence2_binary_parse\t"
            "sentence1_parse\tsentence2_parse\tsentence1\tsentence2\t"
            "captionID\tpairID\tlabel1\tlabel2\tlabel3\tlabel4\tlabel5\n")
    f.write("neutral\t( ( Foo bar ) . )\t( ( foo . )\t"
            "DummySentence1Parse\tDummySentence2Parse\t"
            "Foo bar.\tfoo baz.\t"
            "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
            "neutral\tentailment\tneutral\tneutral\tneutral\n")
    f.write("contradiction\t( ( Bar foo ) . )\t( ( baz . )\t"
            "DummySentence1Parse\tDummySentence2Parse\t"
            "Foo bar.\tfoo baz.\t"
            "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
            "neutral\tentailment\tneutral\tneutral\tneutral\n")
    f.write("entailment\t( ( Quux quuz ) . )\t( ( grault . )\t"
            "DummySentence1Parse\tDummySentence2Parse\t"
            "Foo bar.\tfoo baz.\t"
            "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
            "neutral\tentailment\tneutral\tneutral\tneutral\n")
    f.write("entailment\t( ( Quuz quux ) . )\t( ( garply . )\t"
            "DummySentence1Parse\tDummySentence2Parse\t"
            "Foo bar.\tfoo baz.\t"
            "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
            "neutral\tentailment\tneutral\tneutral\tneutral\n")

  glove_dir = os.path.join(self._temp_data_dir, "glove")
  os.makedirs(glove_dir)
  glove_file = os.path.join(glove_dir, "glove.42B.300d.txt")
  words = [".", "foo", "bar", "baz", "quux", "quuz", "grault", "garply"]
  with open(glove_file, "wt") as f:
    for i, word in enumerate(words):
      f.write("%s " % word)
      for j in range(data.WORD_VECTOR_LEN):
        f.write("%.5f" % (i * 0.1))
        if j < data.WORD_VECTOR_LEN - 1:
          f.write(" ")
        else:
          f.write("\n")

  vocab = data.load_vocabulary(self._temp_data_dir)
  word2index, embed = data.load_word_vectors(self._temp_data_dir, vocab)

  train_data = data.SnliData(fake_train_file, word2index)
  dev_data = data.SnliData(fake_train_file, word2index)
  test_data = data.SnliData(fake_train_file, word2index)
  print(embed)

  # 2. Create a fake config.
  config = _test_spinn_config(
      data.WORD_VECTOR_LEN, 4,
      logdir=os.path.join(self._temp_data_dir, "logdir"))

  # 3. Test training of a SPINN model.
  spinn.train_spinn(embed, train_data, dev_data, test_data, config)

  # 4. Load train loss values from the summary files and verify that they
  #    decrease with training.
  summary_file = glob.glob(os.path.join(config.logdir, "events.out.*"))[0]
  events = summary_test_util.events_from_file(summary_file)
  train_losses = [event.summary.value[0].simple_value for event in events
                  if event.summary.value
                  and event.summary.value[0].tag == "train/loss"]
  self.assertEqual(config.epochs, len(train_losses))
  self.assertLess(train_losses[-1], train_losses[0])