def testTFSummaryTensor(self):
    """Verify processing of tf.summary.tensor.

    Writes a scalar, a vector and a byte-string tensor summary through a
    `FileWriter` whose event writer is swapped for an `_EventGenerator`,
    reloads them with an `EventAccumulator`, and checks that each decoded
    tensor equals the value that was written.
    """
    sink = _EventGenerator(self, zero_out_timestamps=True)
    file_writer = tf.summary.FileWriter(self.get_temp_dir())
    file_writer.event_writer = sink

    # (tag, value) pairs, in the order the summary ops are created.
    inputs = [
        ('scalar', 1.0),
        ('vector', [1.0, 2.0, 3.0]),
        ('string', six.b('foobar')),
    ]
    with self.test_session() as sess:
        for tag, raw in inputs:
            tf.summary.tensor_summary(tag, tf.constant(raw))
        serialized = sess.run(tf.summary.merge_all())
        file_writer.add_summary(serialized, 0)

    acc = ea.EventAccumulator(sink)
    acc.Reload()
    self.assertTagsEqual(acc.Tags(), {
        ea.TENSORS: [tag for tag, _ in inputs],
    })

    for tag, raw in inputs:
        decoded = tf.make_ndarray(acc.Tensors(tag)[0].tensor_proto)
        self.assertTrue(np.array_equal(decoded, raw))
def test_correctly_handles_no_audio(self):
    """A batch of zero clips still yields one value, with a (0, 2) tensor."""
    empty_shape = (0, self.audio_length, 2)
    empty_audio = np.zeros(empty_shape, dtype=np.float32)
    pb = self.compute_and_check_summary_pb('k488', empty_audio, max_outputs=3)
    self.assertEqual(1, len(pb.value))
    decoded = tf.make_ndarray(pb.value[0].tensor)
    self.assertEqual(decoded.shape, (0, 2))
def last_metric_eval(multiplexer, session_name, metric_name):
    """Returns the last evaluation of the given metric at the given session.

    Args:
      multiplexer: The EventMultiplexer instance allowing access to
          the exported summary data.
      session_name: String. The session name for which to get the metric
          evaluations.
      metric_name: api_pb2.MetricName proto. The name of the metric to use.

    Returns:
      A 3-tuple of the form (wall-time, step, value) denoting the last
      evaluation of the metric, where wall-time denotes the wall time in
      seconds since UNIX epoch of the time of the evaluation, step denotes
      the training step at which the model is evaluated, and value denotes
      the (scalar real) value of the metric.

    Raises:
      KeyError if the given session does not have the metric.
    """
    assert isinstance(session_name, str)
    assert isinstance(metric_name, api_pb2.MetricName)
    # The run is looked up under the session name suffixed by the metric group.
    run = session_name + metric_name.group
    tag = metric_name.tag
    try:
        events = multiplexer.Tensors(run=run, tag=tag)
    except KeyError as e:
        message = (
            'Can\'t find metric %s for session: %s. Underlying error message: %s'
            % (metric_name, session_name, e))
        raise KeyError(message)
    newest = events[-1]
    # TODO(erez): Raise HParamsError if the tensor is not a 0-D real scalar.
    value = tf.make_ndarray(newest.tensor_proto).item()
    return (newest.wall_time, newest.step, value)
def test_audio_count_when_more_than_max(self):
    """With max_outputs two above the clip count, every clip is emitted."""
    clip_count = len(self.stereo)
    pb = self.compute_and_check_summary_pb(
        'k488', self.stereo, max_outputs=clip_count + 2)
    self.assertEqual(1, len(pb.value))
    decoded = tf.make_ndarray(pb.value[0].tensor)
    self.assertEqual(decoded.shape, (clip_count, 2))
def process_string_tensor_event(event):
    """Convert a TensorEvent into a JSON-compatible response.

    Args:
      event: A TensorEvent; its `tensor_proto` is decoded to an ndarray and
          rendered with `text_array_to_html`.

    Returns:
      A dict with keys 'wall_time', 'step' and 'text' (the rendered HTML).
    """
    decoded = tf.make_ndarray(event.tensor_proto)
    response = {
        'wall_time': event.wall_time,
        'step': event.step,
    }
    response['text'] = text_array_to_html(decoded)
    return response
def test_when_bucket_count_not_statically_known(self):
    """Feed the bucket count through a placeholder and check the shape."""
    expected_buckets = 44
    count_placeholder = tf.placeholder(tf.int32, shape=())
    pb = self.compute_and_check_summary_pb(
        bucket_count=expected_buckets,
        bucket_count_tensor=count_placeholder,
        feed_dict={count_placeholder: expected_buckets})
    decoded = tf.make_ndarray(pb.value[0].tensor)
    self.assertEqual(decoded.shape, (expected_buckets, 3))
def test_normal_input(self):
    """Check bucket extremes, total count and contiguity for gaussian data.

    Column 0's minimum must equal the data minimum, column 1's maximum the
    data maximum, column 2 must sum to the number of samples, and each row's
    column 0 must match the previous row's column 1.
    """
    bucket_count = 44
    samples = self.gaussian
    pb = self.compute_and_check_summary_pb(
        data=samples.reshape((5, -1)), bucket_count=bucket_count)
    buckets = tf.make_ndarray(pb.value[0].tensor)
    lower = buckets[:, 0]
    upper = buckets[:, 1]
    counts = buckets[:, 2]
    self.assertEqual(lower.min(), samples.min())
    self.assertEqual(upper.max(), samples.max())
    self.assertEqual(counts.sum(), samples.size)
    np.testing.assert_allclose(lower[1:], upper[:-1])
def test_many_values_with_weights(self):
    """Six weighted predictions at three thresholds give the expected rows."""
    labels = np.array([True, False, False, True, True, True])
    predictions = np.float32([0.2, 0.3, 0.4, 0.6, 0.7, 0.8])
    weights = np.float32([0.0, 0.5, 2.0, 0.0, 0.5, 1.0])
    expected = [
        [1.5, 1.5, 0.0],
        [2.5, 0.0, 0.0],
        [0.0, 2.5, 2.5],
        [0.0, 0.0, 1.5],
        [0.375, 1.0, 0.0],
        [1.0, 1.0, 0.0],
    ]
    pb = self.compute_and_check_summary_pb(
        name='foo',
        labels=labels,
        predictions=predictions,
        num_thresholds=3,
        weights=weights)
    actual = tf.make_ndarray(pb.value[0].tensor)
    self.verify_float_arrays_are_equal(expected, actual)
def test_np_array_unicode_value(self):
    """A 2-D unicode array is stored as UTF-8 encoded byte strings."""
    unicode_grid = np.array(
        [[u'A', u'long', u'long'], [u'way', u'to', u'run \u203C']])
    expected = [
        [b'A', b'long', b'long'],
        [b'way', b'to', b'run \xe2\x80\xbc'],
    ]
    pb = self.compute_and_check_summary_pb('fa', unicode_grid)
    decoded = tf.make_ndarray(pb.value[0].tensor).tolist()
    self.assertEqual(expected, decoded)
    # Check that all entries are byte strings.
    for entry in (item for row in decoded for item in row):
        self.assertIsInstance(entry, six.binary_type)
def _process_tensor_event(self, event, thresholds):
    """Converts a TensorEvent into a dict that encapsulates information on it.

    Args:
      event: The TensorEvent to convert.
      thresholds: An array of floats that ranges from 0 to 1 (in that
        direction and inclusive of 0 and 1).

    Returns:
      A JSON-able dictionary of PR curve data for 1 step, as built by
      `self._make_pr_entry`.
    """
    pr_data = tf.make_ndarray(event.tensor_proto)
    return self._make_pr_entry(
        event.step, event.wall_time, pr_data, thresholds)
def test_all_false_negatives(self):
    """A single positive label predicted as 0 gives the expected rows."""
    expected = [
        [1.0, 0.0, 0.0],
        [0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0],
        [0.0, 1.0, 1.0],
        [1.0, 0.0, 0.0],
        [1.0, 0.0, 0.0],
    ]
    pb = self.compute_and_check_summary_pb(
        name='foo',
        labels=np.array([True]),
        predictions=np.float32([0]),
        num_thresholds=3)
    actual = tf.make_ndarray(pb.value[0].tensor)
    self.verify_float_arrays_are_equal(expected, actual)
def test_counts_below_1(self):
    """Tests support for counts below 1.

    Certain weights cause TP, FP, TN, FN counts to be below 1.
    """
    labels = np.array([True, False, False, True, True, True])
    predictions = np.float32([0.2, 0.3, 0.4, 0.6, 0.7, 0.8])
    weights = np.float32([0.0, 0.1, 0.2, 0.1, 0.1, 0.0])
    expected = [
        [0.2, 0.2, 0.0],
        [0.3, 0.0, 0.0],
        [0.0, 0.3, 0.3],
        [0.0, 0.0, 0.2],
        [0.4, 1.0, 0.0],
        [1.0, 1.0, 0.0],
    ]
    pb = self.compute_and_check_summary_pb(
        name='foo',
        labels=labels,
        predictions=predictions,
        num_thresholds=3,
        weights=weights)
    actual = tf.make_ndarray(pb.value[0].tensor)
    self.verify_float_arrays_are_equal(expected, actual)
def test_scalar(self):
    """Migrating a `simple_value` summary yields a 0-D float32 tensor."""
    tag = 'important_constants'
    raw = 0x5f3759df
    old_op = tf.summary.scalar(tag, tf.constant(raw))
    old_value = self._value_from_op(old_op)
    assert old_value.HasField('simple_value'), old_value

    new_value = data_compat.migrate_value(old_value)

    self.assertEqual(tag, new_value.tag)
    expected_metadata = scalar_metadata.create_summary_metadata(
        display_name=tag, description='')
    self.assertEqual(expected_metadata, new_value.metadata)
    self.assertTrue(new_value.HasField('tensor'))

    data = tf.make_ndarray(new_value.tensor)
    self.assertEqual((), data.shape)
    # The constant is compared after rounding it to float32 precision.
    expected_float = np.array(raw).astype('float32').item()
    self.assertEqual(expected_float, data.item())
def make_ndarray(tensor):
    """Create a numpy ndarray from a tensor.

    Thin wrapper that delegates to `tf.make_ndarray`, producing a numpy
    ndarray with the same shape and data as the tensor.

    Args:
      tensor: A TensorProto.

    Returns:
      A numpy array with the tensor contents.

    Raises:
      TypeError: if tensor has unsupported type.
    """
    ndarray = tf.make_ndarray(tensor)
    return ndarray
def test_histogram(self):
    """Migrating a `histo` summary preserves min, max and the total count."""
    tag = 'important_data'
    rows, cols = 23, 45
    old_op = tf.summary.histogram(tag, tf.random_normal(shape=[rows, cols]))
    old_value = self._value_from_op(old_op)
    assert old_value.HasField('histo'), old_value

    new_value = data_compat.migrate_value(old_value)

    self.assertEqual(tag, new_value.tag)
    expected_metadata = histogram_metadata.create_summary_metadata(
        display_name=tag, description='')
    self.assertEqual(expected_metadata, new_value.metadata)
    self.assertTrue(new_value.HasField('tensor'))

    buckets = tf.make_ndarray(new_value.tensor)
    first_bucket = buckets[0]
    last_bucket = buckets[-1]
    self.assertEqual(old_value.histo.min, first_bucket[0])
    self.assertEqual(old_value.histo.max, last_bucket[1])
    self.assertEqual(rows * cols, buckets[:, 2].astype(int).sum())
def normalize_summary_pb(self, pb):
    """Pass `pb`'s `TensorProto` through a marshalling roundtrip.

    `TensorProto`s can be equal in value even if they are not identical in
    representation, because data can be stored in either the
    `tensor_content` field or the `${dtype}_value` field. This
    normalization ensures a canonical form, and should be used before
    comparing two `Summary`s for equality.

    Args:
      pb: The `Summary` to normalize; it is copied, not modified.

    Returns:
      A new `tf.Summary` in which every value that has a `tensor` field has
      had that tensor rebuilt from its ndarray form.
    """
    normalized = tf.Summary()
    normalized.MergeFrom(pb)
    for value in normalized.value:
        if not value.HasField('tensor'):
            continue
        as_array = tf.make_ndarray(value.tensor)
        canonical = tf.make_tensor_proto(as_array)
        value.ClearField('tensor')
        value.tensor.MergeFrom(canonical)
    return normalized
def test_exhaustive_random_values(self):
    """Run the summary over many randomly generated data points.

    Most other tests use small and crafted predictions and labels; this
    test exhaustively generates many data points instead.

    NOTE(review): the hard-coded expectations only hold for one specific
    NumPy RNG state. This method sets no seed itself, so it presumably
    relies on seeding done elsewhere (e.g. in `setUp`) -- confirm.
    """
    data_points = 420
    # Labels are drawn before predictions; the draw order must be kept.
    labels = np.random.uniform(size=(data_points, )) > 0.5
    predictions = np.float32(np.random.uniform(size=(data_points, )))
    pb = self.compute_and_check_summary_pb(
        name='foo',
        labels=labels,
        predictions=predictions,
        num_thresholds=5)
    expected = [
        [218.0, 162.0, 111.0, 55.0, 0.0],
        [202.0, 148.0, 98.0, 51.0, 0.0],
        [0.0, 54.0, 104.0, 151.0, 202.0],
        [0.0, 56.0, 107.0, 163.0, 218.0],
        [0.5190476, 0.5225806, 0.5311005, 0.5188679, 0.0],
        [1.0, 0.7431192, 0.5091743, 0.2522936, 0.0],
    ]
    actual = tf.make_ndarray(pb.value[0].tensor)
    self.verify_float_arrays_are_equal(expected, actual)
def scalars_impl(self, tag, run, experiment, output_format):
    """Result of the form `(body, mime_type)`.

    Scalar points are read from the database when a connection provider is
    configured, otherwise from the event multiplexer. Each point is a
    `(wall_time, step, value)` tuple.

    Args:
      tag: Tag name of the scalar series.
      run: Run name of the scalar series.
      experiment: Experiment id used by the database query; an empty string
          disables the experiment filter.
      output_format: When equal to `OutputFormat.CSV` the points are
          rendered as CSV text; any other value returns the raw list.

    Returns:
      `(csv_text, 'text/csv')` or `(points, 'application/json')`.
    """
    if not self._db_connection_provider:
        values = [
            (event.wall_time,
             event.step,
             tf.make_ndarray(event.tensor_proto).item())
            for event in self._multiplexer.Tensors(run, tag)
        ]
    else:
        db = self._db_connection_provider()
        query_params = dict(
            exp=experiment, run=run, tag=tag, plugin=metadata.PLUGIN_NAME)
        # We select for steps greater than -1 because the writer inserts
        # placeholder rows en masse. The check for step filters out those
        # rows.
        cursor = db.execute('''
          SELECT
            Tensors.step,
            Tensors.computed_time,
            Tensors.data,
            Tensors.dtype
          FROM Tensors
          JOIN Tags
            ON Tensors.series = Tags.tag_id
          JOIN Runs
            ON Tags.run_id = Runs.run_id
          WHERE
            /* For backwards compatibility, ignore the experiment id
               for matching purposes if it is empty. */
            (:exp == '' OR Runs.experiment_id == CAST(:exp AS INT))
            AND Runs.run_name = :run
            AND Tags.tag_name = :tag
            AND Tags.plugin_name = :plugin
            AND Tensors.shape = ''
            AND Tensors.step > -1
          ORDER BY Tensors.step
        ''', query_params)
        # Rows arrive as (step, computed_time, ...); reorder to wall-time first.
        values = [
            (wall_time, step, self._get_value(data, dtype_enum))
            for (step, wall_time, data, dtype_enum) in cursor
        ]

    if output_format != OutputFormat.CSV:
        return (values, 'application/json')

    csv_buffer = StringIO()
    csv_writer = csv.writer(csv_buffer)
    csv_writer.writerow(['Wall time', 'Step', 'Value'])
    csv_writer.writerows(values)
    return (csv_buffer.getvalue(), 'text/csv')
def layout_impl(self):
    """Merge the layouts stored by every run into a single layout.

    Runs that have content for this plugin are visited in sorted order. The
    first layout found becomes the merged layout; categories of later
    layouts are appended to it, except that a category whose title was
    already seen has its not-yet-present charts merged into the existing
    category instead of being added a second time.

    Returns:
      The merged layout rendered by `json_format.MessageToJson` (default
      valued fields included), or an empty dict when no run has a layout.
    """
    # Keep a mapping between title and category so we do not create
    # duplicate categories. Every value must be a category message that
    # lives inside `merged_layout`, so that extending its charts is
    # reflected in the returned layout.
    title_to_category = {}

    merged_layout = None
    runs = sorted(
        self._multiplexer.PluginRunToTagToContent(metadata.PLUGIN_NAME))
    for run in runs:
        tensor_events = self._multiplexer.Tensors(
            run, metadata.CONFIG_SUMMARY_TAG)

        # This run has a layout. Merge it with the ones currently found.
        string_array = tf.make_ndarray(tensor_events[0].tensor_proto)
        # `np.asscalar` is deprecated and absent from current NumPy
        # releases; `ndarray.item()` is the equivalent call.
        content = string_array.item()
        layout_proto = layout_pb2.Layout()
        layout_proto.ParseFromString(tf.compat.as_bytes(content))

        if merged_layout:
            # Append the categories within this layout to the merged layout.
            for category in layout_proto.category:
                existing = title_to_category.get(category.title)
                if existing is not None:
                    # A category with this name has been seen before. Do not
                    # create a new one. Merge their charts, skipping any
                    # duplicates.
                    existing.chart.extend(
                        [c for c in category.chart if c not in existing.chart])
                else:
                    # This category has not been seen before. Register the
                    # copy owned by `merged_layout` (not the per-run
                    # original), otherwise charts merged into it later would
                    # be silently lost.
                    merged_category = merged_layout.category.add()
                    merged_category.MergeFrom(category)
                    title_to_category[category.title] = merged_category
        else:
            # This is the first layout encountered.
            merged_layout = layout_proto
            for category in layout_proto.category:
                title_to_category[category.title] = category

    if merged_layout:
        return json_format.MessageToJson(
            merged_layout, including_default_value_fields=True)
    else:
        # No layout was found.
        return {}
def test_audio(self):
    """Migrating an `audio` summary yields a (1, 2) tensor of byte strings.

    The first entry must be the original encoded audio string and the
    second an empty label.
    """
    clips, frames, channels = 4, 10, 2
    samples = tf.linspace(0.0, 100.0, clips * frames * channels)
    audio = tf.reshape(samples, (clips, frames, channels))
    old_op = tf.summary.audio('k488', audio, 44100)
    old_value = self._value_from_op(old_op)
    assert old_value.HasField('audio'), old_value

    new_value = data_compat.migrate_value(old_value)

    migrated_tag = 'k488/audio/0'
    self.assertEqual(migrated_tag, new_value.tag)
    expected_metadata = audio_metadata.create_summary_metadata(
        display_name=migrated_tag,
        description='',
        encoding=audio_metadata.Encoding.Value('WAV'))
    self.assertEqual(expected_metadata, new_value.metadata)
    self.assertTrue(new_value.HasField('tensor'))

    data = tf.make_ndarray(new_value.tensor)
    self.assertEqual((1, 2), data.shape)
    encoded, label = data[0]
    self.assertEqual(
        tf.compat.as_bytes(old_value.audio.encoded_audio_string), encoded)
    self.assertEqual(b'', label)  # empty label
def test_image(self):
    """Migrating an `image` summary yields (width, height, encoded data)."""
    noise = tf.random_normal(shape=[1, 400, 200, 3])
    pixels = tf.image.convert_image_dtype(noise, tf.uint8, saturate=True)
    old_op = tf.summary.image('mona_lisa', pixels)
    old_value = self._value_from_op(old_op)
    assert old_value.HasField('image'), old_value

    new_value = data_compat.migrate_value(old_value)

    migrated_tag = 'mona_lisa/image/0'
    self.assertEqual(migrated_tag, new_value.tag)
    expected_metadata = image_metadata.create_summary_metadata(
        display_name=migrated_tag, description='')
    self.assertEqual(expected_metadata, new_value.metadata)
    self.assertTrue(new_value.HasField('tensor'))

    # The tensor holds three byte strings: width, height, encoded image.
    (width, height, data) = tf.make_ndarray(new_value.tensor)
    self.assertEqual(b'200', width)
    self.assertEqual(b'400', height)
    expected_bytes = tf.compat.as_bytes(old_value.image.encoded_image_string)
    self.assertEqual(expected_bytes, data)
def write_summaries(self, tagged_data, experiment_name, run_name):
    """Transactionally writes the given tagged summary data to the DB.

    One row per (tag, step) is inserted into (or replaced in) the `Tensors`
    table after the run and its tags have been initialized.

    Args:
      tagged_data: map from tag to TagData instances.
      experiment_name: name of experiment.
      run_name: name of run.
    """
    tf.logging.debug('Writing summaries for %s tags', len(tagged_data))
    # Connection used as context manager for auto commit/rollback on exit.
    # We still need an explicit BEGIN, because it doesn't do one on enter,
    # it waits until the first DML command - which is totally broken.
    # See: https://stackoverflow.com/a/44448465/1179226
    with self._db:
        self._db.execute('BEGIN TRANSACTION')
        run_id = self._maybe_init_run(experiment_name, run_name)
        tag_to_metadata = dict(
            (tag, tagdata.metadata)
            for tag, tagdata in six.iteritems(tagged_data))
        tag_to_id = self._maybe_init_tags(run_id, tag_to_metadata)

        rows = []
        for tag, tagdata in six.iteritems(tagged_data):
            series_id = tag_to_id[tag]
            for step, wall_time, proto in tagdata.values:
                dims = [str(dim.size) for dim in proto.tensor_shape.dim]
                # Use tensor_content if it's set, to skip relatively
                # expensive extraction into intermediate ndarray.
                raw = proto.tensor_content
                if not raw:
                    raw = tf.make_ndarray(proto).tobytes()
                rows.append((
                    series_id,
                    step,
                    wall_time,
                    proto.dtype,
                    ','.join(dims),
                    self._make_blob(raw),
                ))

        self._db.executemany(
            """
            INSERT OR REPLACE INTO Tensors (
              series, step, computed_time, dtype, shape, data
            ) VALUES (?, ?, ?, ?, ?, ?)
            """,
            rows)
def compute_and_check_summary_pb(self, name, audio, max_outputs=3,
                                 display_name=None, description=None,
                                 audio_tensor=None, feed_dict=None):
    """Use both `op` and `pb` to get a summary, asserting validity.

    "Validity" means that the `op` and `pb` functions must return the
    same protobufs, and also that each encoded audio value appears to be
    a valid WAV file (it starts with `b'RIFF'`). If either of these
    conditions fails, the test will immediately fail. Otherwise, the
    valid protobuf will be returned.

    Args:
      name: Summary name passed to both `summary.op` and `summary.pb`.
      audio: Audio data passed to `summary.pb`, and wrapped in a constant
          for `summary.op` when `audio_tensor` is not given.
      max_outputs: Forwarded to both summary functions.
      display_name: Forwarded to both summary functions.
      description: Forwarded to both summary functions.
      audio_tensor: Optional tensor to give `summary.op` instead of a
          constant built from `audio`.
      feed_dict: Optional feed dict used when evaluating the op.

    Returns:
      A `Summary` protocol buffer.
    """
    if audio_tensor is None:
        audio_tensor = tf.constant(audio)
    shared_kwargs = dict(
        max_outputs=max_outputs,
        display_name=display_name,
        description=description)
    op = summary.op(
        name, audio_tensor, self.samples_per_second, **shared_kwargs)
    pb = summary.pb(name, audio, self.samples_per_second, **shared_kwargs)
    self.assertProtoEquals(pb, self.pb_via_op(op, feed_dict=feed_dict))

    # Column 0 of the tensor holds the encoded clips.
    encoded_clips = tf.make_ndarray(pb.value[0].tensor)[:, 0].tolist()
    not_wav = [clip for clip in encoded_clips if not clip.startswith(b'RIFF')]
    self.assertFalse(not_wav)
    return pb
def test_bool_value(self):
    """bools should be valid, but converted to floats."""
    pb = self.scalar('a', True)
    decoded = tf.make_ndarray(pb.value[0].tensor)
    value = decoded.item()
    self.assertEqual(float, type(value))
    self.assertEqual(1.0, value)
def test_empty_input(self):
    """An empty input produces an empty (0, 3) bucket array."""
    pb = self.compute_and_check_summary_pb('nothing_to_see_here', [])
    buckets = tf.make_ndarray(pb.value[0].tensor)
    no_buckets = np.zeros((0, 3))
    np.testing.assert_allclose(buckets, no_buckets)
def test_int_value(self):
    """ints should be valid, but converted to floats."""
    pb = self.scalar('a', 113)
    decoded = tf.make_ndarray(pb.value[0].tensor)
    value = decoded.item()
    self.assertEqual(float, type(value))
    self.assertNear(113.0, value, 1e-6)
def test_float_value(self):
    """A float input is stored as a float close to the original value."""
    pb = self.scalar('a', 1.13)
    decoded = tf.make_ndarray(pb.value[0].tensor)
    value = decoded.item()
    self.assertEqual(float, type(value))
    self.assertNear(1.13, value, 1e-6)
def test_input_with_all_same_values(self):
    """Three identical values collapse into the single row [11.5, 12.5, 3]."""
    pb = self.compute_and_check_summary_pb('twelven', [12, 12, 12])
    buckets = tf.make_ndarray(pb.value[0].tensor)
    single_bucket = np.array([[11.5, 12.5, 3]])
    np.testing.assert_allclose(buckets, single_bucket)
def histograms_impl(self, tag, run, downsample_to=None):
    """Result of the form `(body, mime_type)`, or `ValueError`.

    At most `downsample_to` events will be returned. If this value is
    `None`, then no downsampling will be performed.

    Args:
      tag: Tag name of the histogram series.
      run: Run name of the histogram series.
      downsample_to: Optional maximum number of events to return.

    Returns:
      `(events, 'application/json')`. Each event holds a wall time, a step
      and the histogram values: a tuple when read from the database, a list
      when read from event files.

    Raises:
      ValueError: If there is no histogram for the given run and tag.
    """
    if self._db_connection_provider:
        # Serve data from the database.
        db = self._db_connection_provider()
        cursor = db.cursor()
        # Prefetch the tag ID matching this run and tag.
        tag_params = dict(run=run, tag=tag, plugin=metadata.PLUGIN_NAME)
        cursor.execute(
            '''
            SELECT
              tag_id
            FROM Tags
            JOIN Runs USING (run_id)
            WHERE
              Runs.run_name = :run
              AND Tags.tag_name = :tag
              AND Tags.plugin_name = :plugin
            ''',
            tag_params)
        tag_row = cursor.fetchone()
        if not tag_row:
            raise ValueError('No histogram tag %r for run %r' % (tag, run))
        (tag_id, ) = tag_row
        # Fetch tensor values, optionally with linear-spaced sampling by step.
        # For steps ranging from s_min to s_max and sample size k, this query
        # divides the range into k - 1 equal-sized intervals and returns the
        # lowest step at or above each of the k interval boundaries (which
        # always includes s_min and s_max, and may be fewer than k results if
        # there are intervals where no steps are present). For contiguous
        # steps the results can be formally expressed as the following:
        #   [s_min + math.ceil(i / k * (s_max - s_min)) for i in range(0, k + 1)]
        sample_params = dict(tag_id=tag_id, sample_size=downsample_to)
        cursor.execute(
            '''
            SELECT
              MIN(step) AS step,
              computed_time,
              data,
              dtype,
              shape
            FROM Tensors
            INNER JOIN (
              SELECT
                MIN(step) AS min_step,
                MAX(step) AS max_step
              FROM Tensors
              /* Filter out NULL so we can use TensorSeriesStepIndex. */
              WHERE series = :tag_id AND step IS NOT NULL
            )
            /* Ensure we omit reserved rows, which have NULL step values. */
            WHERE series = :tag_id AND step IS NOT NULL
            /* Bucket rows into sample_size linearly spaced buckets, or do
               no sampling if sample_size is NULL. */
            GROUP BY
              IFNULL(:sample_size - 1, max_step - min_step)
              * (step - min_step) / (max_step - min_step)
            ORDER BY step
            ''',
            sample_params)
        # Rows arrive step-first; events are emitted wall-time-first.
        events = [
            (computed_time, step, self._get_values(data, dtype, shape))
            for step, computed_time, data, dtype, shape in cursor
        ]
    else:
        # Serve data from events files.
        try:
            tensor_events = self._multiplexer.Tensors(run, tag)
        except KeyError:
            raise ValueError('No histogram tag %r for run %r' % (tag, run))
        if downsample_to is not None:
            total = len(tensor_events)
            if total > downsample_to:
                # A fixed seed keeps the sampled subset the same across calls.
                sampler = random.Random(0)
                picked = sampler.sample(six.moves.xrange(total), downsample_to)
                tensor_events = [tensor_events[i] for i in sorted(picked)]
        events = [
            [
                event.wall_time,
                event.step,
                tf.make_ndarray(event.tensor_proto).tolist(),
            ]
            for event in tensor_events
        ]
    return (events, 'application/json')
def testTensorsRealistically(self):
    """Test accumulator by writing values and then reading them.

    Writes a graph, a meta graph, run metadata and 30 steps of two scalar
    series ('id' and 'sq') to a fresh directory, reloads them through an
    `EventAccumulator`, then writes 10 more steps to check that an
    incremental reload picks them up.
    """

    def scalar_summary(tag, simple_value):
        # Build a one-value Summary carrying `simple_value` under `tag`.
        entry = tf.Summary.Value(tag=tag, simple_value=simple_value)
        return tf.Summary(value=[entry])

    def write_events(start, stop):
        # Emit 'id' (i) and 'sq' (i * i) at step i * 5, then flush.
        for i in xrange(start, stop):
            summ_id = scalar_summary('id', i)
            summ_sq = scalar_summary('sq', i * i)
            writer.add_summary(summ_id, i * 5)
            writer.add_summary(summ_sq, i * 5)
        writer.flush()

    def check_events(expected_count):
        # Both series must hold `expected_count` points with matching values.
        id_events = acc.Tensors('id')
        sq_events = acc.Tensors('sq')
        self.assertEqual(expected_count, len(id_events))
        self.assertEqual(expected_count, len(sq_events))
        for i in xrange(expected_count):
            self.assertEqual(i * 5, id_events[i].step)
            self.assertEqual(i * 5, sq_events[i].step)
            self.assertEqual(
                i, tf.make_ndarray(id_events[i].tensor_proto).item())
            self.assertEqual(
                i * i, tf.make_ndarray(sq_events[i].tensor_proto).item())

    # Start from an empty directory.
    directory = os.path.join(self.get_temp_dir(), 'values_dir')
    if tf.gfile.IsDirectory(directory):
        tf.gfile.DeleteRecursively(directory)
    tf.gfile.MkDir(directory)

    writer = tf.summary.FileWriter(directory, max_queue=100)

    with tf.Graph().as_default() as graph:
        _ = tf.constant([2.0, 1.0])
    # Add a graph to the summary writer.
    writer.add_graph(graph)
    meta_graph_def = tf.train.export_meta_graph(
        graph_def=graph.as_graph_def(add_shapes=True))
    writer.add_meta_graph(meta_graph_def)

    run_metadata = tf.RunMetadata()
    device_stats = run_metadata.step_stats.dev_stats.add()
    device_stats.device = 'test device'
    writer.add_run_metadata(run_metadata, 'test run')

    # Write a bunch of events using the writer.
    write_events(0, 30)

    # Verify that we can load those events properly
    acc = ea.EventAccumulator(directory)
    acc.Reload()
    self.assertTagsEqual(acc.Tags(), {
        ea.TENSORS: ['id', 'sq'],
        ea.GRAPH: True,
        ea.META_GRAPH: True,
        ea.RUN_METADATA: ['test run'],
    })
    check_events(30)

    # Write a few more events to test incremental reloading
    write_events(30, 40)

    # Verify we can now see all of the data
    acc.Reload()
    check_events(40)
    self.assertProtoEquals(graph.as_graph_def(add_shapes=True), acc.Graph())
    self.assertProtoEquals(meta_graph_def, acc.MetaGraph())