Example #1
    def _create_point(self, tag_proto, event, value):
        """Adds a scalar point to the given tag, if there's space.

        Args:
          tag_proto: `WriteScalarRequest.Tag` proto to which to add a point.
          event: Enclosing `Event` proto with the step and wall time data.
          value: Scalar `Summary.Value` proto with the actual scalar data.

        Returns:
          The `ScalarPoint` that was added to `tag_proto.points`.

        Raises:
          _OutOfSpaceError: If adding the point would exceed the remaining
            request budget.
        """
        point = tag_proto.points.add()
        point.step = event.step
        # TODO(@nfelt): skip tensor roundtrip for Value with simple_value set
        point.value = tensor_util.make_ndarray(value.tensor).item()
        util.set_timestamp(point.wall_time, event.wall_time)
        submessage_cost = point.ByteSize()
        cost = submessage_cost + _varint_cost(submessage_cost) + 1  # proto key
        if cost > self._byte_budget:
            tag_proto.points.pop()
            raise _OutOfSpaceError()
        self._byte_budget -= cost
        return point
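The cost arithmetic above relies on a `_varint_cost` helper that returns how many bytes protobuf's varint encoding needs for a given length. A minimal sketch of what such a helper could look like (the shipped implementation may differ):

def _varint_cost(n):
    """Returns the byte length of the proto varint encoding of a non-negative int."""
    result = 1
    while n >= 0x80:  # each varint byte carries 7 bits of payload
        result += 1
        n >>= 7
    return result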
Example #2
    def _get_or_create_blob_sequence(self):
        request = write_service_pb2.GetOrCreateBlobSequenceRequest(
            experiment_id=self._experiment_id,
            run=self._run_name,
            tag=self._value.tag,
            step=self._event.step,
            final_sequence_length=len(self._blobs),
            metadata=self._metadata,
        )
        util.set_timestamp(request.wall_time, self._event.wall_time)
        with _request_logger(request):
            try:
                # TODO(@nfelt): execute this RPC asynchronously.
                response = grpc_util.call_with_retries(
                    self._api.GetOrCreateBlobSequence, request
                )
                blob_sequence_id = response.blob_sequence_id
            except grpc.RpcError as e:
                if e.code() == grpc.StatusCode.NOT_FOUND:
                    raise ExperimentNotFoundError()
                logger.error("Upload call failed with error %s", e)
                # TODO(soergel): clean up
                raise

        return blob_sequence_id
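`_request_logger` only appears here as a context manager wrapped around the RPC. A plausible shape for it, assuming it merely times and logs the upload (names and log message are illustrative):

import contextlib
import time

@contextlib.contextmanager
def _request_logger(request):
    upload_start_time = time.time()
    yield
    upload_duration_secs = time.time() - upload_start_time
    logger.info(
        "Upload of %d bytes took %.3f seconds",
        request.ByteSize(),
        upload_duration_secs,
    )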
Example #3
    def _create_point(self, tag_proto, event, value):
        """Adds a tensor point to the given tag, if there's space.

        Args:
          tag_proto: `WriteTensorRequest.Tag` proto to which to add a point.
          event: Enclosing `Event` proto with the step and wall time data.
          value: Tensor `Summary.Value` proto with the actual tensor data.

        Returns:
          The `TensorPoint` that was added to `tag_proto.points`.

        Raises:
          _OutOfSpaceError: If adding the point would exceed the remaining
            request budget.
        """
        point = tag_proto.points.add()
        point.step = event.step
        point.value.CopyFrom(value.tensor)
        util.set_timestamp(point.wall_time, event.wall_time)
        try:
            self._byte_budget_manager.add_point(point)
        except _OutOfSpaceError:
            tag_proto.points.pop()
            raise
        return point
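Compared with Example #1, the byte accounting is delegated to a budget manager here. A simplified sketch of such a manager, reusing the cost formula from Example #1 (a real manager would likely also charge for the enclosing run and tag submessages):

class _OutOfSpaceError(Exception):
    """Raised when adding data would exceed the remaining request budget."""


class _ByteBudgetManager(object):
    """Tracks the byte budget left in one outgoing write request (sketch)."""

    def __init__(self, max_request_bytes):
        self._byte_budget = max_request_bytes

    def add_point(self, point_proto):
        submessage_cost = point_proto.ByteSize()
        # Submessage bytes, plus the varint length prefix, plus one byte
        # for the repeated field's key.
        cost = submessage_cost + _varint_cost(submessage_cost) + 1
        if cost > self._byte_budget:
            raise _OutOfSpaceError()
        self._byte_budget -= cost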
Example #4
 def _request_scalar_data(self, experiment_id, read_time):
     """Yields JSON-serializable blocks of scalar data."""
     request = export_service_pb2.StreamExperimentDataRequest()
     request.experiment_id = experiment_id
     util.set_timestamp(request.read_timestamp, read_time)
     # No special error handling as we don't expect any errors from these
     # calls: all experiments should exist (read consistency timestamp)
     # and be owned by the calling user (only queried for own experiment
     # IDs). Any non-transient errors would be internal, and we have no
     # way to efficiently resume from transient errors because the server
     # does not support pagination.
     stream = self._api.StreamExperimentData(
         request, metadata=grpc_util.version_metadata())
     for response in stream:
         metadata = base64.b64encode(
             response.tag_metadata.SerializeToString()).decode("ascii")
         wall_times = [
             t.ToNanoseconds() / 1e9 for t in response.points.wall_times
         ]
         yield {
             u"run": response.run_name,
             u"tag": response.tag_name,
             u"summary_metadata": metadata,
             u"points": {
                 u"steps": list(response.points.steps),
                 u"wall_times": wall_times,
                 u"values": list(response.points.values),
             },
         }
Example #5
 def testJsonFormatterWithEmptyNameAndDescription(self):
     experiment = experiment_pb2.Experiment(
         experiment_id="deadbeef",
         # NOTE(cais): `name` and `description` are missing here.
         num_runs=2,
         num_tags=4,
         num_scalars=60,
         total_blob_bytes=1234,
     )
     util.set_timestamp(experiment.create_time, 981173106)
     util.set_timestamp(experiment.update_time, 1015218367)
     experiment_url = "http://tensorboard.dev/deadbeef"
     formatter = formatters.JsonFormatter()
     output = self._format(formatter, experiment, experiment_url)
     expected_lines = [
         "{",
         '  "url": "http://tensorboard.dev/deadbeef",',
         '  "name": "",',
         '  "description": "",',
         '  "id": "deadbeef",',
         '  "created": "2001-02-03T04:05:06Z",',
         '  "updated": "2002-03-04T05:06:07Z",',
         '  "runs": 2,',
         '  "tags": 4,',
         '  "scalars": 60,',
         '  "binary_object_bytes": 1234',
         "}",
     ]
     self.assertEqual(output.split("\n"), expected_lines)
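The expected `created` and `updated` strings follow directly from the two epoch timestamps passed to `util.set_timestamp`; this can be checked with the standard library:

import datetime

# 981173106 and 1015218367 seconds since the epoch, rendered in UTC:
print(datetime.datetime.utcfromtimestamp(981173106).isoformat() + "Z")
# 2001-02-03T04:05:06Z
print(datetime.datetime.utcfromtimestamp(1015218367).isoformat() + "Z")
# 2002-03-04T05:06:07Z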
Example #6
def list_experiments(api_client, fieldmask=None, read_time=None):
    """Yields all of the calling user's experiments.

    Args:
      api_client: A TensorBoardExporterService stub instance.
      fieldmask: An optional `experiment_pb2.ExperimentMask` value.
      read_time: A fixed timestamp from which to export data, as float seconds
        since epoch (like `time.time()`). Optional; defaults to the current
        time.

    Yields:
      For each experiment owned by the user, an `experiment_pb2.Experiment`
      value, or a simple string experiment ID for older servers.
    """
    if read_time is None:
        read_time = time.time()
    request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
    util.set_timestamp(request.read_timestamp, read_time)
    if fieldmask:
        request.experiments_mask.CopyFrom(fieldmask)
    stream = api_client.StreamExperiments(
        request, metadata=grpc_util.version_metadata())
    for response in stream:
        if response.experiments:
            for experiment in response.experiments:
                yield experiment
        else:
            # Old servers.
            for experiment_id in response.experiment_ids:
                yield experiment_id
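Because this version falls back to bare experiment IDs for old servers, callers have to handle both yield types. A hypothetical usage sketch (the `api_client` stub is assumed to be constructed elsewhere):

for experiment in list_experiments(api_client):
    if isinstance(experiment, str):
        # Older servers yield only the experiment ID.
        print(experiment)
    else:
        print(experiment.experiment_id, experiment.name)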
Example #7
 def testReadableFormatterWithEmptyNameAndDescription(self):
     experiment = experiment_pb2.Experiment(
         experiment_id="deadbeef",
         # NOTE(cais): `name` and `description` are missing here.
         num_runs=2,
         num_tags=4,
         num_scalars=60,
         total_blob_bytes=1234,
     )
     util.set_timestamp(experiment.create_time, 981173106)
     util.set_timestamp(experiment.update_time, 1015218367)
     experiment_url = "http://tensorboard.dev/deadbeef"
     formatter = formatters.ReadableFormatter()
     output = self._format(formatter, experiment, experiment_url)
     expected_lines = [
         "http://tensorboard.dev/deadbeef",
         "\tName                 [No Name]",
         "\tDescription          [No Description]",
         "\tId                   deadbeef",
         "\tCreated              2001-02-03 04:05:06",
         "\tUpdated              2002-03-04 05:06:07",
         "\tRuns                 2",
         "\tTags                 4",
         "\tScalars              60",
         "\tBinary object bytes  1234",
     ]
     self.assertEqual(output.split("\n"), expected_lines)
Example #8
    def _create_point(self, tag_proto, event, value):
        """Adds a tensor point to the given tag, if there's space.

        Args:
          tag_proto: `WriteTensorRequest.Tag` proto to which to add a point.
          event: Enclosing `Event` proto with the step and wall time data.
          value: Tensor `Summary.Value` proto with the actual tensor data.

        Raises:
          _OutOfSpaceError: If adding the point would exceed the remaining
            request budget.
        """
        point = tag_proto.points.add()
        point.step = event.step
        point.value.CopyFrom(value.tensor)
        util.set_timestamp(point.wall_time, event.wall_time)

        if point.value.ByteSize() > self._max_tensor_point_size:
            logger.warning(
                "Tensor too large; skipping. "
                "Size %d exceeds limit of %d bytes.",
                point.value.ByteSize(),
                self._max_tensor_point_size,
            )
            tag_proto.points.pop()
            return

        try:
            self._byte_budget_manager.add_point(point)
        except _OutOfSpaceError:
            tag_proto.points.pop()
            raise
Example #9
 def testReadableFormatterWithNonUtcTimezone(self):
     experiment = experiment_pb2.Experiment(
         experiment_id="deadbeef",
         name="A name for the experiment",
         description="A description for the experiment",
         num_runs=2,
         num_tags=4,
         num_scalars=60,
         total_blob_bytes=1234,
     )
     util.set_timestamp(experiment.create_time, 981173106)
     util.set_timestamp(experiment.update_time, 1015218367)
     experiment_url = "http://tensorboard.dev/deadbeef"
     formatter = formatters.ReadableFormatter()
     output = self._format(
         formatter,
         experiment,
         experiment_url,
         timezone="America/Los_Angeles",
     )
     expected_lines = [
         "http://tensorboard.dev/deadbeef",
         "\tName                 A name for the experiment",
         "\tDescription          A description for the experiment",
         "\tId                   deadbeef",
         "\tCreated              2001-02-02 20:05:06",
         "\tUpdated              2002-03-03 21:06:07",
         "\tRuns                 2",
         "\tTags                 4",
         "\tScalars              60",
         "\tBinary object bytes  1234",
     ]
     self.assertEqual(output.split("\n"), expected_lines)
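The expected times are simply the same two timestamps from the earlier tests rendered in the Pacific timezone, which can be verified independently (assuming Python 3.9+ for `zoneinfo`):

from datetime import datetime
from zoneinfo import ZoneInfo

created = datetime.fromtimestamp(981173106, tz=ZoneInfo("America/Los_Angeles"))
updated = datetime.fromtimestamp(1015218367, tz=ZoneInfo("America/Los_Angeles"))
print(created.strftime("%Y-%m-%d %H:%M:%S"))  # 2001-02-02 20:05:06
print(updated.strftime("%Y-%m-%d %H:%M:%S"))  # 2002-03-03 21:06:07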
Example #10
    def _request_json_data(self, experiment_id, read_time):
        """Given experiment id, generates JSON data and destination file name.

        The JSON data describes the run, tag, metadata, in addition to
          - Actual data in the case of scalars
          - Pointer to binary files in the case of blob sequences.

        For the case of blob sequences, this method has the side effect of
          downloading the contents of the blobs and writing them to files in
          a subdirectory of the experiment directory.

        Args:
          experiment_id: The id of the experiment to request data for.
          read_time: A fixed timestamp from which to export data, as float
            seconds since epoch (like `time.time()`). Optional; defaults to the
            current time.

        Yields:
          (JSON-serializable data, destination file name) tuples.
        """
        request = export_service_pb2.StreamExperimentDataRequest()
        request.experiment_id = experiment_id
        util.set_timestamp(request.read_timestamp, read_time)
        # No special error handling as we don't expect any errors from these
        # calls: all experiments should exist (read consistency timestamp)
        # and be owned by the calling user (only queried for own experiment
        # IDs). Any non-transient errors would be internal, and we have no
        # way to efficiently resume from transient errors because the server
        # does not support pagination.
        stream = self._api.StreamExperimentData(
            request, metadata=grpc_util.version_metadata()
        )
        for response in stream:
            metadata = base64.b64encode(
                response.tag_metadata.SerializeToString()
            ).decode("ascii")
            json_data = {
                u"run": response.run_name,
                u"tag": response.tag_name,
                u"summary_metadata": metadata,
            }
            filename = None
            if response.HasField("points"):
                json_data[u"points"] = self._process_scalar_points(
                    response.points
                )
                filename = _FILENAME_SCALARS
            elif response.HasField("tensors"):
                json_data[u"points"] = self._process_tensor_points(
                    response.tensors, experiment_id
                )
                filename = _FILENAME_TENSORS
            elif response.HasField("blob_sequences"):
                json_data[u"points"] = self._process_blob_sequence_points(
                    response.blob_sequences, experiment_id
                )
                filename = _FILENAME_BLOB_SEQUENCES
            if filename:
                yield json_data, filename
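A consumer of this generator could route each block to its destination file as newline-delimited JSON. A hypothetical sketch; the `exporter` instance, `experiment_id`, and directory layout are assumptions:

import json
import os
import time

output_dir = "."  # illustrative destination
experiment_dir = os.path.join(output_dir, experiment_id)
os.makedirs(experiment_dir, exist_ok=True)
for json_data, filename in exporter._request_json_data(experiment_id, time.time()):
    with open(os.path.join(experiment_dir, filename), "a") as outfile:
        outfile.write(json.dumps(json_data, sort_keys=True) + "\n")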
Example #11
 def _request_experiment_ids(self, read_time):
   """Yields all of the calling user's experiment IDs, as strings."""
   request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
   util.set_timestamp(request.read_timestamp, read_time)
   stream = self._api.StreamExperiments(request)
   for response in stream:
     for experiment_id in response.experiment_ids:
       yield experiment_id
Example #12
 def test_set_timestamp(self):
     pb = timestamp_pb2.Timestamp()
     t = 1234567890.007812500
     # Note that just multiplying by 1e9 would lose precision:
     self.assertEqual(int(t * 1e9) % int(1e9), 7812608)
     util.set_timestamp(pb, t)
     self.assertEqual(pb.seconds, 1234567890)
     self.assertEqual(pb.nanos, 7812500)
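The test pins down the behavior a conforming helper must have: split the integer and fractional parts before scaling, so the large seconds value cannot consume the mantissa bits needed for the nanoseconds. One plausible implementation sketch (not necessarily the shipped code):

def set_timestamp(pb, seconds_since_epoch):
    """Fills a `google.protobuf.Timestamp` from float seconds since the epoch."""
    seconds = int(seconds_since_epoch)
    # Scaling only the fraction is exact here: 0.0078125 * 10**9 == 7812500,
    # whereas scaling the full float rounds to ...7812608 as the test shows.
    nanos = int(round((seconds_since_epoch - seconds) * 10**9))
    pb.seconds = seconds
    pb.nanos = nanos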
Example #13
    def get_scalars(
        self,
        runs_filter=None,
        tags_filter=None,
        pivot=False,
        include_wall_time=False,
    ):
        # NOTE(#3650): Import pandas early in this method, so if the
        # Python environment does not have pandas installed, an error can be
        # raised early, before any rpc call is made.
        pandas = import_pandas()
        if runs_filter is not None:
            raise NotImplementedError(
                "runs_filter support for get_scalars() is not implemented yet."
            )
        if tags_filter is not None:
            raise NotImplementedError(
                "tags_filter support for get_scalars() is not implemented yet."
            )

        request = export_service_pb2.StreamExperimentDataRequest()
        request.experiment_id = self._experiment_id
        read_time = time.time()
        util.set_timestamp(request.read_timestamp, read_time)
        # TODO(cais, wchargin): Use another rpc to check for staleness and avoid
        # a new StreamExperimentData rpc request if data is not stale.
        stream = self._api_client.StreamExperimentData(
            request, metadata=grpc_util.version_metadata()
        )

        runs = []
        tags = []
        steps = []
        wall_times = []
        values = []
        for response in stream:
            # TODO(cais, wchargin): Display progress bar during data loading.
            num_values = len(response.points.values)
            runs.extend([response.run_name] * num_values)
            tags.extend([response.tag_name] * num_values)
            steps.extend(list(response.points.steps))
            wall_times.extend(
                [t.ToNanoseconds() / 1e9 for t in response.points.wall_times]
            )
            values.extend(list(response.points.values))

        data = {
            "run": runs,
            "tag": tags,
            "step": steps,
            "value": values,
        }
        if include_wall_time:
            data["wall_time"] = wall_times
        dataframe = pandas.DataFrame(data)
        if pivot:
            dataframe = self._pivot_dataframe(dataframe)
        return dataframe
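A hypothetical call, assuming `experiment` is an instance of the surrounding class; with `pivot=True` the long-form table is reshaped so each tag becomes its own column:

df = experiment.get_scalars(pivot=True, include_wall_time=False)
# Long form (pivot=False) has columns: run, tag, step, value.
# Pivoted form has one row per (run, step) and one column per tag.
print(df.head())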
Example #14
 def _run(self, t=None, tz=None):
     timestamp_pb = timestamp_pb2.Timestamp()
     util.set_timestamp(timestamp_pb, t)
     try:
         with mock.patch.dict(os.environ, {"TZ": tz}):
             time.tzset()
             return util.format_time_absolute(timestamp_pb)
     finally:
         time.tzset()
Example #15
 def _run(self, t=None, now=None):
     timestamp_pb = timestamp_pb2.Timestamp()
     util.set_timestamp(timestamp_pb, t)
     try:
         with mock.patch.dict(os.environ, {"TZ": "UTC"}):
             time.tzset()
             now = datetime.datetime.fromtimestamp(now)
             return util.format_time(timestamp_pb, now=now)
     finally:
         time.tzset()
Example #16
        def stream_experiments(request, **kwargs):
            del request  # unused
            self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())

            response = export_service_pb2.StreamExperimentsResponse()
            response.experiments.add(experiment_id="123")
            response.experiments.add(experiment_id="456")
            yield response

            response = export_service_pb2.StreamExperimentsResponse()
            experiment = response.experiments.add()
            experiment.experiment_id = "789"
            experiment.name = "bert"
            experiment.description = "ernie"
            util.set_timestamp(experiment.create_time, 981173106)
            util.set_timestamp(experiment.update_time, 1015218367)
            yield response
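In a test, a fake generator like this is typically wired into a mocked API client so that each call to `StreamExperiments` returns a fresh stream of canned responses. A minimal sketch (the `mock_api_client` name is illustrative):

from unittest import mock

mock_api_client = mock.Mock()
# Calling the mocked RPC now returns the generator defined above.
mock_api_client.StreamExperiments.side_effect = stream_experiments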
Example #17
    def get_scalars(self, runs_filter=None, tags_filter=None, pivot=None):
        if runs_filter is not None:
            raise NotImplementedError(
                "runs_filter support for get_scalars() is not implemented yet."
            )
        if tags_filter is not None:
            raise NotImplementedError(
                "tags_filter support for get_scalars() is not implemented yet."
            )
        pivot = True if pivot is None else pivot

        request = export_service_pb2.StreamExperimentDataRequest()
        request.experiment_id = self._experiment_id
        read_time = time.time()
        util.set_timestamp(request.read_timestamp, read_time)
        # TODO(cais, wchargin): Use another rpc to check for staleness and avoid
        # a new StreamExperimentData rpc request if data is not stale.
        stream = self._api_client.StreamExperimentData(
            request, metadata=grpc_util.version_metadata())

        runs = []
        tags = []
        steps = []
        wall_times = []
        values = []
        for response in stream:
            # TODO(cais, wchargin): Display progress bar during data loading.
            num_values = len(response.points.values)
            runs.extend([response.run_name] * num_values)
            tags.extend([response.tag_name] * num_values)
            steps.extend(list(response.points.steps))
            wall_times.extend(
                [t.ToNanoseconds() / 1e9 for t in response.points.wall_times])
            values.extend(list(response.points.values))

        dataframe = pandas.DataFrame({
            "run": runs,
            "tag": tags,
            "step": steps,
            "wall_time": wall_times,
            "value": values,
        })
        if pivot:
            dataframe = self._pivot_dataframe(dataframe)
        return dataframe
Example #18
    def _create_point(self, tag_proto, event, value, run_name):
        """Adds a tensor point to the given tag, if there's space.

        Args:
          tag_proto: `WriteTensorRequest.Tag` proto to which to add a point.
          event: Enclosing `Event` proto with the step and wall time data.
          value: Tensor `Summary.Value` proto with the actual tensor data.
          run_name: Name of the run, only used for error reporting.

        Raises:
          _OutOfSpaceError: If adding the point would exceed the remaining
            request budget.
        """
        point = tag_proto.points.add()
        point.step = event.step
        point.value.CopyFrom(value.tensor)
        util.set_timestamp(point.wall_time, event.wall_time)

        self._num_values += 1
        self._tensor_bytes += point.value.ByteSize()
        if point.value.ByteSize() > self._max_tensor_point_size:
            logger.warning(
                "Tensor (run:%s, tag:%s, step: %d) too large; skipping. "
                "Size %d exceeds limit of %d bytes.",
                run_name,
                tag_proto.name,
                event.step,
                point.value.ByteSize(),
                self._max_tensor_point_size,
            )
            tag_proto.points.pop()
            self._num_values_skipped += 1
            self._tensor_bytes_skipped += point.value.ByteSize()
            return

        self._validate_tensor_value(value.tensor, value.tag, event.step,
                                    event.wall_time)

        try:
            self._byte_budget_manager.add_point(point)
        except _OutOfSpaceError:
            tag_proto.points.pop()
            raise
Example #19
def list_experiments(api_client, fieldmask=None, read_time=None):
    """Yields all of the calling user's experiments.

    Args:
      api_client: A TensorBoardExporterService stub instance.
      fieldmask: An optional `experiment_pb2.ExperimentMask` value.
      read_time: A fixed timestamp from which to export data, as float seconds
        since epoch (like `time.time()`). Optional; defaults to the current
        time.

    Yields:
      For each experiment owned by the user, an `experiment_pb2.Experiment`
      value.

    Raises:
      RuntimeError: If the server returns experiment IDs but no experiments,
        as in an old, unsupported version of the protocol.
    """
    if read_time is None:
        read_time = time.time()
    request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
    util.set_timestamp(request.read_timestamp, read_time)
    if fieldmask:
        request.experiments_mask.CopyFrom(fieldmask)
    stream = api_client.StreamExperiments(
        request, metadata=grpc_util.version_metadata()
    )
    for response in stream:
        if response.experiments:
            for experiment in response.experiments:
                yield experiment
        elif response.experiment_ids:
            raise RuntimeError(
                "Server sent experiment_ids without experiments: <%r>"
                % (list(response.experiment_ids),)
            )
        else:
            # No data: not technically a problem, but not expected.
            logger.warning(
                "StreamExperiments RPC returned response with no experiments: <%r>",
                response,
            )
Example #20
    def _create_point(self, tag_proto, event, value):
        """Adds a scalar point to the given tag, if there's space.

        Args:
          tag_proto: `WriteScalarRequest.Tag` proto to which to add a point.
          event: Enclosing `Event` proto with the step and wall time data.
          value: Scalar `Summary.Value` proto with the actual scalar data.

        Raises:
          _OutOfSpaceError: If adding the point would exceed the remaining
            request budget.
        """
        point = tag_proto.points.add()
        point.step = event.step
        # TODO(@nfelt): skip tensor roundtrip for Value with simple_value set
        point.value = tensor_util.make_ndarray(value.tensor).item()
        util.set_timestamp(point.wall_time, event.wall_time)
        try:
            self._byte_budget_manager.add_point(point)
        except _OutOfSpaceError:
            tag_proto.points.pop()
            raise
Example #21
def list_experiments(api_client, read_time=None):
    """Yields all of the calling user's experiment IDs.

    Args:
      api_client: A TensorBoardExporterService stub instance.
      read_time: A fixed timestamp from which to export data, as float seconds
        since epoch (like `time.time()`). Optional; defaults to the current
        time.

    Yields:
      One string for each experiment owned by the calling user, in arbitrary
      order.
    """
    if read_time is None:
        read_time = time.time()
    request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
    util.set_timestamp(request.read_timestamp, read_time)
    stream = api_client.StreamExperiments(
        request, metadata=grpc_util.version_metadata())
    for response in stream:
        for experiment_id in response.experiment_ids:
            yield experiment_id