# Example 1 (scraped snippet marker; original label "Beispiel #1")
 def testSingleUtf8StringTensor(self):
     """A string output whose alias lacks a "_bytes" suffix is returned as-is.

     encode_base64 must leave plain UTF-8 string tensors untouched (no
     {"b64": ...} wrapping), even for nested rank-3 values.
     """
     outputs_map = {
         "dummy": meta_graph_pb2.TensorInfo(
             dtype=tf.string.as_datatype_enum),
     }

     # Build the input twice via a helper so the expectation cannot be
     # affected by any in-place mutation of the argument.
     def nested_strings():
         return [[[u"a", u"b"]], [[u"c", u"d"]]]

     self.assertEqual(
         mlprediction.encode_base64(nested_strings(), outputs_map),
         nested_strings())
    def process(self, element, model_dir):
        """Runs batch prediction over a bundle of elements with a cached model.

        Yields (input, prediction) pairs on success. On failure, yields an
        "errors" side output of (error message, element); a failure to load
        the model is re-raised as a permanent exception instead.

        Args:
          element: the bundle of instances to predict; may arrive wrapped in
            an object exposing `.element` (unwrapped below). Each item is
            either a raw string or a JSON-encoded instance, depending on the
            model (see the loaded_data branch).
          model_dir: path of the model to load. Assumed constant for the
            lifetime of this DoFn instance (asserted below).
        """
        try:
            # Some Beam versions hand the DoFn a wrapper with an `.element`
            # attribute; unwrap it when present, otherwise use as-is.
            element = element.element
        except AttributeError:
            pass

        try:
            # Load the model at most once per thread: reuse the thread-local
            # cached state unless it was built for a different model_dir.
            if self._model_state is None:
                if (getattr(self._thread_local, "model_state", None) is None or
                        self._thread_local.model_state.model_dir != model_dir):
                    self._num_model_loads.inc(1)
                    self._thread_local.model_state = self._ModelState(
                        model_dir, self._skip_preprocessing)
                self._model_state = self._thread_local.model_state
            else:
                # Once bound, this instance must keep seeing the same model.
                assert self._model_state.model_dir == model_dir

            # Try to load it.
            # Single-string-input or preprocessing models consume the raw
            # elements; otherwise each element is parsed as JSON.
            if (self._model_state.model.is_single_string_input()
                    or self._model_state.model.need_preprocess()):
                loaded_data = element
            else:
                loaded_data = [json.loads(d) for d in element]
            instances = mlprediction.decode_base64(loaded_data)
            inputs, predictions = self._model_state.model.predict(instances)
            # Materialize predictions (may be a generator) so they can be
            # counted and base64-encoded below.
            predictions = list(predictions)
            predictions = mlprediction.encode_base64(
                predictions, self._model_state.model.outputs_type_map())

            # Best-effort metrics: the aggregator may be absent.
            if self._aggregator_dict:
                aggr = self._aggregator_dict.get(
                    aggregators.AggregatorName.ML_PREDICTIONS, None)
                if aggr:
                    aggr.inc(len(predictions))

            # NOTE(review): zip truncates to the shorter sequence; presumably
            # the model guarantees len(inputs) == len(predictions) — confirm.
            for i, p in zip(inputs, predictions):
                yield i, p

        except mlprediction.PredictionError as e:
            logging.error("Got a known exception: [%s]\n%s", e.error_message,
                          traceback.format_exc())
            if self._cloud_logger:
                # TODO(user): consider to write a sink to buffer the logging events. It
                # also eliminates the restarting/duplicated running issue.
                self._cloud_logger.write_error_message(
                    e.error_message, self._create_snippet(element))
            # reraise failure to load model as permanent exception to end dataflow job
            if e.error_code == mlprediction.PredictionError.FAILED_TO_LOAD_MODEL:
                raise beam.utils.retry.PermanentException(e.error_message)
            # Any other prediction error is routed to the "errors" side output
            # so the pipeline can continue.
            yield beam.pvalue.SideOutputValue("errors",
                                              (e.error_message, element))

        except Exception as e:  # pylint: disable=broad-except
            # Catch-all boundary: log, report, and emit the failing element on
            # the "errors" side output rather than failing the bundle.
            logging.error("Got an unknown exception: [%s].",
                          traceback.format_exc())
            if self._cloud_logger:
                self._cloud_logger.write_error_message(
                    str(e), self._create_snippet(element))
            yield beam.pvalue.SideOutputValue("errors", (str(e), element))
# Example 3 (scraped snippet marker; original label "Beispiel #3")
 def testSingleRank1BytesTensor(self):
     """A string output whose alias ends in "_bytes" gets b64-wrapped.

     Each value of a rank-1 bytes tensor is replaced by a
     {"b64": <base64-encoded value>} dict.
     """
     string_info = meta_graph_pb2.TensorInfo(
         dtype=tf.string.as_datatype_enum)
     values = [u"a", u"b", u"c"]

     encoded = mlprediction.encode_base64(
         values, {"dummy_bytes": string_info})

     # Expected: one {"b64": ...} wrapper per input value, in order.
     self.assertEqual(
         encoded,
         [{u"b64": base64.b64encode(v)} for v in [u"a", u"b", u"c"]])
# Example 4 (scraped snippet marker; original label "Beispiel #4")
 def testMultiTensorWithUtf8Strings(self):
     """Multi-tensor output with no "_bytes" alias is returned unchanged.

     Mixes two plain string tensors and one float tensor; since no output
     alias carries a "_bytes" suffix, encode_base64 must be a no-op.
     """
     def string_info():
         return meta_graph_pb2.TensorInfo(
             dtype=tf.string.as_datatype_enum)

     outputs_map = {
         "tensor1": string_info(),
         "tensor2": string_info(),
         "tensor3": meta_graph_pb2.TensorInfo(
             dtype=tf.float32.as_datatype_enum),
     }

     # Build the payload twice via a helper so the expectation cannot be
     # affected by any in-place mutation of the argument.
     def payload():
         return [{
             u"tensor1": [[[u"a", u"b"]], [[u"c", u"d"]]],
             u"tensor2": [u"x", u"y", u"z"],
             u"tensor3": [1.0, -2.0, 3.14],
         }]

     self.assertEqual(
         mlprediction.encode_base64(payload(), outputs_map), payload())
    def process(self, element, model_dir):
        """Runs batch prediction over a bundle of elements with a cached model.

        Yields (input, prediction) pairs on success. On failure, yields an
        "errors"-tagged output of (error message, element); a failure to load
        the model is re-raised as a permanent exception instead.

        Args:
          element: the bundle of instances to predict. Each item is either a
            raw string or a JSON-encoded instance, depending on the model
            (see the loaded_data branch).
          model_dir: path of the model to load, possibly wrapped in a Beam
            ValueProvider (resolved below). Assumed constant for the lifetime
            of this DoFn instance (asserted below).
        """
        try:
            # Runtime-parameterized pipelines pass the model dir as a
            # ValueProvider; resolve it to a concrete value first.
            if isinstance(model_dir, ValueProvider):
                model_dir = model_dir.get()

            # Load the model at most once per thread: reuse the thread-local
            # cached state unless it was built for a different model_dir.
            if self._model_state is None:
                if (getattr(self._thread_local, "model_state", None) is None or
                        self._thread_local.model_state.model_dir != model_dir):
                    # Record wall-clock seconds spent loading the model.
                    start = datetime.datetime.now()
                    self._thread_local.model_state = self._ModelState(
                        model_dir, self._tag_list, self._signature_name,
                        self._skip_preprocessing)
                    self._model_load_seconds_distribution.update(
                        int((datetime.datetime.now() - start).total_seconds()))
                self._model_state = self._thread_local.model_state
            else:
                # Once bound, this instance must keep seeing the same model.
                assert self._model_state.model_dir == model_dir

            # Try to load it.
            # Single-string-input models consume the raw elements; otherwise
            # each element is parsed as JSON.
            if self._model_state.model.is_single_string_input():
                loaded_data = element
            else:
                loaded_data = [json.loads(d) for d in element]
            instances = mlprediction.decode_base64(loaded_data)
            inputs, predictions = self._model_state.model.predict(instances)
            # Materialize predictions (may be a generator) so they can be
            # counted and base64-encoded below.
            predictions = list(predictions)
            predictions = mlprediction.encode_base64(
                predictions, self._model_state.model.signature.outputs)

            # Best-effort metrics: the aggregator may be absent.
            if self._aggregator_dict:
                aggr = self._aggregator_dict.get(
                    aggregators.AggregatorName.ML_PREDICTIONS, None)
                if aggr:
                    aggr.inc(len(predictions))

            # NOTE(review): zip truncates to the shorter sequence; presumably
            # the model guarantees len(inputs) == len(predictions) — confirm.
            for i, p in zip(inputs, predictions):
                yield i, p

        except mlprediction.PredictionError as e:
            logging.error("Got a known exception: [%s]\n%s", str(e),
                          traceback.format_exc())
            # Strip framework noise from the TensorFlow error before
            # reporting it to users.
            clean_error_detail = error_filter.filter_tensorflow_error(
                e.error_detail)
            if self._cloud_logger:
                # TODO(user): consider to write a sink to buffer the logging events. It
                # also eliminates the restarting/duplicated running issue.
                self._cloud_logger.write_error_message(
                    clean_error_detail, self._create_snippet(element))
            # reraise failure to load model as permanent exception to end dataflow job
            if e.error_code == mlprediction.PredictionError.FAILED_TO_LOAD_MODEL:
                raise beam.utils.retry.PermanentException(clean_error_detail)
            # Beam renamed SideOutputValue to TaggedOutput; prefer the new
            # API and fall back on older SDKs where it does not exist.
            try:
                yield beam.pvalue.TaggedOutput("errors",
                                               (clean_error_detail, element))
            except AttributeError:
                yield beam.pvalue.SideOutputValue(
                    "errors", (clean_error_detail, element))

        except Exception as e:  # pylint: disable=broad-except
            # Catch-all boundary: log, report, and emit the failing element on
            # the "errors" output rather than failing the bundle.
            logging.error("Got an unknown exception: [%s].",
                          traceback.format_exc())
            if self._cloud_logger:
                self._cloud_logger.write_error_message(
                    str(e), self._create_snippet(element))
            # Same new-API-with-fallback dance as above.
            try:
                yield beam.pvalue.TaggedOutput("errors", (str(e), element))
            except AttributeError:
                yield beam.pvalue.SideOutputValue("errors", (str(e), element))