def testSingleUtf8StringTensor(self):
  """A string output whose alias lacks the _bytes suffix passes through raw."""
  string_tensor_info = meta_graph_pb2.TensorInfo(
      dtype=tf.string.as_datatype_enum)
  payload = [[[u"a", u"b"]], [[u"c", u"d"]]]
  encoded = mlprediction.encode_base64(payload, {"dummy": string_tensor_info})
  # No alias ends in "_bytes", so encode_base64 must leave the values alone.
  self.assertEqual(encoded, [[[u"a", u"b"]], [[u"c", u"d"]]])
def process(self, element, model_dir):
  """Runs batch prediction over one bundle of serialized instances.

  Lazily loads the model from model_dir, caching it in thread-local
  state so each worker thread loads it at most once per model_dir.

  Args:
    element: The input batch. A WindowedValue-style wrapper is unwrapped
      via its .element attribute; for single-string-input models (or
      models that need preprocessing) the payload is passed through as-is,
      otherwise each record is parsed as a JSON instance.
    model_dir: Directory of the model to load and run.

  Yields:
    (input, prediction) pairs on the main output; on failure,
    (error_message, element) tuples on the "errors" tagged output.

  Raises:
    beam.utils.retry.PermanentException: when the model itself fails to
      load, so the Dataflow job terminates instead of retrying forever.
  """
  try:
    # Unwrap WindowedValue-style containers; plain batches pass through.
    element = element.element
  except AttributeError:
    pass
  try:
    if self._model_state is None:
      # Reuse the thread-local model if it already points at model_dir;
      # otherwise (re)load and count the load.
      if (getattr(self._thread_local, "model_state", None) is None or
          self._thread_local.model_state.model_dir != model_dir):
        self._num_model_loads.inc(1)
        self._thread_local.model_state = self._ModelState(
            model_dir, self._skip_preprocessing)
      self._model_state = self._thread_local.model_state
    else:
      assert self._model_state.model_dir == model_dir
    # Single-string-input models and models doing their own preprocessing
    # consume raw records; everything else expects JSON-serialized
    # instances.
    if (self._model_state.model.is_single_string_input() or
        self._model_state.model.need_preprocess()):
      loaded_data = element
    else:
      loaded_data = [json.loads(d) for d in element]
    instances = mlprediction.decode_base64(loaded_data)
    inputs, predictions = self._model_state.model.predict(instances)
    # Materialize the (possibly lazy) predictions before encoding/counting.
    predictions = list(predictions)
    predictions = mlprediction.encode_base64(
        predictions, self._model_state.model.outputs_type_map())
    if self._aggregator_dict:
      aggr = self._aggregator_dict.get(
          aggregators.AggregatorName.ML_PREDICTIONS, None)
      if aggr:
        aggr.inc(len(predictions))
    for i, p in zip(inputs, predictions):
      yield i, p
  except mlprediction.PredictionError as e:
    logging.error("Got a known exception: [%s]\n%s", e.error_message,
                  traceback.format_exc())
    if self._cloud_logger:
      # TODO(user): consider to write a sink to buffer the logging events. It
      # also eliminates the restarting/duplicated running issue.
      self._cloud_logger.write_error_message(
          e.error_message, self._create_snippet(element))
    # reraise failure to load model as permanent exception to end dataflow job
    if e.error_code == mlprediction.PredictionError.FAILED_TO_LOAD_MODEL:
      raise beam.utils.retry.PermanentException(e.error_message)
    # Prefer TaggedOutput (current Beam API); fall back to the deprecated
    # SideOutputValue on older SDKs, matching the sibling implementation.
    try:
      yield beam.pvalue.TaggedOutput("errors", (e.error_message, element))
    except AttributeError:
      yield beam.pvalue.SideOutputValue("errors",
                                        (e.error_message, element))
  except Exception as e:  # pylint: disable=broad-except
    logging.error("Got an unknown exception: [%s].", traceback.format_exc())
    if self._cloud_logger:
      self._cloud_logger.write_error_message(
          str(e), self._create_snippet(element))
    try:
      yield beam.pvalue.TaggedOutput("errors", (str(e), element))
    except AttributeError:
      yield beam.pvalue.SideOutputValue("errors", (str(e), element))
def testSingleRank1BytesTensor(self):
  """String outputs whose alias ends in _bytes are wrapped as {u'b64': ...}."""
  bytes_tensor_info = meta_graph_pb2.TensorInfo(
      dtype=tf.string.as_datatype_enum)
  encoded = mlprediction.encode_base64(
      [u"a", u"b", u"c"], {"dummy_bytes": bytes_tensor_info})
  # Each element must be individually base64-wrapped under the "b64" key.
  expected = [{u"b64": base64.b64encode(s)} for s in [u"a", u"b", u"c"]]
  self.assertEqual(encoded, expected)
def testMultiTensorWithUtf8Strings(self):
  """Multi-output predictions with plain string and float tensors are unchanged."""
  outputs_map = {
      "tensor1": meta_graph_pb2.TensorInfo(dtype=tf.string.as_datatype_enum),
      "tensor2": meta_graph_pb2.TensorInfo(dtype=tf.string.as_datatype_enum),
      "tensor3": meta_graph_pb2.TensorInfo(dtype=tf.float32.as_datatype_enum),
  }
  prediction = {
      u"tensor1": [[[u"a", u"b"]], [[u"c", u"d"]]],
      u"tensor2": [u"x", u"y", u"z"],
      u"tensor3": [1.0, -2.0, 3.14],
  }
  encoded = mlprediction.encode_base64([prediction], outputs_map)
  # No alias ends with "_bytes", so nothing is base64-wrapped — strings
  # stay raw and the float tensor is untouched.
  self.assertEqual(encoded, [{
      u"tensor1": [[[u"a", u"b"]], [[u"c", u"d"]]],
      u"tensor2": [u"x", u"y", u"z"],
      u"tensor3": [1.0, -2.0, 3.14],
  }])
def process(self, element, model_dir):
  """Runs batch prediction over one bundle of serialized instances.

  Lazily loads the model from model_dir into thread-local state (loading
  at most once per thread per model_dir, and timing each load), runs
  prediction, and base64-encodes the outputs per the model signature.

  Args:
    element: The input batch; for single-string-input models the payload
      is consumed as-is, otherwise each record is parsed as a JSON
      instance.
    model_dir: Directory of the model to load; may be a runtime
      ValueProvider, which is resolved here via .get().

  Yields:
    (input, prediction) pairs on the main output; on failure,
    (error_message, element) tuples on the "errors" output.

  Raises:
    beam.utils.retry.PermanentException: when the model itself fails to
      load, so the Dataflow job terminates instead of retrying.
  """
  try:
    # Resolve templated/runtime parameters before using the path.
    if isinstance(model_dir, ValueProvider):
      model_dir = model_dir.get()
    if self._model_state is None:
      # Reuse the thread-local model if it already points at model_dir;
      # otherwise (re)load it and record the load time in seconds.
      if (getattr(self._thread_local, "model_state", None) is None or
          self._thread_local.model_state.model_dir != model_dir):
        start = datetime.datetime.now()
        self._thread_local.model_state = self._ModelState(
            model_dir, self._tag_list, self._signature_name,
            self._skip_preprocessing)
        self._model_load_seconds_distribution.update(
            int((datetime.datetime.now() - start).total_seconds()))
      self._model_state = self._thread_local.model_state
    else:
      assert self._model_state.model_dir == model_dir
    # Single-string-input models consume raw records; everything else
    # expects JSON-serialized instances.
    if self._model_state.model.is_single_string_input():
      loaded_data = element
    else:
      loaded_data = [json.loads(d) for d in element]
    instances = mlprediction.decode_base64(loaded_data)
    inputs, predictions = self._model_state.model.predict(instances)
    # Materialize the (possibly lazy) predictions before encoding/counting.
    predictions = list(predictions)
    predictions = mlprediction.encode_base64(
        predictions, self._model_state.model.signature.outputs)
    if self._aggregator_dict:
      aggr = self._aggregator_dict.get(
          aggregators.AggregatorName.ML_PREDICTIONS, None)
      if aggr:
        aggr.inc(len(predictions))
    for i, p in zip(inputs, predictions):
      yield i, p
  except mlprediction.PredictionError as e:
    logging.error("Got a known exception: [%s]\n%s", str(e),
                  traceback.format_exc())
    # Strip TensorFlow noise from the error before logging/reporting it.
    clean_error_detail = error_filter.filter_tensorflow_error(
        e.error_detail)
    if self._cloud_logger:
      # TODO(user): consider to write a sink to buffer the logging events. It
      # also eliminates the restarting/duplicated running issue.
      self._cloud_logger.write_error_message(
          clean_error_detail, self._create_snippet(element))
    # reraise failure to load model as permanent exception to end dataflow job
    if e.error_code == mlprediction.PredictionError.FAILED_TO_LOAD_MODEL:
      raise beam.utils.retry.PermanentException(clean_error_detail)
    # TaggedOutput is the current Beam API; fall back to the deprecated
    # SideOutputValue on older SDKs.
    try:
      yield beam.pvalue.TaggedOutput("errors", (clean_error_detail, element))
    except AttributeError:
      yield beam.pvalue.SideOutputValue(
          "errors", (clean_error_detail, element))
  except Exception as e:  # pylint: disable=broad-except
    logging.error("Got an unknown exception: [%s].", traceback.format_exc())
    if self._cloud_logger:
      self._cloud_logger.write_error_message(
          str(e), self._create_snippet(element))
    try:
      yield beam.pvalue.TaggedOutput("errors", (str(e), element))
    except AttributeError:
      yield beam.pvalue.SideOutputValue("errors", (str(e), element))