def test_reshape(self):
    """Check that the function built by ``TFKerasUtil._reshape_fn`` reshapes a row as expected.

    The row carries two feature columns, one label column and a sample-weight
    column. ``col1``, ``label`` and the sample weight are expected to come
    back with unchanged values, while ``col2`` (declared as
    ``constants.CUSTOM_SPARSE`` with ``shape`` 10) is expected to come back
    as a dense ``(1, 10)`` array.
    """
    sample_weight_col = 'sample_weight'
    feature_columns = ['col1', 'col2']
    label_columns = ['label']

    # col1 and label share the same pass-through description; each gets its
    # own dict instance so the metadata entries stay independent objects.
    passthrough_meta = {
        'dtype': float,
        'intermediate_format': constants.NOCHANGE,
        'max_size': 1,
        'shape': 1
    }
    metadata = {
        'col1': dict(passthrough_meta),
        'col2': {
            'dtype': SparseVector,
            'intermediate_format': constants.CUSTOM_SPARSE,
            'max_size': 5,
            'shape': 10
        },
        'label': dict(passthrough_meta),
    }

    Row = collections.namedtuple(
        'row', ['col1', 'col2', 'sample_weight', 'label'])

    dense_feature = tf.constant([3.])
    # NOTE(review): judging by the expected dense output below, this packed
    # vector presumably reads as [count, indices..., values...] plus zero
    # padding -- confirm against the CUSTOM_SPARSE format definition.
    packed_sparse_feature = tf.constant(
        [3., 1., 3., 6., 10., 30., 60., 0, 0, 0, 0])
    label_tensor = tf.constant([1.])
    weight_tensor = tf.constant([.06])
    input_row = Row(col1=dense_feature,
                    col2=packed_sparse_feature,
                    label=label_tensor,
                    sample_weight=weight_tensor)

    reshape_fn = TFKerasUtil._reshape_fn(
        sample_weight_col, feature_columns, label_columns, metadata)
    actual = self.evaluate(reshape_fn(input_row))

    expected_by_column = {
        'sample_weight': np.array([0.06]),
        'col1': np.array([3.]),
        'col2': np.array([[0., 10., 0., 30., 0., 0., 60., 0., 0., 0.]]),
        'label': np.array([1.]),
    }
    for column, expected in expected_by_column.items():
        assert np.allclose(actual[column], expected)
def read_serialized_keras_model(self, ckpt_path, model, custom_objects=None):
    """
    Returns serialized keras model.
    On Databricks, only TFKeras is supported, not BareKeras.

    :param ckpt_path: Path of the checkpoint file to read.
    :param model: A keras model. It provides the model structure when the
        checkpoint file only contains model weights (TensorFlow < 2.0.0); in
        that case the weights are loaded into this object in place.
    :param custom_objects: Optional custom objects made available through
        ``keras.utils.custom_object_scope`` while the model is loaded
        (TensorFlow >= 2.0.0 only). ``None`` (the default) means no custom
        objects, which preserves the behaviour of the previous two-argument
        signature.
    :return: The result of ``TFKerasUtil.serialize_model`` for the restored
        model.
    """
    import tensorflow
    from tensorflow import keras
    from horovod.spark.keras.util import TFKerasUtil

    if LooseVersion(tensorflow.__version__) < LooseVersion("2.0.0"):
        # The checkpoint only holds weights: restore them into the given model.
        model.load_weights(ckpt_path)
    else:
        # Older custom_object_scope implementations do not accept None, so
        # fall back to an empty mapping when no custom objects were supplied.
        with keras.utils.custom_object_scope(custom_objects or {}):
            model = keras.models.load_model(ckpt_path)
    return TFKerasUtil.serialize_model(model)
def read_serialized_keras_model(self, ckpt_path, model, custom_objects):
    """
    Load a keras checkpoint and return it in serialized form.

    :param ckpt_path: Path of the checkpoint file to read.
    :param model: A keras model supplying the model structure when the
        checkpoint file only contains model weights (TensorFlow < 2.0.0);
        the weights are loaded into it in place.
    :param custom_objects: Custom objects placed in a
        ``keras.utils.custom_object_scope`` while the full model is loaded
        (TensorFlow >= 2.0.0).
    :return: The result of ``TFKerasUtil.serialize_model`` for the restored
        model.
    """
    import tensorflow
    from tensorflow import keras
    from horovod.spark.keras.util import TFKerasUtil

    weights_only_checkpoint = \
        LooseVersion(tensorflow.__version__) < LooseVersion("2.0.0")

    if weights_only_checkpoint:
        # Only weights are stored, so reuse the structure of `model`.
        model.load_weights(ckpt_path)
        return TFKerasUtil.serialize_model(model)

    with keras.utils.custom_object_scope(custom_objects):
        restored_model = keras.models.load_model(ckpt_path)
    return TFKerasUtil.serialize_model(restored_model)
def test_prep_data_tf_keras_fn_without_sparse_col(self):
    """Check ``TFKerasUtil._prep_data_fn`` on a namedtuple row with ``has_sparse_col`` False.

    The prepared row is expected to hold, in order: the features reshaped
    to ``input_shapes``, the labels reshaped to ``output_shapes``, and a
    mapping from each output name to the sample weight.
    """
    has_sparse_col = False
    sample_weight_col = 'sample_weight'
    feature_columns = ['col1', 'col2']
    label_columns = ['label1', 'label2']
    output_names = ['label1', 'label2']
    input_shapes = [[-1, 1], [-1, 2, 5]]
    output_shapes = [[-1, 4], [-1, 2, 2]]

    row_values = {
        'col1': tf.constant([3.]),
        'col2': tf.constant(list(map(float, range(10)))),
        'label1': tf.constant([1., 2., 3., 4.]),
        'label2': tf.constant([1., 2., 3., 4.]),
        'sample_weight': tf.constant([.06]),
    }

    prep_fn = TFKerasUtil._prep_data_fn(
        has_sparse_col, sample_weight_col, feature_columns, label_columns,
        input_shapes, output_shapes, output_names)

    Row = collections.namedtuple(
        'row', ['col1', 'col2', sample_weight_col, 'label1', 'label2'])
    prepped = self.evaluate(prep_fn(Row(**row_values)))
    features, labels, weights = prepped[0], prepped[1], prepped[2]

    # Features follow input_shapes.
    assert np.array_equal(features[0], np.array([[3.]]))
    assert np.array_equal(
        features[1],
        np.array([[[0., 1., 2., 3., 4.], [5., 6., 7., 8., 9.]]]))

    # Labels follow output_shapes.
    assert np.array_equal(labels[0], np.array([[1., 2., 3., 4.]]))
    assert np.array_equal(labels[1], np.array([[[1., 2.], [3., 4.]]]))

    # The same sample weight is reported under every output name.
    for name in output_names:
        assert np.allclose(weights[name], np.array([0.06]))
def test_prep_data_tf_keras_fn_with_sparse_col(self):
    """Check ``TFKerasUtil._prep_data_fn`` on a dict row with ``has_sparse_col`` True.

    The prepared row is expected to hold, in order: the features reshaped
    to ``input_shapes``, the labels reshaped to ``output_shapes``, and a
    mapping from each output name to the sample weight.
    """
    has_sparse_col = True
    sample_weight_col = 'sample_weight'
    feature_columns = ['col1', 'col2']
    label_columns = ['label1', 'label2']
    output_names = ['label1', 'label2']
    input_shapes = [[-1, 1], [-1, 2, 5]]
    output_shapes = [[-1, 4], [-1, 2, 2]]

    # With a sparse column flagged, the row is passed as a plain dict.
    row = {
        'col1': tf.constant([3.]),
        'col2': tf.constant([3., 1., 3., 6., 10., 30., 60., 0, 0, 0]),
        'label1': tf.constant([1., 2., 3., 4.]),
        'label2': tf.constant([1., 2., 3., 4.]),
        sample_weight_col: tf.constant([.06]),
    }

    prep_fn = TFKerasUtil._prep_data_fn(
        has_sparse_col, sample_weight_col, feature_columns, label_columns,
        input_shapes, output_shapes, output_names)
    prepped = self.evaluate(prep_fn(row))
    features, labels, weights = prepped[0], prepped[1], prepped[2]

    # Features follow input_shapes.
    assert np.array_equal(features[0], np.array([[3.]]))
    assert np.array_equal(
        features[1],
        np.array([[[3., 1., 3., 6., 10.], [30., 60., 0., 0., 0.]]]))

    # Labels follow output_shapes.
    assert np.array_equal(labels[0], np.array([[1., 2., 3., 4.]]))
    assert np.array_equal(labels[1], np.array([[[1., 2.], [3., 4.]]]))

    # The same sample weight is reported under every output name.
    for name in output_names:
        assert np.allclose(weights[name], np.array([0.06]))
def read_serialized_keras_model(self, ckpt_path, model, custom_objects):
    """Read a keras checkpoint file and return the model in serialized form.

    :param ckpt_path: A string of path to the checkpoint file.
    :param model: A keras model. It is not used by this implementation; it
        is part of the signature for implementations that need the model
        structure when the checkpoint only contains model weights.
    :param custom_objects: Custom objects placed in a
        ``keras.utils.custom_object_scope`` while the model is loaded
        (TensorFlow >= 2.0.0 only).
    :return: For TensorFlow < 2.0.0, the base 64 encoding of the raw bytes
        read from ``ckpt_path``; otherwise the result of
        ``TFKerasUtil.serialize_model`` for the loaded model.
    """
    from horovod.runner.common.util import codec
    import tensorflow
    from tensorflow import keras
    from horovod.spark.keras.util import TFKerasUtil

    pre_tf2 = LooseVersion(tensorflow.__version__) < LooseVersion("2.0.0")

    if not pre_tf2:
        with keras.utils.custom_object_scope(custom_objects):
            loaded_model = keras.models.load_model(ckpt_path)
        return TFKerasUtil.serialize_model(loaded_model)

    # TensorFlow 1.x: the checkpoint bytes are encoded as-is.
    return codec.dumps_base64(self.read(ckpt_path))