Example #1
    def test_reshape(self):
        metadata = \
            {
                'col1': {
                    'dtype': float,
                    'intermediate_format': constants.NOCHANGE,
                    'max_size': 1,
                    'shape': 1
                },
                'col2': {
                    'dtype': SparseVector,
                    'intermediate_format': constants.CUSTOM_SPARSE,
                    'max_size': 5,
                    'shape': 10
                },
                'label': {
                    'dtype': float,
                    'intermediate_format': constants.NOCHANGE,
                    'max_size': 1,
                    'shape': 1
                },
            }

        feature_columns = ['col1', 'col2']
        label_columns = ['label']
        sample_weight_col = 'sample_weight'

        Row = collections.namedtuple(
            'row', ['col1', 'col2', 'sample_weight', 'label'])

        col11 = tf.constant([3.])
        col21 = tf.constant([3., 1., 3., 6., 10., 30., 60., 0, 0, 0, 0])
        label1 = tf.constant([1.])
        sw1 = tf.constant([.06])
        row1 = Row(col1=col11, col2=col21, label=label1, sample_weight=sw1)

        reshape_fn = TFKerasUtil._reshape_fn(sample_weight_col,
                                             feature_columns, label_columns,
                                             metadata)

        reshaped_row = reshape_fn(row1)
        reshaped_row_value = self.evaluate(reshaped_row)

        assert np.allclose(reshaped_row_value['sample_weight'],
                           np.array([0.06]))
        assert np.allclose(reshaped_row_value['col1'], np.array([3.]))
        assert np.allclose(
            reshaped_row_value['col2'],
            np.array([[0., 10., 0., 30., 0., 0., 60., 0., 0., 0.]]))
        assert np.allclose(reshaped_row_value['label'], np.array([1.]))
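
The expected `col2` value follows from the `CUSTOM_SPARSE` intermediate layout: the flat input vector stores the number of non-zero entries first, then their indices, then their values, padded out to 2 * max_size + 1 elements. The standalone sketch below reproduces the densification the test asserts; the helper name is ours and the layout is inferred from the test's input and expected output, not taken from Horovod's code.

    import numpy as np

    def decode_custom_sparse(flat, dense_size):
        # Hypothetical helper (not part of Horovod): densify a
        # [nnz, idx_1..idx_nnz, val_1..val_nnz, padding] vector.
        nnz = int(flat[0])
        indices = flat[1:1 + nnz].astype(int)
        values = flat[1 + nnz:1 + 2 * nnz]
        dense = np.zeros(dense_size)
        dense[indices] = values
        return dense

    decode_custom_sparse(np.array([3., 1., 3., 6., 10., 30., 60., 0., 0., 0., 0.]), 10)
    # -> array([ 0., 10.,  0., 30.,  0.,  0., 60.,  0.,  0.,  0.])
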
Example #2
    def read_serialized_keras_model(self, ckpt_path, model):
        """
        Returns the serialized Keras model. On Databricks, only TFKeras is supported, not BareKeras.
        The `model` parameter provides the model structure when the checkpoint file contains
        only model weights.
        """
        import tensorflow
        from tensorflow import keras
        from horovod.spark.keras.util import TFKerasUtil

        if LooseVersion(tensorflow.__version__) < LooseVersion("2.0.0"):
            model.load_weights(ckpt_path)
        else:
            model = keras.models.load_model(ckpt_path)
        return TFKerasUtil.serialize_model(model)
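
For TensorFlow < 2.0 the checkpoint is assumed to hold only weights, so the caller must supply the model structure; for 2.0 and later the full model is reloaded from the checkpoint before serialization. Below is a self-contained sketch of that second branch; the toy model, the temporary path, and the assumption that Horovod's Spark extras are installed are ours, not part of the example.

    import os
    import tempfile
    from tensorflow import keras
    from horovod.spark.keras.util import TFKerasUtil

    # Save a full toy model to stand in for the checkpoint file ...
    model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
    ckpt_path = os.path.join(tempfile.mkdtemp(), 'checkpoint.h5')
    model.save(ckpt_path)

    # ... then reload and serialize it, mirroring the else-branch above.
    reloaded = keras.models.load_model(ckpt_path)
    model_bytes = TFKerasUtil.serialize_model(reloaded)
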
Example #3
    def read_serialized_keras_model(self, ckpt_path, model, custom_objects):
        """
        Returns the serialized Keras model.
        The `model` parameter provides the model structure when the checkpoint file contains
        only model weights.
        """
        import tensorflow
        from tensorflow import keras
        from horovod.spark.keras.util import TFKerasUtil

        if LooseVersion(tensorflow.__version__) < LooseVersion("2.0.0"):
            model.load_weights(ckpt_path)
        else:
            with keras.utils.custom_object_scope(custom_objects):
                model = keras.models.load_model(ckpt_path)
        return TFKerasUtil.serialize_model(model)
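
The only difference from the previous example is the `custom_objects` mapping, which lets `load_model` resolve user-defined classes and functions by name. Here is a self-contained sketch of that mechanism using only tf.keras; the toy loss, model, and path are ours, not part of the example.

    import os
    import tempfile
    import tensorflow as tf
    from tensorflow import keras

    def scaled_mse(y_true, y_pred):
        # A user-defined loss that load_model() cannot resolve by name alone.
        return 2.0 * tf.reduce_mean(tf.square(y_true - y_pred))

    model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
    model.compile(optimizer='adam', loss=scaled_mse)

    ckpt_path = os.path.join(tempfile.mkdtemp(), 'checkpoint.h5')
    model.save(ckpt_path)

    # Without the scope (or a custom_objects= argument) this load would fail
    # with an unknown-object error.
    with keras.utils.custom_object_scope({'scaled_mse': scaled_mse}):
        reloaded = keras.models.load_model(ckpt_path)
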
Example #4
    def test_prep_data_tf_keras_fn_without_sparse_col(self):
        has_sparse_col = False

        feature_columns = ['col1', 'col2']
        label_columns = ['label1', 'label2']
        sample_weight_col = 'sample_weight'

        col1 = tf.constant([3.])
        col2 = tf.constant([float(i) for i in range(10)])
        label1 = tf.constant([1., 2., 3., 4.])
        label2 = tf.constant([1., 2., 3., 4.])
        sw1 = tf.constant([.06])

        input_shapes = [[-1, 1], [-1, 2, 5]]
        output_shapes = [[-1, 4], [-1, 2, 2]]
        output_names = ['label1', 'label2']

        prep_data_tf_keras = \
            TFKerasUtil._prep_data_fn(has_sparse_col, sample_weight_col,
                                      feature_columns, label_columns, input_shapes,
                                      output_shapes, output_names)

        Row = collections.namedtuple(
            'row', ['col1', 'col2', sample_weight_col, 'label1', 'label2'])
        row = Row(col1=col1,
                  col2=col2,
                  label1=label1,
                  label2=label2,
                  sample_weight=sw1)

        prepped_row = prep_data_tf_keras(row)
        prepped_row_vals = self.evaluate(prepped_row)

        assert np.array_equal(prepped_row_vals[0][0], np.array([[3.]]))
        assert np.array_equal(
            prepped_row_vals[0][1],
            np.array([[[0., 1., 2., 3., 4.], [5., 6., 7., 8., 9.]]]))

        assert np.array_equal(prepped_row_vals[1][0],
                              np.array([[1., 2., 3., 4.]]))
        assert np.array_equal(prepped_row_vals[1][1],
                              np.array([[[1., 2.], [3., 4.]]]))

        assert np.allclose(prepped_row_vals[2]['label1'], np.array([0.06]))
        assert np.allclose(prepped_row_vals[2]['label2'], np.array([0.06]))
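
The assertions show the prepared element's structure: a (features, labels, sample_weights) tuple, with features and labels reshaped to `input_shapes` / `output_shapes` (the leading -1 absorbing the batch dimension) and the sample weight duplicated into a dict keyed by output name. That tuple structure is what tf.keras `Model.fit()` accepts from a `tf.data.Dataset`, so a wiring sketch might look like this; `dataset`, `batch_size`, and `model` are assumptions, not defined above.

    # Map the prepared function over already-batched rows; the -1 in the shapes
    # above absorbs whatever batch size is used.
    train_data = dataset.batch(batch_size).map(prep_data_tf_keras)
    model.fit(train_data, epochs=1)
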
Example #5
    def test_prep_data_tf_keras_fn_with_sparse_col(self):
        has_sparse_col = True

        feature_columns = ['col1', 'col2']
        label_columns = ['label1', 'label2']
        sample_weight_col = 'sample_weight'

        col1 = tf.constant([3.])
        col2 = tf.constant([3., 1., 3., 6., 10., 30., 60., 0, 0, 0])
        label1 = tf.constant([1., 2., 3., 4.])
        label2 = tf.constant([1., 2., 3., 4.])
        sw1 = tf.constant([.06])

        input_shapes = [[-1, 1], [-1, 2, 5]]
        output_shapes = [[-1, 4], [-1, 2, 2]]
        output_names = ['label1', 'label2']

        prep_data_tf_keras = \
            TFKerasUtil._prep_data_fn(has_sparse_col, sample_weight_col,
                                      feature_columns, label_columns, input_shapes,
                                      output_shapes, output_names)

        row = {
            'col1': col1,
            'col2': col2,
            'label1': label1,
            'label2': label2,
            sample_weight_col: sw1
        }

        prepped_row = prep_data_tf_keras(row)
        prepped_row_vals = self.evaluate(prepped_row)

        assert np.array_equal(prepped_row_vals[0][0], np.array([[3.]]))
        assert np.array_equal(
            prepped_row_vals[0][1],
            np.array([[[3., 1., 3., 6., 10.], [30., 60., 0., 0., 0.]]]))

        assert np.array_equal(prepped_row_vals[1][0],
                              np.array([[1., 2., 3., 4.]]))
        assert np.array_equal(prepped_row_vals[1][1],
                              np.array([[[1., 2.], [3., 4.]]]))

        assert np.allclose(prepped_row_vals[2]['label1'], np.array([0.06]))
        assert np.allclose(prepped_row_vals[2]['label2'], np.array([0.06]))
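
With `has_sparse_col=True` the row is a plain dict rather than a namedtuple, which mirrors the dict that `_reshape_fn` returns in Example #1, with sparse columns already densified. As an ordering sketch only (the two tests above use different metadata and label columns, so they do not literally compose, and `raw_row` is an assumption):

    densified = reshape_fn(raw_row)             # dict of dense tensors, as in Example #1
    prepped = prep_data_tf_keras(densified)     # (features, labels, sample_weights) tuple
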
Example #6
    def read_serialized_keras_model(self, ckpt_path, model, custom_objects):
        """Reads the checkpoint file of the keras model into model bytes and returns the base 64
        encoded model bytes.
        :param ckpt_path: A string of path to the checkpoint file.
        :param model: A keras model. This parameter will be used in DBFSLocalStore\
            .read_serialized_keras_model() when the ckpt_path only contains model weights.
        :param custom_objects: This parameter will be used in DBFSLocalStore\
            .read_serialized_keras_model() when loading the keras model.
        :return: the base 64 encoded model bytes of the checkpoint model.
        """
        from horovod.runner.common.util import codec
        import tensorflow
        from tensorflow import keras
        from horovod.spark.keras.util import TFKerasUtil

        if LooseVersion(tensorflow.__version__) < LooseVersion("2.0.0"):
            model_bytes = self.read(ckpt_path)
            return codec.dumps_base64(model_bytes)
        else:
            with keras.utils.custom_object_scope(custom_objects):
                model = keras.models.load_model(ckpt_path)
            return TFKerasUtil.serialize_model(model)
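
The two branches return different encodings: for TensorFlow < 2.0 the raw checkpoint bytes are base64-encoded with Horovod's `codec` helper, while for 2.0 and later the full model is reloaded and serialized with `TFKerasUtil.serialize_model`. A round-trip sketch for the first branch follows; the stand-in bytes are ours, and treating `loads_base64` as the inverse of `dumps_base64` is our assumption based on the codec module's naming, not something shown in the example.

    from horovod.runner.common.util import codec

    model_bytes = b'raw checkpoint bytes'       # stand-in for self.read(ckpt_path)
    encoded = codec.dumps_base64(model_bytes)
    assert codec.loads_base64(encoded) == model_bytes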