예제 #1
0
    def one_hot_encoder_serializer_test(self):
        """Serialize a fitted OneHotEncoder and check the written model.json.

        Three string labels are label-encoded, reshaped into a single column
        and used to fit the OneHotEncoder. The encoder is then written to
        ``self.tmp_dir`` and the ``op``, ``size`` and ``drop_last`` entries of
        its ``model.json`` are asserted.
        """
        label_encoder = LabelEncoder(input_features=['label_feature'],
                                     output_features='label_feature_le_encoded')
        codes = label_encoder.fit_transform(['a', 'b', 'c'])
        column = codes.reshape(3, 1)

        encoder = OneHotEncoder(sparse=False)
        encoded_name = '{}_one_hot_encoded'.format(label_encoder.output_features)
        encoder.mlinit(input_features=label_encoder.output_features,
                       output_features=encoded_name)
        encoder.fit(column)
        encoder.serialize_to_bundle(self.tmp_dir, encoder.name)

        # The bundle node directory is named after the transformer.
        model_path = "{}/{}.node/model.json".format(self.tmp_dir, encoder.name)
        with open(model_path) as handle:
            model = json.load(handle)

        self.assertEqual(encoder.op, model['op'])
        self.assertEqual(3, model['attributes']['size']['value'])
        self.assertEqual(True, model['attributes']['drop_last']['value'])
예제 #2
0
    def test_one_hot_encoder_serialization_succeeds_when_handle_unknown_is_set_to_ignore(self):
        """Serialize an encoder built with ``handle_unknown='ignore'``.

        After fitting on a single label-encoded column, the encoder is written
        to ``self.tmp_dir`` and its ``model.json`` is expected to report op
        ``one_hot_encoder``, size 3, ``handle_invalid`` of ``keep`` and
        ``drop_last`` set to True.
        """
        label_encoder = LabelEncoder(input_features=['label'], output_features='label_le_encoded')
        codes = label_encoder.fit_transform(['a', 'b', 'c', 'a', 'b', 'b'])
        column = codes.reshape(-1, 1)

        encoder = OneHotEncoder(handle_unknown='ignore')
        encoded_name = '{}_one_hot_encoded'.format(label_encoder.output_features)
        encoder.mlinit(prior_tf=label_encoder, output_features=encoded_name)
        encoder.fit(column)
        encoder.serialize_to_bundle(self.tmp_dir, encoder.name)

        model_path = "{}/{}.node/model.json".format(self.tmp_dir, encoder.name)
        with open(model_path) as handle:
            model = json.load(handle)

        self.assertEqual('one_hot_encoder', model['op'])
        self.assertEqual(3, model['attributes']['size']['long'])
        self.assertEqual('keep', model['attributes']['handle_invalid']['string'])
        self.assertEqual(True, model['attributes']['drop_last']['boolean'])
예제 #3
0
    def one_hot_encoder_deserializer_test(self):
        """Round-trip a OneHotEncoder through a bundle and compare results.

        The encoder is fitted on a label-encoded column, serialized to
        ``self.tmp_dir`` and loaded into a fresh OneHotEncoder. The first
        element of each of the three transformed rows must match between the
        two encoders, and ``node.json`` must agree with the restored encoder's
        name and input/output feature names.
        """
        label_encoder = LabelEncoder(input_features=['label_feature'],
                                     output_features='label_feature_le_encoded')
        column = label_encoder.fit_transform(['a', 'b', 'c']).reshape(3, 1)

        original = OneHotEncoder(sparse=False)
        # NOTE(review): output_features was passed to LabelEncoder as a plain
        # string above, so indexing it with [0] may take only its first
        # character -- confirm this is the intended feature name.
        encoded_names = [
            '{}_one_hot_encoded'.format(label_encoder.output_features[0])
        ]
        original.mlinit(input_features=label_encoder.output_features,
                        output_features=encoded_names)
        original.fit(column)
        original.serialize_to_bundle(self.tmp_dir, original.name)

        # Load the bundle back into a brand new encoder instance.
        restored = OneHotEncoder()
        restored.deserialize_from_bundle(self.tmp_dir,
                                         "{}.node".format(original.name))

        # Both encoders must agree on the sample data.
        expected = original.transform(column)
        actual = restored.transform(column)
        for row in range(3):
            self.assertEqual(expected[row][0], actual[row][0])

        # Check node.json against the restored encoder's metadata.
        node_path = "{}/{}.node/node.json".format(self.tmp_dir, original.name)
        with open(node_path) as handle:
            node = json.load(handle)

        self.assertEqual(restored.name, node['name'])
        self.assertEqual(restored.input_features[0],
                         node['shape']['inputs'][0]['name'])
        self.assertEqual(restored.output_features,
                         node['shape']['outputs'][0]['name'])
예제 #4
0
    def label_encoder_test(self):
        """Fit a LabelEncoder, serialize it and verify model.json / node.json.

        Checks that the fitted classes equal the input labels, that the
        serialized ``model.json`` carries the encoder's op and has ``labels``
        as its first attribute key, and that ``node.json`` records the
        encoder's name and its input/output feature names.
        """
        labels = ['a', 'b', 'c']

        le = LabelEncoder(input_features=['label_feature'],
                          output_features='label_feature_le_encoded')

        le.fit(labels)

        self.assertEqual(labels, le.classes_.tolist())

        le.serialize_to_bundle(self.tmp_dir, le.name)

        # Test model.json
        with open("{}/{}.node/model.json".format(self.tmp_dir,
                                                 le.name)) as json_data:
            model = json.load(json_data)

        self.assertEqual(le.op, model['op'])
        # dict.keys() is a non-subscriptable view on Python 3, so materialize
        # the keys into a list before indexing (also valid on Python 2).
        self.assertEqual('labels', list(model['attributes'])[0])

        # Test node.json
        with open("{}/{}.node/node.json".format(self.tmp_dir,
                                                le.name)) as json_data:
            node = json.load(json_data)

        self.assertEqual(le.name, node['name'])
        self.assertEqual(le.input_features[0],
                         node['shape']['inputs'][0]['name'])
        self.assertEqual(le.output_features,
                         node['shape']['outputs'][0]['name'])
예제 #5
0
    def label_encoder_deserializer_test(self):
        """Round-trip a LabelEncoder through a bundle and compare behaviour.

        The encoder is fitted on three labels, serialized to ``self.tmp_dir``
        and restored into a fresh LabelEncoder. Both encoders must produce the
        same codes for the labels and expose matching input/output features.
        """
        labels = ['a', 'b', 'c']
        original = LabelEncoder(input_features=['label_feature'],
                                output_features='label_feature_le_encoded')
        original.fit(labels)

        self.assertEqual(labels, original.classes_.tolist())

        original.serialize_to_bundle(self.tmp_dir, original.name)

        # Parse model.json; the parsed content itself is not inspected here.
        model_path = "{}/{}.node/model.json".format(self.tmp_dir, original.name)
        with open(model_path) as handle:
            json.load(handle)

        # Restore the encoder from the bundle just written.
        restored = LabelEncoder()
        restored.deserialize_from_bundle(self.tmp_dir,
                                         "{}.node".format(original.name))

        expected = original.transform(labels)
        actual = restored.transform(labels)
        print("le.output_features: {}".format(original.output_features))
        print("label_encoder_tf.output_features: {}".format(
            restored.output_features))

        for position in range(3):
            self.assertEqual(expected[position], actual[position])
        self.assertEqual(original.input_features, restored.input_features)
        self.assertEqual(original.output_features,
                         restored.output_features[0])
예제 #6
0
 def setUp(self):
     """Build the shared fixtures: label encoder, encoded column and temp dir."""
     self.le = LabelEncoder(input_features=['label'], output_features='label_le_encoded')
     encoded = self.le.fit_transform(['a', 'b', 'c', 'a', 'b', 'b'])
     # Reshape the encoded labels into a single feature column.
     self.oh_data = encoded.reshape(-1, 1)
     self.tmp_dir = tempfile.mkdtemp(prefix="mleap.python.tests")
예제 #7
0
class TestOneHotEncoder(unittest.TestCase):
    """Bundle serialization and deserialization tests for OneHotEncoder.

    Every test starts from the fixtures built in ``setUp``: a fitted
    LabelEncoder (``self.le``), its encoded output as a single column
    (``self.oh_data``) and a fresh temporary directory (``self.tmp_dir``)
    that is removed again in ``tearDown``.
    """

    def setUp(self):
        """Create the label encoder, the encoded column and the temp dir."""
        labels = ['a', 'b', 'c', 'a', 'b', 'b']
        self.le = LabelEncoder(input_features=['label'], output_features='label_le_encoded')
        self.oh_data = self.le.fit_transform(labels).reshape(-1, 1)
        self.tmp_dir = tempfile.mkdtemp(prefix="mleap.python.tests")

    def tearDown(self):
        """Remove the temporary bundle directory."""
        shutil.rmtree(self.tmp_dir)

    def _fit_encoder(self, **encoder_params):
        """Return a OneHotEncoder built with *encoder_params*, fitted on ``self.oh_data``."""
        ohe = OneHotEncoder(**encoder_params)
        ohe.mlinit(prior_tf=self.le, output_features='{}_one_hot_encoded'.format(self.le.output_features))
        ohe.fit(self.oh_data)
        return ohe

    def _serialize_and_load_model(self, ohe):
        """Serialize *ohe* into ``self.tmp_dir`` and return its parsed model.json."""
        ohe.serialize_to_bundle(self.tmp_dir, ohe.name)
        with open("{}/{}.node/model.json".format(self.tmp_dir, ohe.name)) as json_data:
            return json.load(json_data)

    def _round_trip(self, ohe):
        """Serialize *ohe* and return a new OneHotEncoder loaded from that bundle."""
        ohe.serialize_to_bundle(self.tmp_dir, ohe.name)
        ohe_ds = OneHotEncoder()
        ohe_ds.deserialize_from_bundle(self.tmp_dir, "{}.node".format(ohe.name))
        return ohe_ds

    def test_one_hot_encoder_serialization_fails_on_multiple_feature_columns(self):
        self.oh_data = np.hstack((self.oh_data, self.oh_data))  # make two feature columns

        ohe = self._fit_encoder(handle_unknown='error')

        with self.assertRaises(NotImplementedError):
            ohe.serialize_to_bundle(self.tmp_dir, ohe.name)

    def test_one_hot_encoder_serialization_fails_on_an_invalid_category_range(self):
        self.oh_data[2][0] = 3  # make invalid category range

        ohe = self._fit_encoder(handle_unknown='error')

        with self.assertRaises(ValueError):
            ohe.serialize_to_bundle(self.tmp_dir, ohe.name)

    def test_one_hot_encoder_serialization_fails_when_using_the_drop_param(self):
        ohe = self._fit_encoder(handle_unknown='error', drop='first')  # try to use `drop` parameter

        with self.assertRaises(NotImplementedError):
            ohe.serialize_to_bundle(self.tmp_dir, ohe.name)

    def test_one_hot_encoder_serialization_fails_when_using_the_dtype_param(self):
        ohe = self._fit_encoder(handle_unknown='error', dtype=int)  # try to use `dtype` parameter

        with self.assertRaises(NotImplementedError):
            ohe.serialize_to_bundle(self.tmp_dir, ohe.name)

    def test_one_hot_encoder_serialization_succeeds_when_handle_unknown_is_set_to_error(self):
        ohe = self._fit_encoder(handle_unknown='error')

        model = self._serialize_and_load_model(ohe)

        self.assertEqual('one_hot_encoder', model['op'])
        self.assertEqual(3, model['attributes']['size']['long'])
        self.assertEqual('error', model['attributes']['handle_invalid']['string'])
        self.assertEqual(False, model['attributes']['drop_last']['boolean'])

    def test_one_hot_encoder_deserialization_succeeds_when_handle_unknown_is_set_to_error(self):
        ohe = self._fit_encoder(handle_unknown='error')
        ohe_ds = self._round_trip(ohe)

        self.oh_data[2][0] = 3  # Add an unknown category

        with self.assertRaises(ValueError):
            ohe_ds.transform(self.oh_data)

    def test_one_hot_encoder_serialization_succeeds_when_handle_unknown_is_set_to_ignore(self):
        # Uses the setUp fixtures like the sibling tests instead of rebuilding
        # an identical label encoder and data column locally.
        ohe = self._fit_encoder(handle_unknown='ignore')

        model = self._serialize_and_load_model(ohe)

        self.assertEqual('one_hot_encoder', model['op'])
        self.assertEqual(3, model['attributes']['size']['long'])
        self.assertEqual('keep', model['attributes']['handle_invalid']['string'])
        self.assertEqual(True, model['attributes']['drop_last']['boolean'])

    def test_one_hot_encoder_deserialization_succeeds_when_handle_unknown_is_set_to_ignore(self):
        ohe = self._fit_encoder(handle_unknown='ignore')
        ohe_ds = self._round_trip(ohe)

        self.oh_data[2][0] = 3  # Add an unknown category

        # The original encoder's output is densified before the comparison.
        expected = ohe.transform(self.oh_data).todense()
        actual = ohe_ds.transform(self.oh_data)

        np.testing.assert_array_equal(expected, actual)