Example #1
    def test_standard_scaler_deserializer(self):
        """Round-trip a fitted StandardScaler through a bundle and verify
        the deserialized transformer matches the original."""

        input_cols = ['a']
        extractor = FeatureExtractor(
            input_scalars=['a'],
            output_vector='extracted_a_output',
            output_vector_items=["{}_out".format(col) for col in input_cols])

        # Fit a standard scaler and serialize it to a bundle
        scaler = StandardScaler(with_mean=True, with_std=True)
        scaler.mlinit(prior_tf=extractor, output_features='a_scaled')
        scaler.fit(self.df[['a']])
        scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

        # Deserialize the bundle into a fresh transformer
        node_name = "{}.node".format(scaler.name)
        restored = StandardScaler().deserialize_from_bundle(
            self.tmp_dir, node_name)

        # Both transformers must behave identically on sample data
        original_out = scaler.transform(self.df[['a']])
        restored_out = restored.transform(self.df[['a']])

        self.assertEqual(original_out[0], restored_out[0])
        self.assertEqual(scaler.name, restored.name)
        self.assertEqual(scaler.op, restored.op)
        self.assertEqual(scaler.mean_, restored.mean_)
        self.assertEqual(scaler.scale_, restored.scale_)
Example #2
    def test_standard_scaler_serializer(self):
        """Serialize a fitted StandardScaler and validate the contents of
        the written model.json and node.json files."""

        scaler = StandardScaler(with_mean=True, with_std=True)
        scaler.mlinit(input_features='a', output_features='a_scaled')
        scaler.fit(self.df[['a']])
        scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

        # Statistics computed directly from the data frame for comparison
        expected_mean = self.df.a.mean()
        expected_std = np.sqrt(np.var(self.df.a))

        def tensor_attr(value):
            # Expected bundle encoding of a one-element double tensor.
            return {
                "type": {
                    "type": "tensor",
                    "tensor": {
                        "base": "double"
                    }
                },
                "value": {
                    "values": [value],
                    "dimensions": [1]
                }
            }

        expected_model = {
            "op": "standard_scaler",
            "attributes": {
                "mean": tensor_attr(expected_mean),
                "std": tensor_attr(expected_std)
            }
        }

        self.assertAlmostEqual(expected_mean,
                               scaler.mean_.tolist()[0],
                               places=7)
        self.assertAlmostEqual(expected_std,
                               np.sqrt(scaler.var_.tolist()[0]),
                               places=7)

        # model.json should carry the fitted statistics
        model_path = "{}/{}.node/model.json".format(self.tmp_dir, scaler.name)
        with open(model_path) as json_data:
            model = json.load(json_data)

        self.assertEqual(scaler.op, expected_model['op'])
        for attr in ('mean', 'std'):
            self.assertEqual(
                expected_model['attributes'][attr]['value']['dimensions'][0],
                model['attributes'][attr]['value']['dimensions'][0])
        for attr in ('mean', 'std'):
            self.assertAlmostEqual(
                expected_model['attributes'][attr]['value']['values'][0],
                model['attributes'][attr]['value']['values'][0],
                places=7)

        # node.json should carry the node name and input/output shape
        node_path = "{}/{}.node/node.json".format(self.tmp_dir, scaler.name)
        with open(node_path) as json_data:
            node = json.load(json_data)

        self.assertEqual(scaler.name, node['name'])
        self.assertEqual(scaler.input_features,
                         node['shape']['inputs'][0]['name'])
        self.assertEqual(scaler.output_features,
                         node['shape']['outputs'][0]['name'])
Example #3
    def test_standard_scaler_serializer(self):
        """Serialize a StandardScaler fitted behind a FeatureExtractor and
        validate the written model.json and node.json files."""

        scaler = StandardScaler(with_mean=True, with_std=True)

        input_cols = ['a']
        extractor = FeatureExtractor(
            input_scalars=['a'],
            output_vector='extracted_a_output',
            output_vector_items=["{}_out".format(col) for col in input_cols])

        scaler.mlinit(prior_tf=extractor, output_features='a_scaled')
        scaler.fit(self.df[['a']])
        scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

        # Statistics computed directly from the data frame for comparison
        expected_mean = self.df.a.mean()
        expected_std = np.sqrt(np.var(self.df.a))

        def tensor_attr(value):
            # Expected bundle encoding of a one-element double tensor
            # (flat "double"/"shape" layout used by this bundle format).
            return {
                "double": [value],
                "shape": {
                    "dimensions": [{
                        "size": 1,
                        "name": ""
                    }]
                },
                "type": "tensor"
            }

        expected_model = {
            "op": "standard_scaler",
            "attributes": {
                "mean": tensor_attr(expected_mean),
                "std": tensor_attr(expected_std)
            }
        }

        self.assertAlmostEqual(expected_mean,
                               scaler.mean_.tolist()[0],
                               places=7)
        self.assertAlmostEqual(expected_std,
                               np.sqrt(scaler.var_.tolist()[0]),
                               places=7)

        # model.json should carry the fitted statistics
        model_path = "{}/{}.node/model.json".format(self.tmp_dir, scaler.name)
        with open(model_path) as json_data:
            model = json.load(json_data)

        self.assertEqual(scaler.op, expected_model['op'])
        for attr in ('mean', 'std'):
            self.assertEqual(
                expected_model['attributes'][attr]['shape']['dimensions'][0]
                ['size'],
                model['attributes'][attr]['shape']['dimensions'][0]['size'])
        for attr in ('mean', 'std'):
            self.assertAlmostEqual(
                expected_model['attributes'][attr]['double'][0],
                model['attributes'][attr]['double'][0],
                places=7)

        # node.json should carry the node name and input/output shape
        node_path = "{}/{}.node/node.json".format(self.tmp_dir, scaler.name)
        with open(node_path) as json_data:
            node = json.load(json_data)

        self.assertEqual(scaler.name, node['name'])
        self.assertEqual(scaler.input_features,
                         node['shape']['inputs'][0]['name'])
        self.assertEqual(scaler.output_features,
                         node['shape']['outputs'][0]['name'])