Beispiel #1
0
 def setUpClass(cls):
     """Create the records, sources and text classification model for the tests.

     ``DATA`` is split column-wise into the ``A`` feature values and the ``X``
     values; one ``Record`` is created per row, keyed by the row index. The
     model is configured to store its files in a temporary directory.
     """
     cls.features = Features()
     cls.features.append(Feature("A", str, 1))
     column_a, column_x = list(zip(*DATA))
     cls.records = [
         Record(str(index), data={"features": {"A": a_value, "X": x_value}})
         for index, (a_value, x_value) in enumerate(zip(column_a, column_x))
     ]
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
     cls.model_dir = tempfile.TemporaryDirectory()
     # NOTE(review): the second layer passes ``relu`` unquoted, unlike the
     # other two layers -- confirm this is what the layer parser expects.
     layer_definitions = [
         "Dense(units = 120, activation='relu')",
         "Dense(units = 64, activation=relu)",
         "Dense(units = 2, activation='softmax')",
     ]
     model_config = TextClassifierConfig(
         directory=cls.model_dir.name,
         classifications=[0, 1],
         features=cls.features,
         predict=Feature("X", int, 1),
         add_layers=True,
         layers=layer_definitions,
         model_path=
         "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1",
         epochs=30,
     )
     cls.model = TextClassificationModel(model_config)
Beispiel #2
0
 async def test_model(self):
     """Configure the fake "salary" model over HTTP, then create a context.

     ``Model.load`` is patched with ``model_load`` while the test runs and the
     model directory is a temporary directory that is removed on exit.
     """
     with tempfile.TemporaryDirectory() as tempdir, patch.object(
             Model, "load", new=model_load):
         cli_args = (
             "--model-directory",
             tempdir,
             "--model-features",
             "Years:int:1",
             "Experiance:int:1",
             "--model-predict",
             "Salary:float:1",
         )
         config = parse_unknown(*cli_args)
         async with self.post("/configure/model/fake/salary",
                              json=config) as response:
             self.assertEqual(await response.json(), OK)
             models = self.cli.app["models"]
             self.assertIn("salary", models)
             # The stored config must match what the arguments describe.
             expected_config = FakeModelConfig(
                 directory=pathlib.Path(tempdir),
                 features=Features(
                     Feature("Years", int, 1),
                     Feature("Experiance", int, 1),
                 ),
                 predict=Feature("Salary", float, 1),
             )
             self.assertEqual(models["salary"].config, expected_config)
             with self.subTest(context="salaryctx"):
                 # Create the context
                 async with self.get(
                         "/context/model/salary/salaryctx") as ctx_response:
                     self.assertEqual(await ctx_response.json(), OK)
                     contexts = self.cli.app["model_contexts"]
                     self.assertIn("salaryctx", contexts)
Beispiel #3
0
 def setUpClass(cls):
     """Create 2000 labelled records and the DNN classifier under test.

     The first 1000 records have ``starts_with_a`` set to 1 and the string
     "a"; the next 1000 have it set to 0 and the string "not a". Record keys
     are the group prefix followed by a random number.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature = Feature("starts_with_a", int, 1)
     cls.features = Features(cls.feature)
     # (key prefix, feature value, target string) for each group of records.
     record_groups = (("a", 1, "a"), ("b", 0, "not a"))
     cls.records = [
         Record(
             prefix + str(random.random()),
             data={"features": {
                 cls.feature.name: flag,
                 "string": label
             }},
         )
         for prefix, flag, label in record_groups
         for _ in range(1000)
     ]
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
     classifier_config = DNNClassifierModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=40,
         hidden=[50, 20, 10],
         predict=Feature("string", str, 1),
         classifications=["a", "not a"],
         clstype=str,
         features=cls.features,
     )
     cls.model = DNNClassifierModel(classifier_config)
Beispiel #4
0
class TestFeature(AsyncTestCase):
    """Checks the defaults and the loading/conversion helpers of ``Feature``."""

    def setUp(self):
        """Give each test a default-constructed ``Feature``."""
        self.feature = Feature()

    def test_default_dtype(self):
        """A default feature reports ``int`` from ``dtype()``."""
        reported_dtype = self.feature.dtype()
        self.assertEqual(reported_dtype, int)

    def test_default_length(self):
        """A default feature reports 1 from ``length()``."""
        reported_length = self.feature.length()
        self.assertEqual(reported_length, 1)

    async def test_default_applicable(self):
        """A default feature reports ``True`` from ``applicable()``."""
        sample = Data("test")
        is_applicable = await self.feature.applicable(sample)
        self.assertEqual(is_applicable, True)

    def test_load_def(self):
        """``load_def`` yields a feature with the requested name, dtype, length."""
        loaded = Feature.load_def("test", "float", 10)
        self.assertEqual(loaded.NAME, "test")
        self.assertEqual(loaded.dtype(), float)
        self.assertEqual(loaded.length(), 10)

    def test_convert_dtype(self):
        """The string "float" converts to the ``float`` type."""
        converted = Feature.convert_dtype("float")
        self.assertEqual(converted, float)

    def test_convert_dtype_invalid(self):
        """An unknown type name raises ``TypeError`` with "Failed to convert"."""
        invalid_name = "not a python data type"
        with self.assertRaisesRegex(TypeError, "Failed to convert"):
            Feature.convert_dtype(invalid_name)
Beispiel #5
0
 def setUpClass(cls):
     """Create the DNN regression model and 2000 synthetic training records.

     Each record holds two random inputs and a ``TARGET`` computed as
     ``2 * feature_1 + 3 * feature_2``.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature1 = Feature("feature_1", float, 1)
     cls.feature2 = Feature("feature_2", float, 1)
     cls.features = Features(cls.feature1, cls.feature2)
     regression_config = DNNRegressionModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=40,
         hidden=[50, 20, 10],
         predict=Feature("TARGET", float, 1),
         features=cls.features,
     )
     cls.model = DNNRegressionModel(regression_config)
     # Generating data f(x1,x2) = 2*x1 + 3*x2
     sample_count = 2000
     first_inputs, second_inputs = np.random.rand(2, sample_count)
     cls.records = [
         Record(
             "x" + str(random.random()),
             data={
                 "features": {
                     cls.feature1.name: float(x1),
                     cls.feature2.name: float(x2),
                     "TARGET": 2 * x1 + 3 * x2,
                 }
             },
         ) for x1, x2 in zip(first_inputs, second_inputs)
     ]
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
Beispiel #6
0
class TestFeature(AsyncTestCase):
    """Checks the default behaviour of a ``Feature`` built without arguments."""

    def setUp(self):
        """Give each test a default-constructed ``Feature``."""
        self.feature = Feature()

    def test_default_dtype(self):
        """A default feature reports ``int`` from ``dtype()``."""
        reported_dtype = self.feature.dtype()
        self.assertEqual(reported_dtype, int)

    def test_default_length(self):
        """A default feature reports 1 from ``length()``."""
        reported_length = self.feature.length()
        self.assertEqual(reported_length, 1)

    async def test_default_applicable(self):
        """A default feature reports ``True`` from ``applicable()``."""
        sample = Data('test')
        is_applicable = await self.feature.applicable(sample)
        self.assertEqual(is_applicable, True)
Beispiel #7
0
 def feature_feature_column(self, feature: Feature):
     '''
     Creates a feature column for a feature

     Returns a numeric column named ``feature.NAME`` with shape
     ``feature.length()`` when ``feature.dtype()`` is ``int``, ``float`` or a
     subclass of either. For any other dtype, including values that are not
     classes, a warning is logged and ``None`` is returned.
     '''
     dtype = feature.dtype()
     # issubclass() raises TypeError for non-classes, so guard with isclass().
     if inspect.isclass(dtype) and issubclass(dtype, (int, float)):
         return self._tf.feature_column.numeric_column(feature.NAME,
                 shape=feature.length())
     # Pass dtype as a logging argument rather than using ``%`` directly:
     # ``'%r' % (dtype)`` breaks when dtype is itself a tuple.
     LOGGER.warning('Unknown dtype %r. Could not create column', dtype)
     return None
Beispiel #8
0
    def setUpClass(cls):
        """Create training/prediction records, their sources and the NER model.

        ``TRAIN_DATA`` rows provide a sentence id, a word and an NER tag;
        ``PREDICT_DATA`` rows provide only a sentence id and a word. Records
        are keyed by their row index.
        """
        train_ids, train_words, train_tags = list(zip(*TRAIN_DATA))
        predict_ids, predict_words = list(zip(*PREDICT_DATA))

        cls.train_records = [
            Record(
                str(index),
                data={
                    "features": {
                        "sentence_id": sentence_id,
                        "words": word,
                        "ner_tag": tag,
                    }
                },
            )
            for index, (sentence_id, word, tag) in enumerate(
                zip(train_ids, train_words, train_tags)
            )
        ]
        train_source = MemorySource(
            MemorySourceConfig(records=cls.train_records)
        )
        cls.train_sources = Sources(train_source)

        cls.predict_records = [
            Record(
                str(index),
                data={"features": {"sentence_id": sentence_id, "words": word}},
            )
            for index, (sentence_id, word) in enumerate(
                zip(predict_ids, predict_words)
            )
        ]
        predict_source = MemorySource(
            MemorySourceConfig(records=cls.predict_records)
        )
        cls.predict_sources = Sources(predict_source)

        cls.model_dir = tempfile.TemporaryDirectory()
        ner_config = NERModelConfig(
            sid=Feature("sentence_id", int, 1),
            words=Feature("words", str, 1),
            predict=Feature("ner_tag", str, 1),
            output_dir=cls.model_dir.name,
            model_architecture_type="bert",
            model_name_or_path="bert-base-cased",
            no_cuda=True,
        )
        cls.model = NERModel(ner_config)
Beispiel #9
0
 def _feature_feature_column(self, feature: Feature):
     """
     Creates a feature column for a feature

     Returns a numeric column named ``feature.NAME`` with shape
     ``feature.length()`` when ``feature.dtype()`` is ``int``, ``float`` or a
     subclass of either. For any other dtype, including values that are not
     classes, a warning is logged and ``None`` is returned.
     """
     dtype = feature.dtype()
     # issubclass() raises TypeError for non-classes, so guard with isclass().
     if inspect.isclass(dtype) and issubclass(dtype, (int, float)):
         return tensorflow.feature_column.numeric_column(
             feature.NAME, shape=feature.length())
     # Pass dtype as a logging argument rather than using ``%`` directly:
     # ``"%r" % (dtype)`` breaks when dtype is itself a tuple.
     self.logger.warning("Unknown dtype %r. Could not create column", dtype)
     return None
Beispiel #10
0
    def setUpClass(cls):
        """Create the records, sources and Hugging Face classification model.

        ``DATA`` is split column-wise into the ``A`` feature values and the
        ``X`` values; one ``Record`` is created per row, keyed by row index.
        The same temporary directory is used for cache, logging and output.
        """
        cls.features = Features()
        cls.features.append(Feature("A", str, 1))
        column_a, column_x = list(zip(*DATA))
        cls.records = [
            Record(str(index), data={"features": {"A": a_value, "X": x_value}})
            for index, (a_value, x_value) in enumerate(zip(column_a, column_x))
        ]

        memory_source = MemorySource(MemorySourceConfig(records=cls.records))
        cls.sources = Sources(memory_source)
        cls.model_dir = tempfile.TemporaryDirectory()
        shared_dir = cls.model_dir.name
        model_config = HFClassificationModelConfig(
            model_name_or_path="bert-base-cased",
            cache_dir=shared_dir,
            logging_dir=shared_dir,
            output_dir=shared_dir,
            features=cls.features,
            predict=Feature("X", int, 1),
            label_list=["0", "1"],
        )
        cls.model = HFClassificationModel(model_config)
Beispiel #11
0
 async def test_model(self):
     """Train, score and predict with the regression model, retrying if needed.

     Up to five attempts are discarded (fresh model directory each time) when
     the score is at or below 0.8. The first attempt that is not discarded
     runs the assertions and then ends the loop, so a passing model is not
     retrained and re-checked.
     """
     test_feature_val = [
         0,
         1.5,
         2,
     ]  # inserting zero so that its 1-indexable
     test_target = 2 * test_feature_val[1] + 3 * test_feature_val[2]
     # should be same function used in TestDNN.setupclass
     a = Record(
         "a",
         data={
             "features": {
                 self.feature1.name: test_feature_val[1],
                 self.feature2.name: test_feature_val[2],
             }
         },
     )
     target_name = self.model.config.predict.name
     scorer = MeanSquaredErrorAccuracy()
     for i in range(0, 7):
         await train(self.model, self.sources)
         res = await score(self.model, scorer, Feature("TARGET", float, 1),
                           self.sources)
         # Retry because of tensorflow intermittent low accuracy
         # NOTE(review): the scorer is a mean squared error, where lower is
         # presumably better -- confirm that retrying on ``res <= 0.8`` is
         # the intended direction.
         if res <= 0.8 and i < 5:
             print("Retry i:", i, "accuracy:", res)
             # Start the next attempt from an empty model directory.
             self.model_dir.cleanup()
             self.model_dir = tempfile.TemporaryDirectory()
             self.model.config = self.model.config._replace(
                 location=pathlib.Path(self.model_dir.name))
             continue
         self.assertGreater(res, 0.0)
         res = [
             record
             async for record in predict(self.model, a, keep_record=True)
         ]
         self.assertEqual(len(res), 1)
         self.assertEqual(res[0].key, a.key)
         # NOTE(review): the 1e-6 is added to the ratio, not the denominator;
         # left unchanged because test_target is non-zero here.
         test_error_norm = abs(
             (test_target - res[0].prediction(target_name).value) /
             test_target + 1e-6)
         error_threshold = 0.3
         self.assertLess(test_error_norm, error_threshold)
         # All checks passed for this attempt; do not retrain and re-assert.
         break
Beispiel #12
0
 async def test_model(self):
     """Train, score and predict with the classifier, retrying on low accuracy.

     Up to five attempts are discarded (fresh model directory each time) when
     the accuracy is at or below 0.9. The first attempt that is not discarded
     runs the assertions and then ends the loop, so a passing model is not
     retrained and re-checked.
     """
     scorer = ClassificationAccuracy()
     for i in range(0, 7):
         await train(self.model, self.sources)
         res = await score(self.model, scorer, Feature("string", str, 1),
                           self.sources)
         # Retry because of tensorflow intermittent low accuracy
         if res <= 0.9 and i < 5:
             print("Retry i:", i, "accuracy:", res)
             # Start the next attempt from an empty model directory.
             self.model_dir.cleanup()
             self.model_dir = tempfile.TemporaryDirectory()
             self.model.config = self.model.config._replace(
                 location=self.model_dir.name)
             continue
         self.assertGreater(res, 0.9)
         a = Record("a", data={"features": {self.feature.name: 1}})
         target_name = self.model.config.predict.name
         res = [
             record
             async for record in predict(self.model, a, keep_record=True)
         ]
         self.assertEqual(len(res), 1)
         self.assertEqual(res[0].key, a.key)
         # Only checks that the predicted value is truthy, not which class.
         self.assertTrue(res[0].prediction(target_name).value)
         # All checks passed for this attempt; do not retrain and re-assert.
         break
Beispiel #13
0
 def test_load_def(self):
     """A feature exposes the name, dtype and length it was constructed with."""
     # TODO This test should be removed or its name should be modified.
     expected_name, expected_dtype, expected_length = "test", float, 10
     created = Feature(expected_name, expected_dtype, expected_length)
     self.assertEqual(created.name, expected_name)
     self.assertEqual(created.dtype, expected_dtype)
     self.assertEqual(created.length, expected_length)
Beispiel #14
0
 def setUp(self):
     """Store a ``Feature("name", int, 1)`` on the test instance."""
     self.feature = Feature("name", int, 1)
Beispiel #15
0
 async def setUp(self):
     """Run the parent ``setUp``, then create three features and a ``Features``.

     The individual features are kept as ``self.one``, ``self.two`` and
     ``self.three``; ``self.features`` is built from all three, in that order.
     """
     await super().setUp()
     self.one = Feature("one", int, 1)
     self.two = Feature("two", float, 2)
     self.three = Feature("three", int, 1)
     self.features = Features(self.one, self.two, self.three)
Beispiel #16
0
 async def setUp(self):
     """Run the parent ``setUp``, then store a ``Feature("name", int, 1)``."""
     await super().setUp()
     self.feature = Feature("name", int, 1)
Beispiel #17
0
 def setUp(self):
     """Create three features and a ``Features`` built from them.

     The individual features are kept as ``self.one``, ``self.two`` and
     ``self.three``; ``self.features`` is built from all three, in that order.
     """
     self.one = Feature("one", int, 1)
     self.two = Feature("two", float, 2)
     self.three = Feature("three", int, 1)
     self.features = Features(self.one, self.two, self.three)
Beispiel #18
0
 def setUp(self):
     """Store a ``Feature`` constructed without arguments on the test instance."""
     self.feature = Feature()
Beispiel #19
0
    def setUpClass(cls):
        """Build features, records, sources, model and scorer for ``MODEL_TYPE``.

        The feature set and the data columns depend on whether ``MODEL_TYPE``
        is a classifier type, a regressor type or "CLUSTERING". The model
        config always receives ``location`` and ``features``; ``predict`` is
        added according to the estimator type of ``MODEL.SCIKIT_MODEL``.
        """

        def build_records(field_names, columns):
            # One record per row, keyed by row index, mapping each field name
            # to the value from the matching column.
            return [
                Record(
                    str(index),
                    data={"features": dict(zip(field_names, row))},
                )
                for index, row in enumerate(zip(*columns))
            ]

        cls.is_multi = "MULTI_" in cls.MODEL_TYPE
        cls.model_dir = tempfile.TemporaryDirectory()
        cls.features = Features()
        if cls.MODEL_TYPE in classifier_types:
            A, B, C, D, E, F, G, H, X, Y = list(
                zip(*FEATURE_DATA_CLASSIFICATION)
            )
            for feature_name in "ABCDEFGH":
                cls.features.append(Feature(feature_name, float, 1))
            if cls.MODEL_TYPE == "CLASSIFICATION":
                cls.features.append(Feature("X", float, 1))
            cls.records = build_records(
                ("A", "B", "C", "D", "E", "F", "G", "H", "X", "Y"),
                (A, B, C, D, E, F, G, H, X, Y),
            )

        elif cls.MODEL_TYPE in regressor_types:
            for feature_name in "ABCD":
                cls.features.append(Feature(feature_name, float, 1))
            if cls.MODEL_TYPE == "REGRESSION":
                cls.features.append(Feature("X", float, 1))
            A, B, C, D, X, Y = list(zip(*FEATURE_DATA_REGRESSION))
            cls.records = build_records(
                ("A", "B", "C", "D", "X", "Y"), (A, B, C, D, X, Y)
            )
        elif cls.MODEL_TYPE == "CLUSTERING":
            for feature_name in "ABCD":
                cls.features.append(Feature(feature_name, float, 1))
            A, B, C, D, X = list(zip(*FEATURE_DATA_CLUSTERING))
            cls.records = build_records(
                ("A", "B", "C", "D", "X"), (A, B, C, D, X)
            )

        memory_source = MemorySource(MemorySourceConfig(records=cls.records))
        cls.sources = Sources(memory_source)
        config_kwargs = {
            "location": cls.model_dir.name,
            "features": cls.features,
        }
        estimator_type = cls.MODEL.SCIKIT_MODEL._estimator_type
        if estimator_type in supervised_estimators:
            if cls.is_multi:
                predict_target = Features(
                    Feature("X", float, 1), Feature("Y", float, 1)
                )
            else:
                predict_target = Feature("X", float, 1)
            config_kwargs["predict"] = predict_target
        elif estimator_type in unsupervised_estimators:
            # TODO If cls.TRUE_CLSTR_PRESENT then we want to use the
            # mutual_info_score scikit accuracy scorer. In this case we might
            # want to change tcluster to a boolean config property.
            # For more info see commit e4f523976bf37d3457cda140ceab7899420ae2c7
            config_kwargs["predict"] = Feature("X", float, 1)
        cls.model = cls.MODEL(cls.MODEL_CONFIG(**config_kwargs))
        cls.scorer = cls.SCORER()
Beispiel #20
0
 def test_load_builtin_features(self):
     """Every entry of ``FEATURES`` must be contained in ``Feature.load()``."""
     loaded = Feature.load()
     for required in FEATURES:
         # One sub-test per entry so that each missing entry is reported.
         with self.subTest(mustLoad=required):
             self.assertIn(required, loaded)
Beispiel #21
0
 async def test_01_accuracy(self):
     """Scoring the model on ``self.sources`` must produce a ``float``."""
     res = await score(
         self.model, self.scorer, Feature("X", float, 1), self.sources
     )
     # assertIsInstance reports the actual value and type on failure,
     # unlike assertTrue(isinstance(...)).
     self.assertIsInstance(res, float)
Beispiel #22
0
    def setUpClass(cls):
        """Create features, records, sources, the VW model and its scorer.

        Features ``A`` to ``F`` are floats, ``G`` and ``H`` are ints. ``DATA``
        is split column-wise and one ``Record`` is created per row, keyed by
        the row index.
        """
        cls.model_dir = tempfile.TemporaryDirectory()
        cls.features = Features()
        for feature_name in "ABCDEF":
            cls.features.append(Feature(feature_name, float, 1))
        for feature_name in "GH":
            cls.features.append(Feature(feature_name, int, 1))

        field_names = ("A", "B", "C", "D", "E", "F", "G", "H", "X")
        A, B, C, D, E, F, G, H, X = list(zip(*DATA))
        cls.records = [
            Record(
                str(index),
                data={"features": dict(zip(field_names, row))},
            )
            for index, row in enumerate(zip(A, B, C, D, E, F, G, H, X))
        ]

        memory_source = MemorySource(MemorySourceConfig(records=cls.records))
        cls.sources = Sources(memory_source)
        vw_config = VWConfig(
            location=cls.model_dir.name,
            features=cls.features,
            predict=Feature("X", float, 1),
            # A and B will be namespace n1
            # A and C will be in namespace n2
            namespace=["n1_A_B", "n2_A_C"],
            importance=Feature("H", int, 1),
            tag=Feature("G", int, 1),
            task="regression",
            vwcmd=[
                "l2",
                "0.1",
                "loss_function",
                "squared",
                "passes",
                "10",
            ],
        )
        cls.model = VWModel(vw_config)
        cls.scorer = MeanSquaredErrorAccuracy()
Beispiel #23
0
 def test_convert_dtype(self):
     """``Feature.convert_dtype("float")`` must return the ``float`` type."""
     self.assertEqual(Feature.convert_dtype("float"), float)
Beispiel #24
0
 def test_convert_dtype_invalid(self):
     """An unknown type name must raise ``TypeError`` matching "Failed to convert"."""
     with self.assertRaisesRegex(TypeError, "Failed to convert"):
         Feature.convert_dtype("not a python data type")
Beispiel #25
0
 def test_load_def(self):
     """``load_def`` yields a feature with the requested name, dtype and length."""
     loaded = Feature.load_def("test", "float", 10)
     # The dtype is passed as a string and must come back as the type itself.
     self.assertEqual(loaded.NAME, "test")
     self.assertEqual(loaded.dtype(), float)
     self.assertEqual(loaded.length(), 10)
Beispiel #26
0
 async def test_01_accuracy(self):
     """Scoring the model on ``self.sources`` must give a result above 0."""
     res = await score(self.model, self.scorer, Feature("X", int, 1),
                       self.sources)
     self.assertGreater(res, 0)
Beispiel #27
0
    Record(
        str(i),
        data={
            "features": {
                "Years": A[i] * 10,
                "Expertise": B[i] * 10,
                "Trust": C[i] * 10,
                "Salary": D[i] * 10,
            }
        },
    )
    for i in range(len(A))
]

# Feature definitions for the "Years", "Expertise", "Trust" and "Salary"
# fields; "Trust" is declared as float, the others as int.
TEST_FEATURE = Features(
    Feature("Years", int, 1),
    Feature("Expertise", int, 1),
    Feature("Trust", float, 1),
    Feature("Salary", int, 1),
)

TEST_DATAFLOW1 = DataFlow(
    operations={
        "edit_feature": edit_feature,
        "associate_definition": AssociateDefinition,
    },
    flow={
        "edit_feature": InputFlow(
            inputs={
                "features": [
                    {"seed": ["Years", "Expertise", "Trust", "Salary"]}
Beispiel #28
0
 def test_feature(self):
     """JSON produced for ``Feature("face")`` via ``JSONEncoder`` contains "face"."""
     self.assertIn("face", json.dumps(Feature("face"), cls=JSONEncoder))
Beispiel #29
0
    def setUpClass(cls):
        """Build features, records, sources and the model for ``MODEL_TYPE``.

        The feature set and data columns depend on whether ``MODEL_TYPE`` is
        "CLASSIFICATION", "REGRESSION" or "CLUSTERING". The model config
        always receives ``directory`` and ``features``. Supervised estimators
        also get ``predict``; unsupervised estimators get ``tcluster`` only
        when ``TRUE_CLSTR_PRESENT`` is set.
        """

        def build_records(field_names, columns):
            # One record per row, keyed by row index, mapping each field name
            # to the value from the matching column.
            return [
                Record(
                    str(index),
                    data={"features": dict(zip(field_names, row))},
                )
                for index, row in enumerate(zip(*columns))
            ]

        cls.model_dir = tempfile.TemporaryDirectory()
        cls.features = Features()
        # Compare with ``==``: ``is`` tests object identity, which is not
        # guaranteed for equal strings and triggers a SyntaxWarning when used
        # with a literal on Python 3.8+.
        if cls.MODEL_TYPE == "CLASSIFICATION":
            for feature_name in "ABCDEFGHI":
                cls.features.append(Feature(feature_name, float, 1))
            A, B, C, D, E, F, G, H, I, X = list(
                zip(*FEATURE_DATA_CLASSIFICATION)
            )
            cls.records = build_records(
                ("A", "B", "C", "D", "E", "F", "G", "H", "I", "X"),
                (A, B, C, D, E, F, G, H, I, X),
            )
        elif cls.MODEL_TYPE == "REGRESSION":
            for feature_name in "ABC":
                cls.features.append(Feature(feature_name, float, 1))
            A, B, C, X = list(zip(*FEATURE_DATA_REGRESSION))
            cls.records = build_records(("A", "B", "C", "X"), (A, B, C, X))
        elif cls.MODEL_TYPE == "CLUSTERING":
            for feature_name in "ABCD":
                cls.features.append(Feature(feature_name, float, 1))
            A, B, C, D, X = list(zip(*FEATURE_DATA_CLUSTERING))
            cls.records = build_records(
                ("A", "B", "C", "D", "X"), (A, B, C, D, X)
            )

        cls.sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.records))
        )
        properties = {
            "directory": cls.model_dir.name,
            "features": cls.features,
        }
        config_fields = dict()
        estimator_type = cls.MODEL.SCIKIT_MODEL._estimator_type
        if estimator_type in supervised_estimators:
            config_fields["predict"] = Feature("X", float, 1)
        elif estimator_type in unsupervised_estimators:
            if cls.TRUE_CLSTR_PRESENT:
                config_fields["tcluster"] = Feature("X", float, 1)
        cls.model = cls.MODEL(
            cls.MODEL_CONFIG(**{**properties, **config_fields})
        )