def setUpClass(cls):
        # Class-level fixture: builds the model under test plus synthetic
        # training and test sources shared by the test methods.

        # Trained model artifacts are written to a throwaway directory
        cls.model_dir = tempfile.TemporaryDirectory()

        # Model configuration: two input features and one predicted target
        model_config = XGBRegressorModelConfig(
            features=Features(
                Feature("Feature1", float, 1),
                Feature("Feature2"),
            ),
            predict=Feature("Target", float, 1),
            directory=cls.model_dir.name,
        )
        cls.model = XGBRegressorModel(model_config)

        # Synthetic data following f(x1, x2) = 2*x1 + 3*x2, with both inputs
        # drawn uniformly from [0, 1)
        sample_count = 2000
        first_inputs, second_inputs = np.random.rand(2, sample_count)
        cls.records = [
            Record(
                # Random suffix used as the record key
                "x" + str(random.random()),
                data={
                    "features": {
                        "Feature1": float(x1),
                        "Feature2": float(x2),
                        "Target": 2 * x1 + 3 * x2,
                    }
                },
            )
            for x1, x2 in zip(first_inputs, second_inputs)
        ]

        # First 1800 records train the model, the remaining 200 evaluate it
        train_records = cls.records[:1800]
        test_records = cls.records[1800:]
        cls.trainingsource = Sources(
            MemorySource(MemorySourceConfig(records=train_records)))
        cls.testsource = Sources(
            MemorySource(MemorySourceConfig(records=test_records)))
Esempio n. 2
0
    def test_predict(self):
        # Train, assess and predict with the scikit linear regression model
        # using the CSV files prepared on this test case.
        self.required_plugins("dffml-model-scikit")
        # Load the scikit plugin module by name
        scikit_plugin = importlib.import_module("dffml_model_scikit")

        # Three input features are used to predict the "Salary" column
        model = scikit_plugin.LinearRegressionModel(
            directory=self.mktempdir(),
            predict=Feature("Salary", int, 1),
            features=Features(
                Feature("Years", int, 1),
                Feature("Expertise", int, 1),
                Feature("Trust", float, 1),
            ),
        )

        # One CSV source per phase
        training_data = CSVSource(filename=self.train_filename)
        test_data = CSVSource(filename=self.test_filename)
        predict_data = CSVSource(filename=self.predict_filename)

        train(model, training_data)
        # The accuracy value itself is not checked, only that scoring runs
        accuracy(model, test_data)

        # Collect every prediction so results can be checked by position
        predictions = list(predict(model, predict_data))

        # Index 2 of each prediction holds the mapping of predicted values
        for position, expected_salary in enumerate((70, 80)):
            predicted = predictions[position][2]["Salary"]["value"]
            self.assertEqual(round(predicted), expected_salary)
Esempio n. 3
0
 def test_config_set(self):
     # Parse a complete set of explicitly given command line options into a
     # FakeTesting config and verify every resulting value.
     argv = [
         "--test-fake-name", "feedface",
         "--test-num", "-4.2",
         "--test-fake-label", "default-label",
         # Flag passed without a value
         "--test-fake-readonly",
         "--test-files", "a", "b", "c",
         "--test-fake-source", "csv",
         "--test-source-filename", "file.csv",
         "--test-features", "Year:int:1", "Commits:int:10",
     ]
     config = FakeTesting.config(parse_unknown(*argv))

     self.assertEqual(config.num, -4.2)
     self.assertEqual(config.files, ["a", "b", "c"])
     self.assertEqual(config.name, "feedface")
     self.assertEqual(config.label, "default-label")
     self.assertTrue(config.readonly)
     # assertIsInstance names the actual type on failure, whereas
     # assertTrue(isinstance(...)) only reports "False is not true"
     self.assertIsInstance(config.source, CSVSource)
     self.assertEqual(config.source.config.filename, "file.csv")
     # Each "name:type:length" string must have become a feature definition
     self.assertEqual(
         config.features,
         Features(
             DefFeature("Year", int, 1),
             DefFeature("Commits", int, 10),
         ),
     )
Esempio n. 4
0
 async def test_model(self):
     # Configure a fake model through the HTTP API, check the stored config,
     # then create a model context for it.

     # Model.load is replaced with model_load while the test body runs
     load_patch = patch.object(Model, "load", new=model_load)
     with tempfile.TemporaryDirectory() as tempdir, load_patch:
         request_body = parse_unknown(
             "--model-directory",
             tempdir,
             "--model-features",
             "Years:int:1",
             "Experiance:int:1",
             "--model-predict",
             "Salary:float:1",
         )
         async with self.post(
                 "/configure/model/fake/salary",
                 json=request_body) as configure_response:
             self.assertEqual(await configure_response.json(), OK)
             # The model must be registered under the label from the URL
             registered_models = self.cli.app["models"]
             self.assertIn("salary", registered_models)
             # The stored config must match the one described by the
             # command line style arguments sent in the request
             expected_config = FakeModelConfig(
                 directory=pathlib.Path(tempdir),
                 features=Features(
                     Feature("Years", int, 1),
                     Feature("Experiance", int, 1),
                 ),
                 predict=Feature("Salary", float, 1),
             )
             self.assertEqual(registered_models["salary"].config,
                              expected_config)
             with self.subTest(context="salaryctx"):
                 # Create the context
                 async with self.get(
                         "/context/model/salary/salaryctx"
                 ) as context_response:
                     self.assertEqual(await context_response.json(), OK)
                     self.assertIn("salaryctx",
                                   self.cli.app["model_contexts"])
Esempio n. 5
0
class FakeTestingConfig2:
    # Field declarations for a second fake configuration used in tests.
    # NOTE(review): no decorator is visible in this view; presumably a
    # config/dataclass style decorator sits directly above — confirm.

    # `field` is defined outside this view; the string is presumably the
    # field's description rather than its default value — TODO confirm.
    name: str = field("Name of FakeTesting2")
    # Declared without a default value
    num: float
    # Default is a Features collection holding two feature definitions
    features: Features = Features(
        DefFeature("default", int, 1), DefFeature("features", int, 10)
    )
    # Defaults to the literal "unlabeled"
    label: str = "unlabeled"
Esempio n. 6
0
 def test_config_defaults(self):
     # Parse only some of the available options and verify that the ones
     # left out (label, readonly flag, source type) come back with the
     # values asserted below.
     argv = [
         "--test-fake-name", "feedface",
         "--test-num", "-4.2",
         "--test-files", "a", "b", "c",
         "--test-source-filename", "file.json",
         "--test-features", "Year:int:1", "Commits:int:10",
         "--test-fake-nums", "100",
     ]
     config = FakeTesting.config(parse_unknown(*argv))

     self.assertEqual(config.num, -4.2)
     self.assertEqual(config.files, ["a", "b", "c"])
     self.assertEqual(config.name, "feedface")
     # Not given on the command line
     self.assertEqual(config.label, "unlabeled")
     self.assertFalse(config.readonly)
     # assertIsInstance names the actual type on failure, whereas
     # assertTrue(isinstance(...)) only reports "False is not true"
     self.assertIsInstance(config.source, JSONSource)
     self.assertEqual(config.source.config.filename,
                      pathlib.Path("file.json"))
     # Each "name:type:length" string must have become a feature
     self.assertEqual(
         config.features,
         Features(Feature("Year", int, 1), Feature("Commits", int, 10)),
     )
     # A single value still yields a one element tuple
     self.assertEqual(config.nums, (100, ))
Esempio n. 7
0
    def setUpClass(cls):
        # Class-level fixture: builds the anomaly model under test plus
        # synthetic training and test sources shared by the test methods.

        # Trained model artifacts are written to a throwaway directory
        cls.model_dir = tempfile.TemporaryDirectory()

        # Two input features, "A" and "B", and one predicted label "Y"
        cls.model = AnomalyModel(
            features=Features(Feature("A", int, 1), Feature("B", int, 2)),
            predict=Feature("Y", int, 1),
            directory=cls.model_dir.name,
        )

        # Both inputs are sampled from a normal distribution with mean 2
        # and standard deviation 1
        sample_count = 1800
        a_values, b_values = np.random.normal(2, 1, size=(2, sample_count))
        cls.records = [
            Record(
                # Random suffix used as the record key
                "x" + str(random.random()),
                data={
                    "features": {
                        "A": float(a),
                        "B": float(b),
                        # Label is 1 when a > 1 - b, otherwise 0
                        "Y": (a > 1 - b).astype(int),
                    }
                },
            )
            for a, b in zip(a_values, b_values)
        ]

        # First 1400 records train the model, the remaining 400 evaluate it
        train_records = cls.records[:1400]
        test_records = cls.records[1400:]
        cls.trainingsource = Sources(
            MemorySource(MemorySourceConfig(records=train_records)))
        cls.testsource = Sources(
            MemorySource(MemorySourceConfig(records=test_records)))
Esempio n. 8
0
    async def test_predict(self):
        # Train, score and predict with the scikit linear regression model,
        # first from CSV sources and then from the data passed directly.
        self.required_plugins("dffml-model-scikit")
        # Load the scikit plugin module by name
        scikit_plugin = importlib.import_module("dffml_model_scikit")

        def check_salaries(results):
            # Index 2 of each prediction holds the mapping of predicted values
            for position, expected_salary in enumerate((70, 80)):
                predicted = results[position][2]["Salary"]["value"]
                self.assertEqual(round(predicted), expected_salary)

        # Three input features are used to predict the "Salary" column
        model = scikit_plugin.LinearRegressionModel(
            location=self.mktempdir(),
            predict=Feature("Salary", int, 1),
            features=Features(
                Feature("Years", int, 1),
                Feature("Expertise", int, 1),
                Feature("Trust", float, 1),
            ),
        )

        # One CSV source per phase
        training_data = CSVSource(filename=self.train_filename)
        test_data = CSVSource(filename=self.test_filename)
        predict_data = CSVSource(filename=self.predict_filename)

        await train(model, training_data)
        # The score itself is not checked, only that scoring runs
        scorer = MeanSquaredErrorAccuracy()
        await score(model, scorer, Feature("Salary", int, 1), test_data)
        from_sources = [
            result async for result in predict(model, predict_data)
        ]
        check_salaries(from_sources)

        # Test input data as list
        await train(model, *self.train_data)
        await score(model, scorer, Feature("Salary", int, 1), *self.test_data)
        from_lists = [
            result async for result in predict(model, *self.predict_data)
        ]
        check_salaries(from_lists)
Esempio n. 9
0
from dffml_model_tensorflow.dnnr import (
    DNNRegressionModel,
    DNNRegressionModelConfig,
)

# The three CSV sources differ only in file name; all are opened read-only
training_data, test_data, predict_data = (
    CSVSource(CSVSourceConfig(filename=csv_name, readonly=True))
    for csv_name in ("training.csv", "test.csv", "predict.csv")
)

# Regression model predicting "Salary" from three input features
model = DNNRegressionModel(
    DNNRegressionModelConfig(
        features=Features(
            DefFeature("Years", int, 1),
            DefFeature("Expertise", int, 1),
            DefFeature("Trust", float, 1),
        ),
        predict="Salary",
    )
)

# Each command object is constructed and then invoked immediately
Train(model=model, sources=[training_data])()

accuracy = Accuracy(model=model, sources=[test_data])()

# Unpacking requires the prediction command to yield exactly two rows
row0, row1 = PredictAll(model=model, sources=[predict_data])()

print("Accuracy", accuracy)
print(row0, row1, sep="\n")