Esempio n. 1
0
 async def test_tag(self):
     """
     Store one record through an untagged source and one through a source
     tagged ``"sometag"``, read both back, then parse ``self.testfile`` to
     confirm each tag holds its own row for key ``"0"``.
     """
     with tempfile.TemporaryDirectory() as workdir:
         self.testfile = os.path.join(workdir, str(random.random()))
         plain = await self.setUpSource()
         labelled = await self.setUpSource()
         labelled.config = labelled.config._replace(tag="sometag")
         async with plain, labelled:
             # First pass: store a different feature under the same key
             async with plain() as plain_ctx, labelled() as labelled_ctx:
                 feed_record = Record("0", data={"features": {"feed": 1}})
                 await plain_ctx.update(feed_record)
                 face_record = Record("0", data={"features": {"face": 2}})
                 await labelled_ctx.update(face_record)
             # Second pass: fresh contexts must return what was stored
             async with plain() as plain_ctx, labelled() as labelled_ctx:
                 fetched = await plain_ctx.record("0")
                 self.assertIn("feed", fetched.features())
                 fetched = await labelled_ctx.record("0")
                 self.assertIn("face", fetched.features())
         # Inspect the raw file once both sources have been exited
         with open(self.testfile, "r") as fd:
             rows = {}
             for row in csv.DictReader(fd, dialect="strip"):
                 rows[row["tag"]] = {row["key"]: row}
             # (tag, feature column, expected cell text)
             expectations = (
                 ("untagged", "feed", "1"),
                 ("sometag", "face", "2"),
             )
             for tag, _, _ in expectations:
                 self.assertIn(tag, rows)
             for tag, _, _ in expectations:
                 self.assertIn("0", rows[tag])
             for tag, column, _ in expectations:
                 self.assertIn(column, rows[tag]["0"])
             for tag, column, cell in expectations:
                 self.assertEqual(cell, rows[tag]["0"][column])
Esempio n. 2
0
File: db.py Progetto: emrul/dffml
    async def record(self, key: str):
        """
        Look up the row whose ``key`` column equals ``key`` and convert it
        into a record.

        Columns starting with ``feature_`` become features (prefix removed).
        Columns ending with ``_value`` become predictions, paired with the
        matching ``<target>_confidence`` column.

        Returns a record holding the row's data, or an empty record for
        ``key`` when the lookup yields no row.
        """
        record = Record(key)
        async with self.parent.db() as db_ctx:
            try:
                row = await db_ctx.lookup(
                    self.parent.config.table_name,
                    cols=None,  # None turns into *. We want all columns
                    conditions=[[Condition("key", "=", key)]],
                ).__anext__()
            except StopAsyncIteration:
                # No matching row: the async generator reached its end
                return record

        if row is not None:
            feature_prefix = "feature_"
            value_suffix = "_value"
            features = {}
            predictions = {}
            # Use a distinct loop name so the ``key`` argument is not shadowed
            for column, value in row.items():
                if column.startswith(feature_prefix):
                    # Strip only the leading prefix; str.replace would also
                    # remove later occurrences inside the feature name
                    features[column[len(feature_prefix):]] = value
                elif column.endswith(value_suffix):
                    # Match on the suffix only, so a column that merely
                    # contains "_value" is not mistaken for a prediction
                    target = column[:-len(value_suffix)]
                    predictions[target] = {
                        "value": value,
                        "confidence": row[target + "_confidence"],
                    }
            record.merge(
                Record(
                    row["key"],
                    data={
                        "features": features,
                        "prediction": predictions
                    },
                ))
        return record
Esempio n. 3
0
    async def test_ini(self):
        """
        Save two records to an INI file and load them back.

        The loaded feature names are expected in lower case.
        """
        with TemporaryDirectory() as testdir:
            self.testfile = os.path.join(testdir, "testfile.ini")
            # Source backed by the temporary file
            source = INISource(
                filename=self.testfile, allowempty=True, readwrite=True
            )
            # Write both sections in a single save call
            first = Record("section1", data={"features": {"A": 1, "B": 2}})
            second = Record("section2", data={"features": {"C": 3, "D": 4}})
            await save(source, first, second)
            # Read everything back
            records = []
            async for loaded in load(source):
                records.append(loaded)

            self.assertIsInstance(records, list)
            self.assertEqual(len(records), 2)
            expected_features = ({"a": 1, "b": 2}, {"c": 3, "d": 4})
            for loaded, expected in zip(records, expected_features):
                self.assertDictEqual(loaded.features(), expected)
Esempio n. 4
0
    async def record(self, key: str):
        """
        Run the configured ``record_query`` for ``key`` and convert the
        fetched row into a record.

        Columns starting with ``feature_`` become features (prefix removed).
        Columns ending with ``_value`` become predictions, paired with the
        matching ``<target>_confidence`` column.

        Returns a record holding the row's data, or an empty record for
        ``key`` when the query returns no row.
        """
        query = self.parent.config.record_query
        record = Record(key)
        db = self.conn
        await db.execute(query, (key, ))
        row = await db.fetchone()

        if row is not None:
            feature_prefix = "feature_"
            value_suffix = "_value"
            features = {}
            predictions = {}
            # Use a distinct loop name so the ``key`` argument is not shadowed
            for column, value in row.items():
                if column.startswith(feature_prefix):
                    # Strip only the leading prefix; str.replace would also
                    # remove later occurrences inside the feature name
                    features[column[len(feature_prefix):]] = value
                elif column.endswith(value_suffix):
                    # Match on the suffix only, so a column that merely
                    # contains "_value" is not mistaken for a prediction
                    target = column[:-len(value_suffix)]
                    predictions[target] = {
                        "value": value,
                        "confidence": row[target + "_confidence"],
                    }
            record.merge(
                Record(
                    row["key"],
                    data={
                        "features": features,
                        "prediction": predictions
                    },
                ))
        return record
Esempio n. 5
0
 def setUpClass(cls):
     """
     Build the shared fixtures: a ``StartsWithA`` feature, a ``MiscModel``
     stored in a temporary directory, and 2000 in-memory records (1000 with
     the feature set to 1 followed by 1000 with it set to 0).
     """
     cls.feature = StartsWithA()
     cls.features = Features(cls.feature)
     cls.model_dir = tempfile.TemporaryDirectory()
     model_config = MiscModelConfig(
         directory=cls.model_dir.name,
         classifications=["not a", "a"],
         features=cls.features,
     )
     cls.model = MiscModel(model_config)
     # (key prefix, feature value, "string" value) for each half of the data
     groups = (("a", 1, "a"), ("b", 0, "not a"))
     cls.records = []
     for prefix, flag, text in groups:
         for _ in range(1000):
             cls.records.append(
                 Record(
                     prefix + str(random.random()),
                     data={
                         "features": {cls.feature.NAME: flag, "string": text}
                     },
                 )
             )
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
Esempio n. 6
0
 def setUpClass(cls):
     """
     Build the shared fixtures: a ``starts_with_a`` feature, 2000 in-memory
     records (1000 with the feature set to 1 followed by 1000 with it set
     to 0) and a ``DNNClassifierModel`` that predicts the ``string`` feature.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature = Feature("starts_with_a", int, 1)
     cls.features = Features(cls.feature)
     # (key prefix, feature value, "string" value) for each half of the data
     groups = (("a", 1, "a"), ("b", 0, "not a"))
     cls.records = []
     for prefix, flag, text in groups:
         for _ in range(1000):
             cls.records.append(
                 Record(
                     prefix + str(random.random()),
                     data={
                         "features": {cls.feature.name: flag, "string": text}
                     },
                 )
             )
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
     model_config = DNNClassifierModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=40,
         hidden=[50, 20, 10],
         predict=Feature("string", str, 1),
         classifications=["a", "not a"],
         clstype=str,
         features=cls.features,
     )
     cls.model = DNNClassifierModel(model_config)
Esempio n. 7
0
    def setUpClass(cls):
        """
        Turn ``TRAIN_DATA`` and ``TEST_DATA`` into in-memory record sources
        and create a ``QAModel`` whose output and log directories are a
        temporary directory.
        """
        # Transpose the row-wise data into one tuple per column
        A_train, B_train, C_train, X_train, D_train, E_train = list(
            zip(*TRAIN_DATA))
        A_test, B_test, C_test, X_test, D_test, E_test = list(zip(*TEST_DATA))

        def to_records(*columns):
            # One record per row, keyed by the row's position
            built = []
            for index, row in enumerate(zip(*columns)):
                title, context, question, answer_text, start, impossible = row
                built.append(
                    Record(
                        str(index),
                        data={
                            "features": {
                                "title": title,
                                "context": context,
                                "question": question,
                                "answer_text": answer_text,
                                "start_pos_char": start,
                                "is_impossible": impossible,
                                "answers": [],
                            }
                        },
                    ))
            return built

        cls.train_records = to_records(A_train, B_train, C_train, X_train,
                                       D_train, E_train)
        cls.test_records = to_records(A_test, B_test, C_test, X_test, D_test,
                                      E_test)

        train_memory = MemorySource(
            MemorySourceConfig(records=cls.train_records))
        cls.train_sources = Sources(train_memory)
        test_memory = MemorySource(
            MemorySourceConfig(records=cls.test_records))
        cls.test_sources = Sources(test_memory)
        cls.model_dir = tempfile.TemporaryDirectory()
        model_config = QAModelConfig(
            model_name_or_path="bert-base-cased",
            cache_dir=CACHE_DIR,
            directory=cls.model_dir.name,
            log_dir=cls.model_dir.name,
            model_type="bert",
            no_cuda=True,
        )
        cls.model = QAModel(model_config)
Esempio n. 8
0
 def setUp(self):
     """Create one record with only a key and one with data and extra set."""
     self.null = Record("null")
     full_data = {
         "features": {"dead": "beef"},
         "extra": {"extra": "read all about it"},
     }
     self.full = Record("full", data=full_data, extra={"half": True})
Esempio n. 9
0
    async def test_save_and_load(self):
        """
        Save two records to a CSV source, then load all of them and load
        only key ``"1"``, comparing the exported dictionaries.

        The expected prediction values are strings after the round trip.
        """
        source = CSVSource(
            filename=self.save_and_load, allowempty=True, readwrite=True
        )
        first = Record(
            "1",
            data={
                "features": {"A": 0, "B": 1},
                "prediction": {"C": {"value": 1, "confidence": 1.0}},
            },
        )
        second = Record(
            "2",
            data={
                "features": {"A": 3, "B": 4},
                "prediction": {"C": {"value": 2, "confidence": 1.0}},
            },
        )
        await save(source, first, second)

        expected_first = {
            "key": "1",
            "features": {"A": 0, "B": 1},
            "prediction": {"C": {"confidence": 1.0, "value": "1"}},
            "extra": {},
        }
        expected_second = {
            "key": "2",
            "features": {"A": 3, "B": 4},
            "prediction": {"C": {"confidence": 1.0, "value": "2"}},
            "extra": {},
        }

        # Every record in the source
        exported = []
        async for loaded in load(source):
            exported.append(loaded.export())
        self.assertEqual(exported, [expected_first, expected_second])

        # Only the record that was asked for
        exported = []
        async for loaded in load(source, "1"):
            exported.append(loaded.export())
        self.assertEqual(exported, [expected_first])
Esempio n. 10
0
 async def record(self, key: str):
     """
     Build the record for ``key`` from two queries: the ``json`` column of
     ``ml_data`` (merged in as record data) and the ``maintained`` column
     of ``status`` (passed to ``evaluated`` as a string).
     """
     cursor = self.conn
     result = Record(key)
     # Stored record data, if any
     await cursor.execute("SELECT json FROM ml_data WHERE key=%s", (key, ))
     json_row = await cursor.fetchone()
     if json_row is not None:
         if json_row[0] is not None:
             result.merge(Record(key, data=json.loads(json_row[0])))
     # Maintained status, if any
     await cursor.execute("SELECT maintained FROM `status` WHERE key=%s",
                          (key, ))
     status_row = await cursor.fetchone()
     if status_row is not None:
         if status_row[0] is not None:
             result.evaluated({"maintained": str(status_row[0])})
     return result
Esempio n. 11
0
    def setUpClass(cls):
        """
        Turn ``TRAIN_DATA`` and ``PREDICT_DATA`` into in-memory record
        sources and create an ``NERModel`` that writes to a temporary
        directory.
        """
        # Transpose the row-wise data into one tuple per column
        A_train, B_train, X = list(zip(*TRAIN_DATA))
        A_predict, B_predict = list(zip(*PREDICT_DATA))

        train_records = []
        for index, row in enumerate(zip(A_train, B_train, X)):
            sentence_id, words, ner_tag = row
            features = {
                "sentence_id": sentence_id,
                "words": words,
                "ner_tag": ner_tag,
            }
            train_records.append(Record(str(index), data={"features": features}))
        cls.train_records = train_records
        cls.train_sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.train_records))
        )

        # The prediction records carry no "ner_tag" feature
        predict_records = []
        for index, row in enumerate(zip(A_predict, B_predict)):
            sentence_id, words = row
            features = {"sentence_id": sentence_id, "words": words}
            predict_records.append(
                Record(str(index), data={"features": features})
            )
        cls.predict_records = predict_records
        cls.predict_sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.predict_records))
        )

        cls.model_dir = tempfile.TemporaryDirectory()
        model_config = NERModelConfig(
            sid=Feature("sentence_id", int, 1),
            words=Feature("words", str, 1),
            predict=Feature("ner_tag", str, 1),
            output_dir=cls.model_dir.name,
            model_architecture_type="bert",
            model_name_or_path="bert-base-cased",
            no_cuda=True,
        )
        cls.model = NERModel(model_config)
Esempio n. 12
0
 async def test_02_predict(self):
     """
     Predict on two records whose keys are the expected classifications and
     check each prediction equals its record's key.
     """
     positive = Record("a", data={"features": {self.feature.NAME: 1}})
     negative = Record("not a", data={"features": {self.feature.NAME: 0}})
     memory_source = MemorySource(
         MemorySourceConfig(records=[positive, negative])
     )
     async with Sources(memory_source) as sources, self.model as model:
         async with sources() as sctx, model() as mctx:
             seen = 0
             predictions = mctx.predict(sctx.records())
             async for record, prediction, confidence in predictions:
                 with self.subTest(record=record):
                     self.assertEqual(prediction, record.key)
                 seen += 1
             # Both input records must have produced a prediction
             self.assertEqual(seen, 2)
Esempio n. 13
0
    def setUpClass(cls):
        """
        Create an ``AnomalyModel`` in a temporary directory and 1800 records
        of normally distributed data, split into 1400 training records and
        400 test records.
        """
        # Temporary directory that holds the trained model
        cls.model_dir = tempfile.TemporaryDirectory()
        # The model under test
        cls.model = AnomalyModel(
            features=Features(
                Feature("A", int, 1),
                Feature("B", int, 2),
            ),
            predict=Feature("Y", int, 1),
            directory=cls.model_dir.name,
        )

        # Two rows of samples: row 0 feeds "A", row 1 feeds "B"
        _n_data = 1800
        _temp_data = np.random.normal(2, 1, size=(2, _n_data))
        records = []
        for a_value, b_value in zip(_temp_data[0], _temp_data[1]):
            record_key = "x" + str(random.random())
            features = {
                "A": float(a_value),
                "B": float(b_value),
                # 1 when A > 1 - B, otherwise 0
                "Y": (a_value > 1 - b_value).astype(int),
            }
            records.append(Record(record_key, data={"features": features}))
        cls.records = records

        split_at = 1400
        cls.trainingsource = Sources(
            MemorySource(MemorySourceConfig(records=cls.records[:split_at])))
        cls.testsource = Sources(
            MemorySource(MemorySourceConfig(records=cls.records[split_at:])))
Esempio n. 14
0
 async def setUp(self):
     """
     Write ten randomly keyed records to a temporary JSON source, verify
     the file contains them, and patch the ``load`` methods of ``Model``,
     ``OperationImplementation`` and ``Operation`` for the test's duration.
     """
     await super().setUp()
     self.records = [Record(str(random.random())) for _ in range(10)]
     self.temp_filename = self.mktempfile()
     self.sconfig = FileSourceConfig(
         filename=self.temp_filename, readwrite=True, allowempty=True
     )
     async with JSONSource(self.sconfig) as source:
         async with source() as sctx:
             for record in self.records:
                 await sctx.update(record)
     file_text = Path(self.sconfig.filename).read_text()
     contents = json.loads(file_text)
     # Ensure there are records in the file
     stored = contents.get(self.sconfig.tag)
     self.assertEqual(
         len(stored),
         len(self.records),
         "RecordsTestCase JSON file erroneously initialized as empty",
     )
     # TODO(p3) For some reason patching Model.load doesn't work
     replacements = (
         ("dffml.model.model.Model.load", model_load),
         ("dffml.df.base.OperationImplementation.load", opimp_load),
         ("dffml.df.types.Operation.load", op_load),
     )
     for target, replacement in replacements:
         self._stack.enter_context(patch(target, new=replacement))
Esempio n. 15
0
 def setUpClass(cls):
     """
     Create a ``DNNRegressionModel`` in a temporary directory and 2000
     in-memory records whose ``TARGET`` is ``2 * feature_1 + 3 * feature_2``.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature1 = Feature("feature_1", float, 1)
     cls.feature2 = Feature("feature_2", float, 1)
     cls.features = Features(cls.feature1, cls.feature2)
     model_config = DNNRegressionModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=40,
         hidden=[50, 20, 10],
         predict=Feature("TARGET", float, 1),
         features=cls.features,
     )
     cls.model = DNNRegressionModel(model_config)
     # Generating data f(x1,x2) = 2*x1 + 3*x2
     _n_data = 2000
     _temp_data = np.random.rand(2, _n_data)
     records = []
     for x1, x2 in zip(_temp_data[0], _temp_data[1]):
         record_key = "x" + str(random.random())
         features = {
             cls.feature1.name: float(x1),
             cls.feature2.name: float(x2),
             "TARGET": 2 * x1 + 3 * x2,
         }
         records.append(Record(record_key, data={"features": features}))
     cls.records = records
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
Esempio n. 16
0
 async def model_predict(self, request, mctx):
     """
     Run the records in the request's JSON body through ``mctx.predict``.

     The body is read as a mapping of record key to record data. Only a
     ``chunk_size`` of 0 (taken from the URL match info) is supported; any
     other value is answered with a 400 error response.

     Returns a JSON response with ``iterkey`` set to ``None`` and
     ``records`` mapping each predicted record's key to its export.
     """
     # TODO Provide an iterkey method for model prediction
     chunk_size = int(request.match_info["chunk_size"])
     if chunk_size != 0:
         return web.json_response(
             {"error": "Multiple request iteration not yet supported"},
             status=HTTPStatus.BAD_REQUEST,
         )
     # Create a source which will provide the records
     async with Sources(
         MemorySource(records=[
             Record(key, data=record_data)
             for key, record_data in (await request.json()).items()
         ])) as source:
         async with source() as sctx:
             # Feed them through prediction
             return web.json_response({
                 "iterkey": None,
                 "records": {
                     record.key: record.export()
                     async for record in mctx.predict(sctx)
                 },
             })
Esempio n. 17
0
 async def test_02_predict(self):
     """
     Predict on a single record and check the predicted value is within the
     error threshold of ``2 * feature_1 + 3 * feature_2``.
     """
     # Leading zero is padding so the features are addressed as 1 and 2
     test_feature_val = [0, 1.5, 2]
     first_value = test_feature_val[1]
     second_value = test_feature_val[2]
     # should be same function used in TestDNN.setupclass
     test_target = 2 * first_value + 3 * second_value
     features = {
         self.feature1.name: first_value,
         self.feature2.name: second_value,
     }
     a = Record("a", data={"features": features})
     memory_source = MemorySource(MemorySourceConfig(records=[a]))
     async with Sources(memory_source) as sources, self.model as model:
         target_name = model.config.predict.name
         async with sources() as sctx, model() as mctx:
             res = []
             async for record in mctx.predict(sctx.records()):
                 res.append(record)
             self.assertEqual(len(res), 1)
         self.assertEqual(res[0].key, a.key)
         predicted = res[0].prediction(target_name).value
         relative_error = (test_target - predicted) / test_target
         # NOTE(review): the 1e-6 is added to the ratio, not to the
         # denominator; confirm that is the intended formula
         test_error_norm = abs(relative_error + 1e-6)
         error_threshold = 0.3
         self.assertLess(test_error_norm, error_threshold)
Esempio n. 18
0
 async def test_update(self):
     """
     POST a record to the source's update endpoint and check the source
     then returns it with the posted feature value.
     """
     key = "1"
     payload = Record(key, data={"features": {"by_ten": 10}}).export()
     url = f"/source/{self.slabel}/update/{key}"
     async with self.post(url, json=payload) as response:
         body = await response.json()
         self.assertEqual(body, OK)
     stored = await self.sctx.record(key)
     self.assertEqual(stored.feature("by_ten"), 10)
Esempio n. 19
0
    async def model_predict(self, request, mctx):
        """
        Run the records in the request's JSON body through ``mctx.predict``.

        The body is read as a mapping of record key to record data. Only a
        ``chunk_size`` of 0 (taken from the URL match info) is supported; any
        other value is answered with a 400 error response.

        Returns a JSON response with ``iterkey`` set to ``None`` and
        ``records`` mapping each predicted record's key to its export.
        """
        # TODO Provide an iterkey method for model prediction
        chunk_size = int(request.match_info["chunk_size"])
        if chunk_size != 0:
            error_body = {"error": "Multiple request iteration not yet supported"}
            return web.json_response(error_body, status=HTTPStatus.BAD_REQUEST)

        # Build one record per entry of the request body
        payload = await request.json()
        records = {}
        for key, record_data in payload.items():
            records[key] = Record(key, data=record_data)

        # Async generator that hands the records to the model
        async def record_gen():
            for pending in records.values():
                yield pending

        # Collect the exported predictions by record key
        exported = {}
        async for predicted in mctx.predict(record_gen()):
            exported[predicted.key] = predicted.export()
        return web.json_response({"iterkey": None, "records": exported})
Esempio n. 20
0
    def setUpClass(cls):
        """
        Create an ``XGBRegressorModel`` in a temporary directory and 2000
        records whose ``Target`` is ``2 * Feature1 + 3 * Feature2``, split
        into 1800 training records and 200 test records.
        """
        # Temporary directory that holds the trained model
        cls.model_dir = tempfile.TemporaryDirectory()
        # The model under test
        model_config = XGBRegressorModelConfig(
            features=Features(Feature("Feature1", float, 1),
                              Feature("Feature2")),
            predict=Feature("Target", float, 1),
            directory=cls.model_dir.name,
        )
        cls.model = XGBRegressorModel(model_config)
        # Generating data f(x1,x2) = 2*x1 + 3*x2
        _n_data = 2000
        _temp_data = np.random.rand(2, _n_data)
        records = []
        for x1, x2 in zip(_temp_data[0], _temp_data[1]):
            record_key = "x" + str(random.random())
            features = {
                "Feature1": float(x1),
                "Feature2": float(x2),
                "Target": 2 * x1 + 3 * x2,
            }
            records.append(Record(record_key, data={"features": features}))
        cls.records = records

        split_at = 1800
        cls.trainingsource = Sources(
            MemorySource(MemorySourceConfig(records=cls.records[:split_at])))
        cls.testsource = Sources(
            MemorySource(MemorySourceConfig(records=cls.records[split_at:])))
Esempio n. 21
0
 def setUpClass(cls):
     """
     Turn ``DATA`` into in-memory records with features ``A`` and ``X`` and
     create a ``TextClassificationModel`` that predicts ``X`` from ``A``.
     """
     cls.features = Features()
     cls.features.append(Feature("A", str, 1))
     # Transpose the row-wise data into one tuple per column
     A, X = list(zip(*DATA))
     records = []
     for index, (a_value, x_value) in enumerate(zip(A, X)):
         features = {"A": a_value, "X": x_value}
         records.append(Record(str(index), data={"features": features}))
     cls.records = records
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
     cls.model_dir = tempfile.TemporaryDirectory()
     # NOTE(review): the second layer passes relu unquoted while the other
     # layers quote their activation; confirm this is intentional
     extra_layers = [
         "Dense(units = 120, activation='relu')",
         "Dense(units = 64, activation=relu)",
         "Dense(units = 2, activation='softmax')",
     ]
     hub_url = (
         "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1"
     )
     model_config = TextClassifierConfig(
         directory=cls.model_dir.name,
         classifications=[0, 1],
         features=cls.features,
         predict=Feature("X", int, 1),
         add_layers=True,
         layers=extra_layers,
         model_path=hub_url,
         epochs=30,
     )
     cls.model = TextClassificationModel(model_config)
Esempio n. 22
0
    def setUpClass(cls):
        """
        Declare features ``A`` to ``H``, turn ``DATA`` into in-memory
        records (features ``A`` to ``H`` plus ``X``), and create a
        regression ``VWModel`` with a mean squared error scorer.
        """
        cls.model_dir = tempfile.TemporaryDirectory()
        cls.features = Features()
        # A-F are floats, G and H are ints; all have length 1
        feature_types = (
            ("A", float),
            ("B", float),
            ("C", float),
            ("D", float),
            ("E", float),
            ("F", float),
            ("G", int),
            ("H", int),
        )
        for feature_name, feature_type in feature_types:
            cls.features.append(Feature(feature_name, feature_type, 1))

        # Transpose the row-wise data into one tuple per column
        A, B, C, D, E, F, G, H, X = list(zip(*DATA))
        column_names = ("A", "B", "C", "D", "E", "F", "G", "H", "X")
        columns = (A, B, C, D, E, F, G, H, X)
        cls.records = [
            Record(
                str(index),
                data={"features": dict(zip(column_names, values))},
            )
            for index, values in enumerate(zip(*columns))
        ]

        memory_source = MemorySource(MemorySourceConfig(records=cls.records))
        cls.sources = Sources(memory_source)
        vw_options = ["l2", "0.1", "loss_function", "squared", "passes", "10"]
        model_config = VWConfig(
            location=cls.model_dir.name,
            features=cls.features,
            predict=Feature("X", float, 1),
            # A and B will be namespace n1
            # A and C will be in namespace n2
            namespace=["n1_A_B", "n2_A_C"],
            importance=Feature("H", int, 1),
            tag=Feature("G", int, 1),
            task="regression",
            vwcmd=vw_options,
        )
        cls.model = VWModel(model_config)
        cls.scorer = MeanSquaredErrorAccuracy()
Esempio n. 23
0
 async def test_02_predict(self):
     """
     Predict on a single record and check that exactly one record comes
     back, with the same key and a truthy predicted value.
     """
     a = Record("a", data={"features": {self.feature.NAME: 1}})
     memory_source = MemorySource(MemorySourceConfig(records=[a]))
     async with Sources(memory_source) as sources, self.model as model:
         target_name = model.config.predict.NAME
         async with sources() as sctx, model() as mctx:
             res = []
             async for record in mctx.predict(sctx.records()):
                 res.append(record)
             self.assertEqual(len(res), 1)
         predicted = res[0]
         self.assertEqual(predicted.key, a.key)
         self.assertTrue(predicted.prediction(target_name).value)
Esempio n. 24
0
    def setUpClass(cls):
        """
        Turn ``TRAIN_DATA`` and ``TEST_DATA`` into in-memory record sources
        (features ``sentence`` and ``entities``) and create a
        ``SpacyNERModel`` stored in a temporary directory, plus its scorer.
        """
        # Transpose the row-wise data into one tuple per column
        A_train, X_train = list(zip(*TRAIN_DATA))
        A_test, X_test = list(zip(*TEST_DATA))

        def to_records(sentences, entity_lists):
            # One record per row, keyed by the row's position
            built = []
            for index, row in enumerate(zip(sentences, entity_lists)):
                sentence, entities = row
                features = {"sentence": sentence, "entities": entities}
                built.append(Record(str(index), data={"features": features}))
            return built

        cls.train_records = to_records(A_train, X_train)
        cls.test_records = to_records(A_test, X_test)

        train_memory = MemorySource(
            MemorySourceConfig(records=cls.train_records)
        )
        cls.train_sources = Sources(train_memory)
        test_memory = MemorySource(
            MemorySourceConfig(records=cls.test_records)
        )
        cls.test_sources = Sources(test_memory)
        cls.model_dir = tempfile.TemporaryDirectory()
        model_config = SpacyNERModelConfig(
            model_name="en_core_web_sm",
            location=cls.model_dir.name,
            n_iter=10,
            dropout=0.4,
        )
        cls.model = SpacyNERModel(model_config)
        cls.scorer = SpacyNerAccuracy()
Esempio n. 25
0
 async def record(self, key: str):
     """
     Fetch a single record by key using the configured ``record`` query.

     Returns a blank record for ``key`` when the query yields no row;
     otherwise the row, converted by ``row_to_record``, is merged into it.
     """
     # Start from a blank record so a missing row still yields a result
     result = Record(key)
     cursor = self.conn
     # Run the single-record query and take the first row
     await cursor.execute(self.parent.config.record, (key,))
     fetched = await cursor.fetchone()
     if fetched is not None:
         # Fill the blank record with the converted row
         converted = self.row_to_record(fetched)
         result.merge(converted)
     self.logger.debug("Got: %s: %r", result.key, result.export())
     return result
Esempio n. 26
0
 def setUpClass(cls):
     """
     Build four in-memory records with the features ``Years``,
     ``Expertise``, ``Trust`` and ``Salary`` and wrap them in a source.

     The values come from ``A``, ``B``, ``C`` and ``D``, which are defined
     outside this method (presumably module-level sequences with at least
     four entries; verify against the module).
     """
     # The first argument is named ``cls`` (it was ``self``) because unittest
     # invokes setUpClass on the class, matching the usual convention.
     cls.records = [
         Record(
             str(i),
             data={
                 "features": {
                     "Years": A[i],
                     "Expertise": B[i],
                     "Trust": C[i],
                     "Salary": D[i],
                 }
             },
         ) for i in range(4)
     ]
     cls.source = Sources(
         MemorySource(MemorySourceConfig(records=cls.records)))
Esempio n. 27
0
 async def test_model(self):
     """
     Train the model, check its accuracy and check a single prediction.

     Training is retried in a fresh model directory while the accuracy is
     at or below 0.8 (for the first five attempts). Once an attempt passes
     the accuracy check, the prediction for one record must be within the
     error threshold of ``2 * feature_1 + 3 * feature_2`` and the loop ends.
     """
     test_feature_val = [
         0,
         1.5,
         2,
     ]  # inserting zero so that its 1-indexable
     test_target = 2 * test_feature_val[1] + 3 * test_feature_val[2]
     # should be same function used in TestDNN.setupclass
     a = Record(
         "a",
         data={
             "features": {
                 self.feature1.name: test_feature_val[1],
                 self.feature2.name: test_feature_val[2],
             }
         },
     )
     target_name = self.model.config.predict.name
     for i in range(0, 7):
         await train(self.model, self.sources)
         res = await accuracy(self.model, self.sources)
         # Retry because of tensorflow intermittent low accuracy
         if res <= 0.8 and i < 5:
             print("Retry i:", i, "accuracy:", res)
             # Start the next attempt from an empty model directory
             self.model_dir.cleanup()
             self.model_dir = tempfile.TemporaryDirectory()
             self.model.config = self.model.config._replace(
                 directory=self.model_dir.name
             )
             continue
         self.assertGreater(res, 0.8)
         res = [
             record
             async for record in predict(self.model, a, keep_record=True)
         ]
         self.assertEqual(len(res), 1)
         self.assertEqual(res[0].key, a.key)
         test_error_norm = abs(
             (test_target - res[0].prediction(target_name).value)
             / test_target
             + 1e-6
         )
         error_threshold = 0.3
         self.assertLess(test_error_norm, error_threshold)
         # Stop after the first passing attempt; without this the loop
         # would retrain and re-check on every remaining iteration
         break
Esempio n. 28
0
 def setUpClass(cls):
     """
     Turn ``FEATURE_DATA`` into in-memory records with features ``X`` and
     ``Y`` and create an ``SLR`` model that predicts ``Y`` from ``X``.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature = DefFeature("X", float, 1)
     cls.features = Features(cls.feature)
     # Transpose the row-wise data into one tuple per column
     X, Y = list(zip(*FEATURE_DATA))
     records = []
     for index, (x_value, y_value) in enumerate(zip(X, Y)):
         features = {"X": x_value, "Y": y_value}
         records.append(Record(str(index), data={"features": features}))
     cls.records = records
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
     model_config = SLRConfig(
         directory=cls.model_dir.name,
         predict=DefFeature("Y", float, 1),
         features=cls.features,
     )
     cls.model = SLR(model_config)
Esempio n. 29
0
 async def setUp(self):
     """
     Create train, test and predict datasets and write each one to its own
     temporary CSV file via ``populate_source``.

     For every dataset name ``use`` this sets ``self.{use}_data``,
     ``self.{use}_records`` and ``self.{use}_filename``.
     """
     await super().setUp()
     self.train_data = [
         [0, 1, 0.2, 10],
         [1, 3, 0.4, 20],
         [2, 5, 0.6, 30],
         [3, 7, 0.8, 40],
     ]
     self.test_data = [[4, 9, 1.0, 50], [5, 11, 1.2, 60]]
     # The prediction rows have one value fewer than the others
     self.predict_data = [[6, 13, 1.4], [7, 15, 1.6]]
     for use in ("train", "test", "predict"):
         rows = getattr(self, f"{use}_data")
         records = []
         for index, values in enumerate(rows):
             features = dict(zip(FEATURE_NAMES, values))
             records.append(Record(index, data={"features": features}))
         setattr(self, f"{use}_records", records)
         filename = self.mktempfile() + ".csv"
         setattr(self, f"{use}_filename", filename)
         await self.populate_source(CSVSource, *records, filename=filename)
Esempio n. 30
0
 def row_to_record(self, row):
     """
     Convert a fetched row into a record using the configured column maps.

     ``config.features`` maps feature names to column names.
     ``config.predictions`` maps feature names to a pair of (value column,
     confidence column). The record key is read from the ``config.key``
     column.
     """
     config = self.parent.config
     # Features
     features = {
         feature_name: row[column_name]
         for feature_name, column_name in config.features.items()
     }
     # Predictions
     predictions = {}
     for feature_name, columns in config.predictions.items():
         value_column_name, confidence_column_name = columns
         # Set confidence to Not A Number if not given
         confidence = row.get(confidence_column_name, float("nan"))
         predictions[feature_name] = {
             "value": row[value_column_name],
             "confidence": confidence,
         }
     record_data = {"features": features, "prediction": predictions}
     return Record(row[config.key], data=record_data)