Exemple #1
0
 async def test_tag(self):
     """Verify that tagged and untagged records coexist in one file.

     Two sources come from ``setUpSource``; the second has its config tag
     replaced with ``"sometag"``. Each stores a record keyed ``"0"`` with a
     different feature. The records are read back through fresh contexts,
     then ``self.testfile`` is parsed as CSV to check that the rows under
     the ``"untagged"`` and ``"sometag"`` tags carry the expected values.
     """
     with tempfile.TemporaryDirectory() as workdir:
         self.testfile = os.path.join(workdir, str(random.random()))
         plain_source = await self.setUpSource()
         tagged_source = await self.setUpSource()
         tagged_source.config = tagged_source.config._replace(tag="sometag")
         async with plain_source, tagged_source:
             # Store one record per source, both under the same key
             async with plain_source() as plain_ctx, tagged_source() as tagged_ctx:
                 feed_record = Record("0", data={"features": {"feed": 1}})
                 await plain_ctx.update(feed_record)
                 face_record = Record("0", data={"features": {"face": 2}})
                 await tagged_ctx.update(face_record)
             # Read back through new contexts on the still-open sources
             async with plain_source() as plain_ctx, tagged_source() as tagged_ctx:
                 fetched = await plain_ctx.record("0")
                 self.assertIn("feed", fetched.features())
                 fetched = await tagged_ctx.record("0")
                 self.assertIn("face", fetched.features())
         # Inspect the raw file once both sources have been exited
         with open(self.testfile, mode="r") as handle:
             rows = {}
             for row in csv.DictReader(handle, dialect="strip"):
                 # Later rows with the same tag replace earlier ones
                 rows[row["tag"]] = {row["key"]: row}
             self.assertIn("untagged", rows)
             self.assertIn("sometag", rows)
             plain_rows = rows["untagged"]
             tagged_rows = rows["sometag"]
             self.assertIn("0", plain_rows)
             self.assertIn("0", tagged_rows)
             self.assertIn("feed", plain_rows["0"])
             self.assertIn("face", tagged_rows["0"])
             # DictReader yields strings, hence the quoted numbers
             self.assertEqual("1", plain_rows["0"]["feed"])
             self.assertEqual("2", tagged_rows["0"]["face"])
Exemple #2
0
    async def record(self, key: str):
        """Fetch the record stored under ``key`` from the configured table.

        The first row whose ``key`` column equals ``key`` is looked up.
        Columns starting with ``feature_`` become features (with only that
        leading prefix removed). Columns ending in ``_value`` become
        predictions named after the part before the suffix, paired with the
        matching ``<target>_confidence`` column.

        Args:
            key: Value of the ``key`` column to look up.

        Returns:
            A ``Record`` for ``key``; it is blank when no row matches.

        Raises:
            KeyError: If a ``<target>_value`` column has no matching
                ``<target>_confidence`` column in the row.
        """
        record = Record(key)
        async with self.parent.db() as db_ctx:
            try:
                row = await db_ctx.lookup(
                    self.parent.config.table_name,
                    cols=None,  # None turns into *. We want all columns
                    conditions=[[Condition("key", "=", key)]],
                ).__anext__()
            except StopAsyncIteration:
                # No matching row: the async generator was exhausted
                return record

        if row is None:
            return record

        feature_prefix = "feature_"
        value_suffix = "_value"
        features = {}
        predictions = {}
        # A distinct loop name keeps the ``key`` argument from being clobbered
        for column, value in row.items():
            if column.startswith(feature_prefix):
                # Slice instead of str.replace so only the leading prefix goes
                features[column[len(feature_prefix):]] = value
            elif column.endswith(value_suffix):
                # Only a trailing "_value" marks a prediction column
                target = column[:-len(value_suffix)]
                predictions[target] = {
                    "value": value,
                    "confidence": row[target + "_confidence"],
                }
        record.merge(
            Record(
                row["key"],
                data={
                    "features": features,
                    "prediction": predictions
                },
            ))
        return record
Exemple #3
0
 def setUpClass(cls):
     """Create the feature, model, records and sources shared by the tests.

     1000 records keyed ``"a<random>"`` carry the feature value 1 and the
     string ``"a"``; 1000 records keyed ``"b<random>"`` carry the feature
     value 0 and the string ``"not a"``. All of them back one in-memory
     source.
     """
     cls.feature = StartsWithA()
     cls.features = Features(cls.feature)
     cls.model_dir = tempfile.TemporaryDirectory()
     model_config = MiscModelConfig(
         directory=cls.model_dir.name,
         classifications=["not a", "a"],
         features=cls.features,
     )
     cls.model = MiscModel(model_config)
     # (key prefix, feature value, "string" value) for each group of records
     groups = (("a", 1, "a"), ("b", 0, "not a"))
     generated = []
     for prefix, flag, label in groups:
         for _ in range(1000):
             payload = {"features": {cls.feature.NAME: flag, "string": label}}
             generated.append(
                 Record(prefix + str(random.random()), data=payload)
             )
     cls.records = generated
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
Exemple #4
0
 def setUpClass(cls):
     """Create the records, sources and DNN classifier shared by the tests.

     1000 records keyed ``"a<random>"`` have ``starts_with_a`` set to 1 and
     ``string`` set to ``"a"``; 1000 keyed ``"b<random>"`` have them set to
     0 and ``"not a"``. The model is configured to predict ``string``.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature = Feature("starts_with_a", int, 1)
     cls.features = Features(cls.feature)
     # (key prefix, feature value, "string" value) for each group of records
     groups = (("a", 1, "a"), ("b", 0, "not a"))
     generated = []
     for prefix, flag, label in groups:
         for _ in range(1000):
             payload = {"features": {cls.feature.name: flag, "string": label}}
             generated.append(
                 Record(prefix + str(random.random()), data=payload))
     cls.records = generated
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
     model_config = DNNClassifierModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=40,
         hidden=[50, 20, 10],
         predict=Feature("string", str, 1),
         classifications=["a", "not a"],
         clstype=str,
         features=cls.features,
     )
     cls.model = DNNClassifierModel(model_config)
Exemple #5
0
    async def record(self, key: str):
        """Fetch the record stored under ``key`` using the configured query.

        ``self.parent.config.record_query`` is executed with ``key`` as its
        only parameter and a single row is fetched. Columns starting with
        ``feature_`` become features (with only that leading prefix
        removed). Columns ending in ``_value`` become predictions named
        after the part before the suffix, paired with the matching
        ``<target>_confidence`` column.

        Args:
            key: Key passed to the record query.

        Returns:
            A ``Record`` for ``key``; it is blank when no row is returned.

        Raises:
            KeyError: If a ``<target>_value`` column has no matching
                ``<target>_confidence`` column in the row.
        """
        query = self.parent.config.record_query
        record = Record(key)
        db = self.conn
        await db.execute(query, (key, ))
        row = await db.fetchone()

        if row is None:
            return record

        feature_prefix = "feature_"
        value_suffix = "_value"
        features = {}
        predictions = {}
        # A distinct loop name keeps the ``key`` argument from being clobbered
        for column, value in row.items():
            if column.startswith(feature_prefix):
                # Slice instead of str.replace so only the leading prefix goes
                features[column[len(feature_prefix):]] = value
            elif column.endswith(value_suffix):
                # Only a trailing "_value" marks a prediction column
                target = column[:-len(value_suffix)]
                predictions[target] = {
                    "value": value,
                    "confidence": row[target + "_confidence"],
                }
        record.merge(
            Record(
                row["key"],
                data={
                    "features": features,
                    "prediction": predictions
                },
            ))
        return record
Exemple #6
0
 async def update(self, record: Record):
     """Write ``record`` to the database with the configured update query.

     A mapping of column name to value is built: the key column first, then
     one column per configured feature, then the value (and, when
     configured, confidence) column of each configured prediction. Missing
     features and predictions are bound to ``None``.

     Unless the update query contains ``REPLACE`` (case-insensitive), every
     value except the key is appended a second time, so the key is not
     repeated for the UPDATE part of the statement.
     """
     config = self.parent.config
     # Column name -> value; the key column must stay first
     bindings = {config.key: record.key}
     # Features
     feature_values = record.features(config.features.keys())
     for feature_name, column_name in config.features.items():
         bindings[column_name] = feature_values.get(feature_name, None)
     # Predictions
     predicted = record.predictions(config.predictions.keys())
     for feature_name, columns in config.predictions.items():
         value_column, confidence_column = columns
         available = feature_name in predicted
         bindings[value_column] = (
             predicted[feature_name]["value"] if available else None
         )
         if confidence_column is not None:
             bindings[confidence_column] = (
                 predicted[feature_name]["confidence"] if available else None
             )
     values = list(bindings.values())
     if "REPLACE" not in config.update.upper():
         # Repeat everything but the key for the UPDATE half of the query
         values.extend(values[1:])
     # Execute the update query
     await self.conn.execute(config.update, values)
     self.logger.debug("Updated: %s: %r", record.key, bindings)
Exemple #7
0
 async def update(self, record: Record):
     """Store the features and, if present, the prediction of ``record``.

     Feature values for ``self.parent.FEATURE_COLS`` are written to the
     ``features`` table; columns the record has no value for are stored as
     ``None``. When the record has a prediction under ``"target_name"``,
     its fields are written to the ``prediction`` table in the same way.

     Only the prediction lookup is guarded by ``except KeyError``: a record
     without a prediction is skipped quietly, while a ``KeyError`` raised
     during the database write is no longer swallowed.
     """
     db = self.parent.db
     # Store feature data
     feature_cols = self.parent.FEATURE_COLS
     feature_data = OrderedDict.fromkeys(feature_cols)
     feature_data.update(record.features(feature_cols))
     # Column names come from the class constant; values are parameterized
     await db.execute(
         "INSERT OR REPLACE INTO features (key, " +
         ", ".join(feature_cols) + ") "
         "VALUES(?, " + ", ".join("?" * len(feature_cols)) + ")",
         [record.key] + list(feature_data.values()),
     )
     # Store prediction
     try:
         prediction = record.prediction("target_name")
     except KeyError:
         # The record has no prediction to store
         return
     prediction_cols = self.parent.PREDICTION_COLS
     prediction_data = OrderedDict.fromkeys(prediction_cols)
     prediction_data.update(prediction.dict())
     await db.execute(
         "INSERT OR REPLACE INTO prediction (key, " +
         ", ".join(prediction_cols) + ") "
         "VALUES(?, " + ", ".join("?" * len(prediction_cols)) + ")",
         [record.key] + list(prediction_data.values()),
     )
Exemple #8
0
 async def test_update(self):
     """POST a record to the update endpoint and confirm it was stored.

     The response body must equal ``OK`` and the record read back from
     ``self.sctx`` must carry the posted ``by_ten`` feature.
     """
     key = "1"
     posted = Record(key, data={"features": {"by_ten": 10}})
     endpoint = f"/source/{self.slabel}/update/{key}"
     async with self.post(endpoint, json=posted.export()) as response:
         body = await response.json()
         self.assertEqual(body, OK)
     stored = await self.sctx.record(key)
     self.assertEqual(stored.feature("by_ten"), 10)
Exemple #9
0
    async def test_ini(self):
        """Save two records to an INI source and load them back.

        The loaded feature names are expected in lower case (``a``/``b``
        and ``c``/``d``) although they were saved in upper case.
        """
        with TemporaryDirectory() as workdir:
            self.testfile = os.path.join(workdir, "testfile.ini")
            # Create a source
            source = INISource(
                filename=self.testfile,
                allowempty=True,
                readwrite=True,
            )
            # Save some data in the source
            first = Record("section1", data={"features": {"A": 1, "B": 2}})
            second = Record("section2", data={"features": {"C": 3, "D": 4}})
            await save(source, first, second)
            # Load all the records
            loaded = []
            async for record in load(source):
                loaded.append(record)

            self.assertIsInstance(loaded, list)
            self.assertEqual(len(loaded), 2)
            first_loaded, second_loaded = loaded[0], loaded[1]
            self.assertDictEqual(first_loaded.features(), {"a": 1, "b": 2})
            self.assertDictEqual(second_loaded.features(), {"c": 3, "d": 4})
Exemple #10
0
    def setUpClass(cls):
        """Build train/test records, sources and the QA model for the tests.

        ``TRAIN_DATA`` and ``TEST_DATA`` are transposed into six columns
        each (title, context, question, answer text, start position and
        impossibility flag). One record is built per row, keyed by the row
        index, with an empty ``answers`` list.
        """

        def build_records(titles, contexts, questions, answer_texts,
                          start_positions, impossible_flags):
            # One record per row, keyed by its position in the data set
            built = []
            for index in range(len(answer_texts)):
                features = {
                    "title": titles[index],
                    "context": contexts[index],
                    "question": questions[index],
                    "answer_text": answer_texts[index],
                    "start_pos_char": start_positions[index],
                    "is_impossible": impossible_flags[index],
                    "answers": [],
                }
                built.append(Record(str(index), data={"features": features}))
            return built

        A_train, B_train, C_train, X_train, D_train, E_train = list(
            zip(*TRAIN_DATA))
        A_test, B_test, C_test, X_test, D_test, E_test = list(zip(*TEST_DATA))

        cls.train_records = build_records(A_train, B_train, C_train, X_train,
                                          D_train, E_train)
        cls.test_records = build_records(A_test, B_test, C_test, X_test,
                                         D_test, E_test)

        train_memory = MemorySource(
            MemorySourceConfig(records=cls.train_records))
        cls.train_sources = Sources(train_memory)
        test_memory = MemorySource(
            MemorySourceConfig(records=cls.test_records))
        cls.test_sources = Sources(test_memory)

        cls.model_dir = tempfile.TemporaryDirectory()
        # The temporary directory holds both the model and its logs
        model_config = QAModelConfig(
            model_name_or_path="bert-base-cased",
            cache_dir=CACHE_DIR,
            directory=cls.model_dir.name,
            log_dir=cls.model_dir.name,
            model_type="bert",
            no_cuda=True,
        )
        cls.model = QAModel(model_config)
Exemple #11
0
 def setUp(self):
     """Create one empty record and one populated record for each test."""
     self.null = Record("null")
     full_data = {
         "features": {"dead": "beef"},
         "extra": {"extra": "read all about it"},
     }
     # ``extra`` is passed both inside ``data`` and as its own keyword
     self.full = Record("full", data=full_data, extra={"half": True})
Exemple #12
0
    async def test_save_and_load(self):
        """Save two records to a CSV source, then load all and load by key.

        The exported prediction values are expected back as strings
        (``"1"`` and ``"2"``) although they were saved as integers.
        """
        source = CSVSource(
            filename=self.save_and_load, allowempty=True, readwrite=True
        )
        first = Record(
            "1",
            data={
                "features": {"A": 0, "B": 1},
                "prediction": {"C": {"value": 1, "confidence": 1.0}},
            },
        )
        second = Record(
            "2",
            data={
                "features": {"A": 3, "B": 4},
                "prediction": {"C": {"value": 2, "confidence": 1.0}},
            },
        )
        await save(source, first, second)

        expected_first = {
            "key": "1",
            "features": {"A": 0, "B": 1},
            "prediction": {"C": {"confidence": 1.0, "value": "1"}},
            "extra": {},
        }
        expected_second = {
            "key": "2",
            "features": {"A": 3, "B": 4},
            "prediction": {"C": {"confidence": 1.0, "value": "2"}},
            "extra": {},
        }

        # All records in source
        everything = []
        async for record in load(source):
            everything.append(record.export())
        self.assertEqual(everything, [expected_first, expected_second])

        # For specific records in a source
        only_first = []
        async for record in load(source, "1"):
            only_first.append(record.export())
        self.assertEqual(only_first, [expected_first])
Exemple #13
0
 async def record(self, key: str):
     """Return the record for ``key``, blank if the query finds no row.

     The configured ``record`` query is executed with ``key`` as its only
     parameter. A returned row is converted with ``row_to_record`` and
     merged into the result. The result is logged at debug level.
     """
     # Start from a blank record so a missing row still yields a result
     result = Record(key)
     connection = self.conn
     await connection.execute(self.parent.config.record, (key,))
     fetched = await connection.fetchone()
     if fetched is not None:
         result.merge(self.row_to_record(fetched))
     self.logger.debug("Got: %s: %r", result.key, result.export())
     return result
Exemple #14
0
    def setUpClass(cls):
        """Build training/prediction records, sources and the NER model.

        ``TRAIN_DATA`` is transposed into sentence ids, words and NER tags;
        ``PREDICT_DATA`` into sentence ids and words only. Records are
        keyed by their row index.
        """
        A_train, B_train, X = list(zip(*TRAIN_DATA))
        A_predict, B_predict = list(zip(*PREDICT_DATA))

        training = []
        for index in range(0, len(X)):
            features = {
                "sentence_id": A_train[index],
                "words": B_train[index],
                "ner_tag": X[index],
            }
            training.append(Record(str(index), data={"features": features}))
        cls.train_records = training
        cls.train_sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.train_records))
        )

        # Prediction records carry no "ner_tag"
        to_predict = []
        for index in range(0, len(A_predict)):
            features = {
                "sentence_id": A_predict[index],
                "words": B_predict[index],
            }
            to_predict.append(Record(str(index), data={"features": features}))
        cls.predict_records = to_predict
        cls.predict_sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.predict_records))
        )

        cls.model_dir = tempfile.TemporaryDirectory()
        model_config = NERModelConfig(
            sid=Feature("sentence_id", int, 1),
            words=Feature("words", str, 1),
            predict=Feature("ner_tag", str, 1),
            output_dir=cls.model_dir.name,
            model_architecture_type="bert",
            model_name_or_path="bert-base-cased",
            no_cuda=True,
        )
        cls.model = NERModel(model_config)
Exemple #15
0
 async def test_02_predict(self):
     """Predict on two records and expect each prediction to equal its key.

     Exactly two predictions must be produced.
     """
     a = Record("a", data={"features": {self.feature.NAME: 1}})
     b = Record("not a", data={"features": {self.feature.NAME: 0}})
     memory_source = MemorySource(MemorySourceConfig(records=[a, b]))
     async with Sources(memory_source) as sources, self.model as model:
         async with sources() as sctx, model() as mctx:
             seen = 0
             predictions = mctx.predict(sctx.records())
             # The confidence is yielded too but not checked here
             async for record, prediction, _confidence in predictions:
                 with self.subTest(record=record):
                     self.assertEqual(prediction, record.key)
                 seen += 1
             self.assertEqual(seen, 2)
    def setUpClass(cls):
        """Create the anomaly model and randomly generated train/test data.

        1800 samples of two values are drawn from a normal distribution
        (mean 2, standard deviation 1). ``Y`` is 1 where ``A > 1 - B`` and
        0 otherwise. The first 1400 records back the training source and
        the remaining 400 the test source.
        """
        # Temporary directory that will hold the trained model
        cls.model_dir = tempfile.TemporaryDirectory()
        cls.model = AnomalyModel(
            features=Features(
                Feature("A", int, 1),
                Feature("B", int, 2),
            ),
            predict=Feature("Y", int, 1),
            directory=cls.model_dir.name,
        )

        # Generate the data: row 0 holds the A values, row 1 the B values
        sample_count = 1800
        samples = np.random.normal(2, 1, size=(2, sample_count))
        generated = []
        for index in range(0, sample_count):
            a_value = samples[0][index]
            b_value = samples[1][index]
            features = {
                "A": float(a_value),
                "B": float(b_value),
                "Y": (a_value > 1 - b_value).astype(int),
            }
            generated.append(
                Record("x" + str(random.random()),
                       data={"features": features}))
        cls.records = generated

        training_part = cls.records[:1400]
        testing_part = cls.records[1400:]
        cls.trainingsource = Sources(
            MemorySource(MemorySourceConfig(records=training_part)))
        cls.testsource = Sources(
            MemorySource(MemorySourceConfig(records=testing_part)))
Exemple #17
0
 def setUpClass(cls):
     """Build records, sources and the text classification model.

     ``DATA`` is transposed into the text column ``A`` and the label column
     ``X``; one record is created per row, keyed by the row index.
     """
     cls.features = Features()
     cls.features.append(Feature("A", str, 1))
     A, X = list(zip(*DATA))
     built = []
     for index, (text, label) in enumerate(zip(A, X)):
         built.append(
             Record(str(index), data={"features": {"A": text, "X": label}}))
     cls.records = built
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
     cls.model_dir = tempfile.TemporaryDirectory()
     # NOTE(review): the second layer passes ``relu`` unquoted, unlike the
     # other two layers; confirm this is intentional.
     extra_layers = [
         "Dense(units = 120, activation='relu')",
         "Dense(units = 64, activation=relu)",
         "Dense(units = 2, activation='softmax')",
     ]
     hub_url = (
         "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1"
     )
     model_config = TextClassifierConfig(
         directory=cls.model_dir.name,
         classifications=[0, 1],
         features=cls.features,
         predict=Feature("X", int, 1),
         add_layers=True,
         layers=extra_layers,
         model_path=hub_url,
         epochs=30,
     )
     cls.model = TextClassificationModel(model_config)
Exemple #18
0
 async def input_set(self, record: Record) -> List[Input]:
     """Build the list of inputs derived from ``record`` and the config.

     The list contains, in order: one input per configured feature holding
     the record's value for it; one input per configured ``(value, name)``
     pair using the dataflow definition called ``name``; if ``length`` is
     configured, an ``int`` input holding ``await self.sctx.length()``; and
     if ``record_def`` is configured, a ``string`` input holding the
     record's key.
     """
     config = self.parent.config
     inputs = []
     # Feature values taken from the record itself
     for feature in config.features:
         feature_definition = Definition(
             name=feature.name,
             primitive=str(feature.dtype()),
         )
         inputs.append(
             Input(
                 value=record.feature(feature.name),
                 definition=feature_definition,
             ))
     # Static inputs declared in the config
     for value, name in config.inputs:
         inputs.append(
             Input(
                 value=value,
                 definition=config.dataflow.definitions[name],
             ))
     # Optional: number of records as reported by the source context
     if config.length:
         inputs.append(
             Input(
                 value=await self.sctx.length(),
                 definition=Definition(
                     name=config.length,
                     primitive="int",
                 ),
             ))
     # Optional: the key of the record being processed
     if config.record_def:
         inputs.append(
             Input(
                 value=record.key,
                 definition=Definition(
                     name=config.record_def,
                     primitive="string",
                 ),
             ))
     return inputs
Exemple #19
0
 def setUpClass(cls):
     """Create the DNN regression model and its generated training data.

     2000 records are generated from uniform random values of two
     features, with ``TARGET`` computed as ``2 * feature_1 + 3 * feature_2``.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature1 = Feature("feature_1", float, 1)
     cls.feature2 = Feature("feature_2", float, 1)
     cls.features = Features(cls.feature1, cls.feature2)
     model_config = DNNRegressionModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=40,
         hidden=[50, 20, 10],
         predict=Feature("TARGET", float, 1),
         features=cls.features,
     )
     cls.model = DNNRegressionModel(model_config)
     # Generating data f(x1,x2) = 2*x1 + 3*x2
     sample_count = 2000
     samples = np.random.rand(2, sample_count)
     generated = []
     for index in range(0, sample_count):
         x1 = samples[0][index]
         x2 = samples[1][index]
         features = {
             cls.feature1.name: float(x1),
             cls.feature2.name: float(x2),
             "TARGET": 2 * x1 + 3 * x2,
         }
         generated.append(
             Record("x" + str(random.random()), data={"features": features}))
     cls.records = generated
     memory_source = MemorySource(MemorySourceConfig(records=cls.records))
     cls.sources = Sources(memory_source)
Exemple #20
0
 async def setUp(self):
     """Write ten random records to a temporary JSON file and patch loaders.

     After the records are written through a ``JSONSource``, the file is
     read back to make sure it holds as many entries under the config's
     tag as were written. The three ``load`` entry points are then patched
     for the remainder of the test via ``self._stack``.
     """
     await super().setUp()
     self.records = [Record(str(random.random())) for _ in range(0, 10)]
     self.temp_filename = self.mktempfile()
     self.sconfig = FileSourceConfig(
         filename=self.temp_filename,
         readwrite=True,
         allowempty=True,
     )
     async with JSONSource(self.sconfig) as source:
         async with source() as sctx:
             for record in self.records:
                 await sctx.update(record)
     file_text = Path(self.sconfig.filename).read_text()
     contents = json.loads(file_text)
     # Ensure there are records in the file
     stored = contents.get(self.sconfig.tag)
     self.assertEqual(
         len(stored),
         len(self.records),
         "RecordsTestCase JSON file erroneously initialized as empty",
     )
     # TODO(p3) For some reason patching Model.load doesn't work
     replacements = (
         ("dffml.model.model.Model.load", model_load),
         ("dffml.df.base.OperationImplementation.load", opimp_load),
         ("dffml.df.types.Operation.load", op_load),
     )
     for target, replacement in replacements:
         self._stack.enter_context(patch(target, new=replacement))
Exemple #21
0
 async def test_02_predict(self):
     """Predict one record and check the relative error against the target.

     The expected target is ``2 * x1 + 3 * x2``, which per the original
     note should match the function used to generate the training data in
     ``setUpClass``. The test passes when a single prediction is returned
     for the record and its relative error is below 0.3.

     The ``1e-6`` term belongs in the denominator, where it guards the
     division; adding it to the finished ratio had no protective effect.
     """
     test_feature_val = [
         0,
         1.5,
         2,
     ]  # inserting zero so that its 1-indexable
     test_target = 2 * test_feature_val[1] + 3 * test_feature_val[2]
     a = Record(
         "a",
         data={
             "features": {
                 self.feature1.name: test_feature_val[1],
                 self.feature2.name: test_feature_val[2],
             }
         },
     )
     async with Sources(MemorySource(MemorySourceConfig(
             records=[a]))) as sources, self.model as model:
         target_name = model.config.predict.name
         async with sources() as sctx, model() as mctx:
             res = [record async for record in mctx.predict(sctx.records())]
             self.assertEqual(len(res), 1)
         self.assertEqual(res[0].key, a.key)
         predicted = res[0].prediction(target_name).value
         # Relative error, with a small epsilon guarding the denominator
         test_error_norm = abs(
             (test_target - predicted) / (test_target + 1e-6))
         error_threshold = 0.3
         self.assertLess(test_error_norm, error_threshold)
Exemple #22
0
 async def model_predict(self, request, mctx):
     """Run prediction on the records posted in the request body.

     The body is read as JSON mapping record keys to record data. The
     records are served from an in-memory source, and that source's
     context is handed to ``mctx.predict``.

     Args:
         request: Request whose ``match_info`` holds ``chunk_size`` and
             whose JSON body holds the records.
         mctx: Model context used for prediction.

     Returns:
         A JSON response with ``iterkey`` set to ``None`` and ``records``
         mapping each predicted record's key to its export, or a
         ``BAD_REQUEST`` JSON error when ``chunk_size`` is not 0.
     """
     # TODO Provide an iterkey method for model prediction
     chunk_size = int(request.match_info["chunk_size"])
     if chunk_size != 0:
         return web.json_response(
             {"error": "Multiple request iteration not yet supported"},
             status=HTTPStatus.BAD_REQUEST,
         )
     # Create a source which will provide the records
     async with Sources(
         MemorySource(records=[
             Record(key, data=record_data)
             for key, record_data in (await request.json()).items()
         ])) as source:
         async with source() as sctx:
             # Feed them through prediction
             return web.json_response({
                 "iterkey": None,
                 "records": {
                     record.key: record.export()
                     async for record in mctx.predict(sctx)
                 },
             })
    def setUpClass(cls):
        """Create the XGBoost regressor and its generated train/test data.

        2000 records are generated from uniform random values of two
        features, with ``Target`` computed as
        ``2 * Feature1 + 3 * Feature2``. The first 1800 records back the
        training source and the remaining 200 the test source.
        """
        # Temporary directory that will hold the trained model
        cls.model_dir = tempfile.TemporaryDirectory()
        model_config = XGBRegressorModelConfig(
            features=Features(Feature("Feature1", float, 1),
                              Feature("Feature2")),
            predict=Feature("Target", float, 1),
            directory=cls.model_dir.name,
        )
        cls.model = XGBRegressorModel(model_config)
        # Generating data f(x1,x2) = 2*x1 + 3*x2
        sample_count = 2000
        samples = np.random.rand(2, sample_count)
        generated = []
        for index in range(0, sample_count):
            x1 = samples[0][index]
            x2 = samples[1][index]
            features = {
                "Feature1": float(x1),
                "Feature2": float(x2),
                "Target": 2 * x1 + 3 * x2,
            }
            generated.append(
                Record("x" + str(random.random()),
                       data={"features": features}))
        cls.records = generated

        training_part = cls.records[:1800]
        testing_part = cls.records[1800:]
        cls.trainingsource = Sources(
            MemorySource(MemorySourceConfig(records=training_part)))
        cls.testsource = Sources(
            MemorySource(MemorySourceConfig(records=testing_part)))
Exemple #24
0
    async def model_predict(self, request, mctx):
        """Run prediction on the records posted in the request body.

        The body is read as JSON mapping record keys to record data. The
        resulting records are fed to ``mctx.predict`` through an async
        generator. Returns a JSON response with ``iterkey`` set to ``None``
        and ``records`` mapping each predicted record's key to its export,
        or a ``BAD_REQUEST`` JSON error when ``chunk_size`` is not 0.
        """
        # TODO Provide an iterkey method for model prediction
        chunk_size = int(request.match_info["chunk_size"])
        if chunk_size != 0:
            return web.json_response(
                {"error": "Multiple request iteration not yet supported"},
                status=HTTPStatus.BAD_REQUEST,
            )
        # Get the records
        payload = await request.json()
        records = {}
        for key, record_data in payload.items():
            records[key] = Record(key, data=record_data)

        # Create an async generator to feed records
        async def record_gen():
            for record in records.values():
                yield record

        # Feed them through prediction
        exported = {}
        async for predicted in mctx.predict(record_gen()):
            exported[predicted.key] = predicted.export()
        return web.json_response({"iterkey": None, "records": exported})
Exemple #25
0
    def setUpClass(cls):
        """Build the features, records, sources, VW model and scorer.

        ``DATA`` is transposed into nine columns ``A``..``H`` and ``X``;
        one record is created per row, keyed by the row index. ``X`` is
        the prediction target, ``H`` the importance feature and ``G`` the
        tag feature.
        """
        cls.model_dir = tempfile.TemporaryDirectory()
        cls.features = Features()
        # Six float features followed by two int features
        feature_types = (
            ("A", float),
            ("B", float),
            ("C", float),
            ("D", float),
            ("E", float),
            ("F", float),
            ("G", int),
            ("H", int),
        )
        for feature_name, feature_type in feature_types:
            cls.features.append(Feature(feature_name, feature_type, 1))

        A, B, C, D, E, F, G, H, X = list(zip(*DATA))
        columns = {
            "A": A,
            "B": B,
            "C": C,
            "D": D,
            "E": E,
            "F": F,
            "G": G,
            "H": H,
            "X": X,
        }
        built = []
        for index in range(0, len(A)):
            row_features = {
                name: column[index] for name, column in columns.items()
            }
            built.append(Record(str(index), data={"features": row_features}))
        cls.records = built

        memory_source = MemorySource(MemorySourceConfig(records=cls.records))
        cls.sources = Sources(memory_source)
        model_config = VWConfig(
            location=cls.model_dir.name,
            features=cls.features,
            predict=Feature("X", float, 1),
            # A and B will be namespace n1
            # A and C will be in namespace n2
            namespace=["n1_A_B", "n2_A_C"],
            importance=Feature("H", int, 1),
            tag=Feature("G", int, 1),
            task="regression",
            vwcmd=[
                "l2",
                "0.1",
                "loss_function",
                "squared",
                "passes",
                "10",
            ],
        )
        cls.model = VWModel(model_config)
        cls.scorer = MeanSquaredErrorAccuracy()
Exemple #26
0
 async def test_02_predict(self):
     """Predict a single record and expect a truthy predicted value.

     Exactly one result must come back and it must carry the key of the
     record that was submitted.
     """
     a = Record("a", data={"features": {self.feature.NAME: 1}})
     memory_source = MemorySource(MemorySourceConfig(records=[a]))
     async with Sources(memory_source) as sources, self.model as model:
         target_name = model.config.predict.NAME
         async with sources() as sctx, model() as mctx:
             res = []
             async for record in mctx.predict(sctx.records()):
                 res.append(record)
             self.assertEqual(len(res), 1)
         only_result = res[0]
         self.assertEqual(only_result.key, a.key)
         self.assertTrue(res[0].prediction(target_name).value)
Exemple #27
0
    def setUpClass(cls):
        """Build train/test records, sources, the spaCy NER model and scorer.

        ``TRAIN_DATA`` and ``TEST_DATA`` are each transposed into a
        sentence column and an entities column; one record is created per
        row, keyed by the row index.
        """

        def build_records(sentences, entities):
            # One record per row, keyed by its position in the data set
            built = []
            for index in range(len(entities)):
                features = {
                    "sentence": sentences[index],
                    "entities": entities[index],
                }
                built.append(Record(str(index), data={"features": features}))
            return built

        A_train, X_train = list(zip(*TRAIN_DATA))
        A_test, X_test = list(zip(*TEST_DATA))

        cls.train_records = build_records(A_train, X_train)
        cls.test_records = build_records(A_test, X_test)

        train_memory = MemorySource(
            MemorySourceConfig(records=cls.train_records)
        )
        cls.train_sources = Sources(train_memory)
        test_memory = MemorySource(
            MemorySourceConfig(records=cls.test_records)
        )
        cls.test_sources = Sources(test_memory)
        cls.model_dir = tempfile.TemporaryDirectory()
        model_config = SpacyNERModelConfig(
            model_name="en_core_web_sm",
            location=cls.model_dir.name,
            n_iter=10,
            dropout=0.4,
        )
        cls.model = SpacyNERModel(model_config)
        cls.scorer = SpacyNerAccuracy()
Exemple #28
0
 async def test_predict(self):
     """POST every source record to the predict endpoint and check results.

     Returned records must arrive with keys ``0, 1, 2, ...`` in order.
     For each one, the ``by_ten`` feature must equal the predicted
     ``Salary`` divided by 10, and the prediction confidence must equal
     the key as a float. The number of returned records must equal
     ``self.num_records``.
     """
     payload = {}
     async for source_record in self.sctx.records():
         payload[source_record.key] = source_record.export()
     endpoint = f"/model/{self.mlabel}/predict/0"
     async with self.post(endpoint, json=payload) as r:
         response = await r.json()
         checked = 0
         returned = response["records"].items()
         for position, (key, record_data) in enumerate(returned):
             record = Record(key, data=record_data)
             self.assertEqual(int(record.key), position)
             self.assertEqual(
                 record.feature("by_ten"),
                 record.prediction("Salary").value / 10,
             )
             self.assertEqual(
                 float(record.key),
                 record.prediction("Salary").confidence,
             )
             checked = position + 1
         self.assertEqual(checked, self.num_records)
Exemple #29
0
 async def update(self, record: Record):
     """Insert ``record`` into ``ml_data`` as JSON, or update it if present.

     The record's ``dict()`` is serialized with ``json.dumps`` and stored
     under the record's key; on a duplicate key the stored JSON is
     replaced.

     The stored record is read back only for a debug log line, so that
     read is skipped unless the logger is enabled for DEBUG. This avoids
     an extra ``self.record()`` call on every update.
     """
     import logging

     db = self.conn
     # Just dump it. If you want to be able to query the data easily, you
     # need to massage the columns in this table to your liking, and perhaps
     # add more tables.
     marshall = json.dumps(record.dict())
     await db.execute(
         "INSERT INTO ml_data (key, json) VALUES(%s, %s) "
         "ON DUPLICATE KEY UPDATE json = %s",
         (record.key, marshall, marshall),
     )
     self.logger.debug("updated: %s", marshall)
     if self.logger.isEnabledFor(logging.DEBUG):
         self.logger.debug("update: %s", await self.record(record.key))
Exemple #30
0
 def setUpClass(self):
     """Create four records from the ``A``-``D`` columns and wrap them.

     Each record is keyed by its index and carries the ``Years``,
     ``Expertise``, ``Trust`` and ``Salary`` features taken from ``A``,
     ``B``, ``C`` and ``D`` respectively.
     """
     built = []
     for index in range(4):
         features = {
             "Years": A[index],
             "Expertise": B[index],
             "Trust": C[index],
             "Salary": D[index],
         }
         built.append(Record(str(index), data={"features": features}))
     self.records = built
     memory_source = MemorySource(MemorySourceConfig(records=self.records))
     self.source = Sources(memory_source)