async def test_sources(self):
    """
    Start the HTTP service with a JSON source configured through command line
    arguments and check that a record saved to that source beforehand is
    returned by the record endpoint.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        # Source the HTTP API will pre-load
        source_path = pathlib.Path(tempdir, "source.json")
        source = JSONSource(
            filename=str(source_path), allowempty=True, readwrite=True
        )
        # Record the source will have in it
        myrecord = Record("myrecord", data={"features": {"f1": 0}})
        await save(source, myrecord)
        async with ServerRunner.patch(HTTPService.server) as tserver:
            # Port 0 is passed so the test never hard codes a port number
            server_args = [
                "-insecure",
                "-port",
                "0",
                "-sources",
                "mysource=json",
                "-source-mysource-filename",
                source.config.filename,
            ]
            cli = await tserver.start(HTTPService.server.cli(*server_args))
            record_url = "/source/mysource/record/myrecord"
            async with self.get(cli, record_url) as r:
                served = await r.json()
                self.assertEqual(served, myrecord.export())
async def update(self, record: Record):
    """
    Insert or replace the row for ``record`` in the ``features`` table and,
    when the record has a prediction stored under ``"target_name"``, in the
    ``prediction`` table as well.

    A ``KeyError`` raised by ``record.prediction("target_name")`` is treated
    as "this record has no prediction" and the prediction table is left
    untouched. Any exception raised while writing to the database, including
    a ``KeyError``, propagates to the caller.
    """
    db = self.parent.db
    # Store feature data. Every column starts out as None so there is exactly
    # one value per column, in column order, whatever the record provides.
    feature_cols = self.parent.FEATURE_COLS
    feature_data = OrderedDict.fromkeys(feature_cols)
    feature_data.update(record.features(feature_cols))
    await db.execute(
        "INSERT OR REPLACE INTO features (key, "
        + ", ".join(feature_cols)
        + ") "
        "VALUES(?, " + ", ".join("?" * len(feature_cols)) + ")",
        [record.key] + list(feature_data.values()),
    )
    # Store prediction. Only the lookup is guarded, so that a KeyError coming
    # from anywhere else (for instance the database layer) is not hidden.
    try:
        prediction = record.prediction("target_name")
    except KeyError:
        return
    prediction_cols = self.parent.PREDICTION_COLS
    prediction_data = OrderedDict.fromkeys(prediction_cols)
    prediction_data.update(prediction.dict())
    await db.execute(
        "INSERT OR REPLACE INTO prediction (key, "
        + ", ".join(prediction_cols)
        + ") "
        "VALUES(?, " + ", ".join("?" * len(prediction_cols)) + ")",
        [record.key] + list(prediction_data.values()),
    )
async def _add_memory_source(self):
    """
    Register an in-memory source, and a context for it, with the running app
    under the label ``self.slabel``, then yield once while both are open.

    The source holds ``self.num_records`` records keyed ``"0"``, ``"1"``, ...
    each with a single ``by_ten`` feature equal to ten times its index.
    """
    records = [
        Record(str(number), data={"features": {"by_ten": number * 10}})
        for number in range(self.num_records)
    ]
    async with MemorySource(records=records) as source:
        self.source = source
        self.cli.app["sources"][self.slabel] = source
        async with source() as sctx:
            self.sctx = sctx
            self.cli.app["source_contexts"][self.slabel] = sctx
            yield
async def __aenter__(self):
    """
    Populate the source when its context is entered

    Every ``*.yml`` file found recursively under ``self.config.directory`` is
    parsed, and each YAML document in it becomes the features of one record.
    The record key is built from the name of the directory holding the file
    and the document's ``name`` entry.
    """
    for yaml_path in self.config.directory.rglob("*.yml"):
        parent_name = yaml_path.parent.name
        contents = yaml_path.read_text()
        for doc in yaml.safe_load_all(contents):
            key = f'https://github.com/{parent_name}/{doc["name"]}'
            self.mem[key] = Record(key, data={"features": doc})
    self.logger.debug("%r loaded %d records", self, len(self.mem))
    return self
async def update(self, record: Record):
    """
    Store ``record`` in the ``ml_data`` table as a JSON dump of
    ``record.dict()``, overwriting the JSON already stored for the same key.

    When debug logging is enabled for ``self.logger`` the record is read back
    through ``self.record()`` and logged. When it is not, that read-back is
    skipped entirely.
    """
    import logging

    db = self.conn
    # Just dump it (if you want to set up the queries easily, then you need to
    # massage the columns in this table to your liking, and perhaps add more
    # tables).
    marshall = json.dumps(record.dict())
    await db.execute(
        "INSERT INTO ml_data (key, json) VALUES(%s, %s) "
        "ON DUPLICATE KEY UPDATE json = %s",
        (record.key, marshall, marshall),
    )
    self.logger.debug("updated: %s", marshall)
    # The read-back is only useful for the debug message, so do not await
    # self.record() when that message would be discarded anyway.
    if self.logger.isEnabledFor(logging.DEBUG):
        self.logger.debug("update: %s", await self.record(record.key))
async def record(self, key: str):
    """
    Build the record for ``key`` from the ``ml_data`` and ``status`` tables.

    The JSON stored in ``ml_data`` (if any) is merged into a fresh record,
    then the ``maintained`` column of ``status`` (if any) is added, as a
    string, under the ``maintained`` feature. A record is always returned,
    even when neither table has a row for ``key``.
    """
    record = Record(key)
    db = self.conn
    # Get features
    await db.execute("SELECT json FROM ml_data WHERE key=%s", (key,))
    row = await db.fetchone()
    stored_json = None if row is None else row[0]
    if stored_json is not None:
        record.merge(Record(key, data=json.loads(stored_json)))
    # Get the maintained status
    await db.execute("SELECT maintained FROM `status` WHERE key=%s", (key,))
    row = await db.fetchone()
    maintained = None if row is None else row[0]
    if maintained is not None:
        record.evaluated({"maintained": str(maintained)})
    return record
async def record(self, key: str):
    """
    Build the record for ``key`` from the ``features`` and ``prediction``
    tables.

    A record is always returned; features and the ``"target_name"``
    prediction are only filled in when the matching row exists.
    """
    db = self.parent.db
    record = Record(key)
    # Get features
    feature_columns = ", ".join(self.parent.FEATURE_COLS)
    cursor = await db.execute(
        "SELECT " + feature_columns + " FROM features WHERE key=?",
        (record.key,),
    )
    feature_row = await cursor.fetchone()
    if feature_row is not None:
        record.evaluated(feature_row)
    # Get prediction
    cursor = await db.execute(
        "SELECT * FROM prediction WHERE key=?", (record.key,)
    )
    prediction_row = await cursor.fetchone()
    if prediction_row is None:
        return record
    record.predicted(
        "target_name",
        prediction_row["value"],
        prediction_row["confidence"],
    )
    return record
async def test_update(self):
    """
    Saving a record whose key is already in the source replaces it.

    The source starts with one row, two records are saved, then the second
    one is saved again with new values. Three records are expected back, with
    columns ``B`` and ``C`` reported as predictions and ``A`` as a feature.
    """
    frame = pd.DataFrame([{"A": 1, "B": 2, "C": 3}])
    source = DataFrameSource(
        DataFrameSourceConfig(dataframe=frame, predictions=["C", "B"])
    )
    # Save some data in the source
    initial = [
        Record("1", data={"features": {"A": 4, "B": 5, "C": 6}}),
        Record("2", data={"features": {"A": 7, "B": 8, "C": 9}}),
    ]
    await save(source, *initial)
    # Save record "2" a second time, with different values
    await save(
        source, Record("2", data={"features": {"A": 15, "B": 16, "C": 14}})
    )
    records = []
    async for record in load(source):
        records.append(record)
    self.assertEqual(len(records), 3)
    # One (features, prediction values) pair per record, in load order
    expected = (
        ({"A": 1}, {"B": 2, "C": 3}),
        ({"A": 4}, {"B": 5, "C": 6}),
        ({"A": 15}, {"B": 16, "C": 14}),
    )
    for index, (features, values) in enumerate(expected):
        self.assertDictEqual(records[index].features(), features)
        self.assertDictEqual(
            records[index].predictions(),
            {
                name: {"confidence": 0.0, "value": value}
                for name, value in values.items()
            },
        )
async def test_dataframe(self):
    """
    Records saved to a DataFrame backed source are loaded back after the row
    the DataFrame started with, with column ``C`` reported as a prediction
    and columns ``A`` and ``B`` as features.
    """
    frame = pd.DataFrame([{"A": 1, "B": 2, "C": 3}])
    config = DataFrameSourceConfig(dataframe=frame, predictions=["C"])
    source = DataFrameSource(config)
    # Save some data in the source
    to_save = [
        Record(
            key,
            data={
                "features": {"A": a, "B": b},
                "prediction": {"C": {"value": c}},
            },
        )
        for key, a, b, c in (("1", 4, 5, 6), ("2", 7, 8, 9))
    ]
    await save(source, *to_save)
    # Load all the records
    records = [record async for record in load(source)]
    self.assertIsInstance(records, list)
    self.assertEqual(len(records), 3)
    # One (features, value of C) pair per record, in load order
    expected = (
        ({"A": 1, "B": 2}, 3),
        ({"A": 4, "B": 5}, 6),
        ({"A": 7, "B": 8}, 9),
    )
    for index, (features, value) in enumerate(expected):
        self.assertDictEqual(records[index].features(), features)
        self.assertDictEqual(
            records[index].predictions(),
            {"C": {"confidence": 0.0, "value": value}},
        )
async def test_scorer(self):
    """
    Train an SLR model on points of the line ``y = 5x + 3``, save further
    points of the same line to a JSON source, then ask the HTTP API to score
    the model against that source with the ``mse`` scorer and expect an
    accuracy of ``0.0`` in the response.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        model = SLRModel(
            features=Features(Feature("f1", float, 1)),
            predict=Feature("ans", int, 1),
            location=tempdir,
        )
        # y = m * x + b for equation SLR is solving for
        m = 5
        b = 3
        # Train the model
        training_data = [{"f1": x, "ans": m * x + b} for x in range(10)]
        await train(model, *training_data)
        source = JSONSource(
            filename=pathlib.Path(tempdir, "source.json"),
            allowempty=True,
            readwrite=True,
        )
        # Record the source will have in it
        scoring_records = [
            Record(str(i), data={"features": {"f1": x, "ans": (m * x) + b}})
            for i, x in enumerate(range(10, 20))
        ]
        await save(source, *scoring_records)
        async with ServerRunner.patch(HTTPService.server) as tserver:
            server_args = [
                "-insecure",
                "-port",
                "0",
                "-models",
                "mymodel=slr",
                "-model-mymodel-location",
                tempdir,
                "-model-mymodel-features",
                "f1:float:1",
                "-model-mymodel-predict",
                "ans:int:1",
                "-features",
                "ans:int:1",
                "-sources",
                "mysource=json",
                "-source-mysource-filename",
                str(source.config.filename),
                "-scorers",
                "myscorer=mse",
            ]
            cli = await tserver.start(HTTPService.server.cli(*server_args))
            score_url = "/scorer/myscorer/mymodel/score"
            async with self.post(cli, score_url, json=["mysource"]) as r:
                self.assertEqual(await r.json(), {"accuracy": 0.0})