async def test_save_and_load(self):
    """Round-trip two records through a CSV source and load them back.

    Saves records "1" and "2" into a ``CSVSource`` backed by
    ``self.save_and_load``, then checks the exported form of every loaded
    record and of a load restricted to key "1". The expected prediction
    values are the strings "1" and "2" although integers were saved.
    """
    csv_source = CSVSource(
        filename=self.save_and_load, allowempty=True, readwrite=True
    )
    to_save = [
        Record(
            "1",
            data={
                "features": {"A": 0, "B": 1},
                "prediction": {"C": {"value": 1, "confidence": 1.0}},
            },
        ),
        Record(
            "2",
            data={
                "features": {"A": 3, "B": 4},
                "prediction": {"C": {"value": 2, "confidence": 1.0}},
            },
        ),
    ]
    await save(csv_source, *to_save)

    expected_first = {
        "key": "1",
        "features": {"A": 0, "B": 1},
        "prediction": {"C": {"confidence": 1.0, "value": "1"}},
        "extra": {},
    }
    expected_second = {
        "key": "2",
        "features": {"A": 3, "B": 4},
        "prediction": {"C": {"confidence": 1.0, "value": "2"}},
        "extra": {},
    }

    # Every record held by the source
    exported = []
    async for record in load(csv_source):
        exported.append(record.export())
    self.assertEqual(exported, [expected_first, expected_second])

    # Only the record whose key was requested
    exported = []
    async for record in load(csv_source, "1"):
        exported.append(record.export())
    self.assertEqual(exported, [expected_first])
async def main():
    """Load the training dataset two ways and check the resulting records.

    Reads the dataset URL from ``sys.argv[1]`` and the cache directory from
    ``sys.argv[2]``. The records are loaded first through
    ``my_training_dataset.source(...)`` and then through
    ``my_training_dataset(...)`` used as an async context manager; each time
    the record count and one exported record are compared with the expected
    values.

    Raises:
        IndexError: If fewer than two command line arguments were given.
        AssertionError: If the loaded records differ from the expected ones.
    """
    # Grab arguments from command line
    url = sys.argv[1]
    cache_dir = pathlib.Path(sys.argv[2])

    # Usage via Source class set as property .source of function
    records = [
        record
        async for record in load(
            my_training_dataset.source(url=url, cache_dir=cache_dir)
        )
    ]

    # Create a test case to do comparisons
    tc = unittest.TestCase()

    tc.assertEqual(len(records), 5)
    tc.assertDictEqual(
        records[0].export(),
        {
            "key": "0",
            "features": {"feed": 0.0, "face": 0, "dead": 0, "beef": 0},
            "extra": {},
        },
    )

    # Usage as context manager to create source
    async with my_training_dataset(url=url, cache_dir=cache_dir) as source:
        records = [record async for record in load(source)]
        tc.assertEqual(len(records), 5)
        tc.assertDictEqual(
            records[2].export(),
            {
                "key": "2",
                "features": {"feed": 0.2, "face": 2, "dead": 20, "beef": 200},
                "extra": {},
            },
        )
async def lookup_population(self, city: str, state: str):
    """Yield the ``POPESTIMATE2019`` value for ``city`` from a state dataset.

    The CSV for ``state`` is fetched with ``cached_download`` into
    ``~/.cache/dffml/datasets/population`` and its records are scanned for
    the one whose ``NAME`` feature equals ``city``.

    Args:
        city: Value compared against each record's ``NAME`` feature.
        state: Key into ``population_dataset_urls``; also used as the name
            of the cached CSV file.

    Yields:
        ``{"population": <POPESTIMATE2019 feature>}`` for each matching
        record.

    Raises:
        Exception: If ``city`` is not a key of ``temperature_dataset_urls``
            or ``state`` is not a key of ``population_dataset_urls``.
    """
    # NOTE(review): the city is validated against the *temperature* dataset
    # keys rather than anything population related - confirm this is intended.
    if city not in temperature_dataset_urls:
        raise Exception(f"City: {city} not found in dataset")
    if state not in population_dataset_urls:
        raise Exception(f"State: {state} not found in dataset")

    cache_dir = (
        pathlib.Path("~", ".cache", "dffml", "datasets", "population")
        .expanduser()
        .resolve()
    )
    dataset = population_dataset_urls[state]
    filepath = await cached_download(
        dataset["url"],
        cache_dir / f"{state}.csv",
        dataset["expected_sha384_hash"],
    )

    async for record in load(filepath):
        # Export each record once and reuse its features for both lookups
        features = export(record)["features"]
        if features["NAME"] == city:
            yield {"population": features["POPESTIMATE2019"]}
async def test_update(self):
    """Saving a record under an existing key replaces the earlier one.

    The source starts from a one-row DataFrame with columns "C" and "B"
    configured as predictions. Records "1" and "2" are saved, then "2" is
    saved again with new values. Three records are expected on load, with
    "A" as the only feature and "B"/"C" reported as predictions with a
    confidence of 0.0; the third record carries the values of the second
    save of key "2".
    """
    frame = pd.DataFrame([{"A": 1, "B": 2, "C": 3}])
    config = DataFrameSourceConfig(dataframe=frame, predictions=["C", "B"])
    source = DataFrameSource(config)

    # Save some data in the source
    first = Record("1", data={"features": {"A": 4, "B": 5, "C": 6}})
    second = Record("2", data={"features": {"A": 7, "B": 8, "C": 9}})
    await save(source, first, second)

    # Save key "2" once more with different values
    replacement = Record(
        "2", data={"features": {"A": 15, "B": 16, "C": 14}}
    )
    await save(source, replacement)

    loaded = []
    async for record in load(source):
        loaded.append(record)
    self.assertEqual(len(loaded), 3)

    # (expected features, expected prediction values) per loaded record
    expectations = [
        ({"A": 1}, {"B": 2, "C": 3}),
        ({"A": 4}, {"B": 5, "C": 6}),
        ({"A": 15}, {"B": 16, "C": 14}),
    ]
    for record, (features, values) in zip(loaded, expectations):
        self.assertDictEqual(record.features(), features)
        self.assertDictEqual(
            record.predictions(),
            {
                name: {"confidence": 0.0, "value": value}
                for name, value in values.items()
            },
        )
async def test_dataframe(self):
    """Save two records into a DataFrame source and load all three back.

    The source starts from a one-row DataFrame with column "C" configured
    as a prediction. After saving records "1" and "2", the load is expected
    to return a list of three records whose features are "A" and "B" and
    whose "C" prediction has a confidence of 0.0.
    """
    frame = pd.DataFrame([{"A": 1, "B": 2, "C": 3}])
    config = DataFrameSourceConfig(dataframe=frame, predictions=["C"])
    source = DataFrameSource(config)

    # Save some data in the source
    first = Record(
        "1",
        data={
            "features": {"A": 4, "B": 5},
            "prediction": {"C": {"value": 6}},
        },
    )
    second = Record(
        "2",
        data={
            "features": {"A": 7, "B": 8},
            "prediction": {"C": {"value": 9}},
        },
    )
    await save(source, first, second)

    # Load all the records
    loaded = []
    async for record in load(source):
        loaded.append(record)
    self.assertIsInstance(loaded, list)
    self.assertEqual(len(loaded), 3)

    # (expected features, expected value of prediction "C") per record
    expectations = [
        ({"A": 1, "B": 2}, 3),
        ({"A": 4, "B": 5}, 6),
        ({"A": 7, "B": 8}, 9),
    ]
    for record, (features, value) in zip(loaded, expectations):
        self.assertDictEqual(record.features(), features)
        self.assertDictEqual(
            record.predictions(),
            {"C": {"confidence": 0.0, "value": value}},
        )