async def test_csv_tag(self):
    """Merge records into a CSV file under two tags and read each tag back.

    The JSON records in ``self.temp_filename`` are first merged into a new
    CSV file, then that file is merged into itself with the destination
    tagged ``"sometag"``. Both the default-tag view and the ``"sometag"``
    view must yield ``len(self.repos)`` records, and the raw file text must
    mention both ``"untagged"`` and ``"sometag"``.
    """
    # Flags passed, in this order, at the end of both merge invocations.
    shared_flags = (
        "-source-src-allowempty",
        "-source-dest-allowempty",
        "-source-src-readwrite",
        "-source-dest-readwrite",
    )
    with non_existant_tempfile() as csv_tempfile:
        # Step 1: copy the pre-populated JSON data into the CSV file.
        with self.subTest(json_to_csv=True):
            await Merge.cli(
                "dest=csv",
                "src=json",
                "-source-dest-filename",
                csv_tempfile,
                "-source-src-filename",
                self.temp_filename,
                *shared_flags,
            )
        # Step 2: merge the CSV file into itself, tagging the destination.
        with self.subTest(merge_same_file=True):
            await Merge.cli(
                "dest=csv",
                "src=csv",
                "-source-dest-filename",
                csv_tempfile,
                "-source-dest-tag",
                "sometag",
                "-source-src-filename",
                csv_tempfile,
                *shared_flags,
            )
        file_text = Path(csv_tempfile).read_text()
        self.assertIn("untagged", file_text)
        self.assertIn("sometag", file_text)
        # Step 3: load the file once without a tag and once with "sometag".
        for tag in (None, "sometag"):
            config_kwargs = {"filename": csv_tempfile}
            if tag is not None:
                config_kwargs["tag"] = tag
            with self.subTest(tagged=tag):
                async with CSVSource(CSVSourceConfig(**config_kwargs)) as source:
                    async with source() as sctx:
                        loaded = [repo async for repo in sctx.repos()]
                        self.assertEqual(len(loaded), len(self.repos))
                # Re-read the file after the source has been closed.
                file_text = Path(csv_tempfile).read_text()
                self.assertIn("sometag", file_text)
                self.assertIn("untagged", file_text)
async def test_key(self):
    """Records can be fetched by the values of the column named by ``key``.

    A two-row CSV with header ``KeyHeader,ValueColumn`` is written to disk
    and opened with ``key="KeyHeader"``; records ``"a"`` and ``"b"`` must
    expose ``ValueColumn`` as the integers 42 and 420.

    The fixture is written to a regular file inside a temporary directory
    and closed before the source opens it. Re-opening a still-open
    ``NamedTemporaryFile`` by name is not possible on Windows, and closing
    the handle guarantees the rows are on disk without relying on
    ``seek()`` to flush them.
    """
    import os

    with tempfile.TemporaryDirectory() as testdir:
        testfile = os.path.join(testdir, "keys.csv")
        # Binary mode keeps the "\n" line endings exactly as written.
        with open(testfile, "wb") as fileobj:
            fileobj.write(b"KeyHeader,ValueColumn\n")
            fileobj.write(b"a,42\n")
            fileobj.write(b"b,420\n")
        async with CSVSource(
            CSVSourceConfig(filename=testfile, key="KeyHeader")
        ) as source:
            async with source() as sctx:
                record_a = await sctx.record("a")
                record_b = await sctx.record("b")
                self.assertEqual(record_a.feature("ValueColumn"), 42)
                self.assertEqual(record_b.feature("ValueColumn"), 420)
async def test_source(self):
    """Configure a CSV source named ``salary`` over HTTP and check its config.

    Posts the parsed ``--source-*`` arguments to
    ``/configure/source/csv/salary``, expects the ``OK`` response, and then
    verifies that the registered source carries the expected
    ``CSVSourceConfig``.
    """
    payload = parse_unknown(
        "--source-filename", "dataset.csv", "--source-readonly"
    )
    async with self.post(
        "/configure/source/csv/salary", json=payload
    ) as response:
        body = await response.json()
        self.assertEqual(body, OK)
        registered = self.cli.app["sources"]
        self.assertIn("salary", registered)
        actual_config = registered["salary"].config
        expected_config = CSVSourceConfig(
            filename="dataset.csv",
            label="unlabeled",
            readonly=True,
            key="src_url",
            label_column="label",
        )
        self.assertEqual(actual_config, expected_config)
async def test_key(self):
    """Records can be fetched by the values of the column named by ``key``.

    Writes a two-row CSV (header ``KeyHeader,ValueColumn``) to a randomly
    named file in a temporary directory, opens it with ``key="KeyHeader"``
    and checks that records ``"a"`` and ``"b"`` expose ``ValueColumn`` as
    42 and 420.
    """
    csv_body = inspect.cleandoc(
        """
        KeyHeader,ValueColumn
        a,42
        b,420
        """
    )
    with tempfile.TemporaryDirectory() as scratch_dir:
        csv_path = os.path.join(scratch_dir, str(random.random()))
        pathlib.Path(csv_path).write_text(csv_body)
        source_config = CSVSourceConfig(filename=csv_path, key="KeyHeader")
        async with CSVSource(source_config) as source:
            async with source() as sctx:
                # Fetch both records before asserting anything.
                fetched = {
                    key: await sctx.record(key) for key in ("a", "b")
                }
                self.assertEqual(fetched["a"].feature("ValueColumn"), 42)
                self.assertEqual(fetched["b"].feature("ValueColumn"), 420)
async def test_source(self):
    """Configure a CSV source named ``salary`` and create a context for it.

    Posts the parsed source arguments to ``/configure/source/csv/salary``,
    expects ``OK`` and the expected ``CSVSourceConfig`` on the registered
    source, then requests ``/context/source/salary/salaryctx`` and checks
    that ``salaryctx`` appears among the application's source contexts.
    """
    payload = parse_unknown(
        "--source-filename", "dataset.csv", "-source-allowempty"
    )
    async with self.post(
        "/configure/source/csv/salary", json=payload
    ) as configure_response:
        configure_body = await configure_response.json()
        self.assertEqual(configure_body, OK)
        registered = self.cli.app["sources"]
        self.assertIn("salary", registered)
        actual_config = registered["salary"].config
        expected_config = CSVSourceConfig(
            filename=pathlib.Path("dataset.csv"),
            tag="untagged",
            key="key",
            tagcol="tag",
            allowempty=True,
        )
        self.assertEqual(actual_config, expected_config)
        with self.subTest(context="salaryctx"):
            async with self.get(
                "/context/source/salary/salaryctx"
            ) as context_response:
                context_body = await context_response.json()
                self.assertEqual(context_body, OK)
                self.assertIn("salaryctx", self.cli.app["source_contexts"])
async def setUpSource(self):
    """Return a CSV source over ``self.testfile``.

    The source is configured with ``allowempty=True`` and
    ``readwrite=True``.
    """
    source_config = CSVSourceConfig(
        filename=self.testfile,
        allowempty=True,
        readwrite=True,
    )
    return CSVSource(source_config)
async def setUpSource(self):
    """Return a CSV source over ``self.testfile`` with default options."""
    source_config = CSVSourceConfig(filename=self.testfile)
    return CSVSource(source_config)
from dffml.cli.ml import Train, Accuracy, PredictAll
from dffml.feature.feature import Features, DefFeature
from dffml.source.csv import CSVSource, CSVSourceConfig
from dffml_model_tensorflow.dnnr import (
    DNNRegressionModel,
    DNNRegressionModelConfig,
)

# One read-only CSV source per stage, created in the order
# training, test, predict.
training_data, test_data, predict_data = (
    CSVSource(CSVSourceConfig(filename=csv_name, readonly=True))
    for csv_name in ("training.csv", "test.csv", "predict.csv")
)

# DNN regression model: three input features, predicting "Salary".
model_config = DNNRegressionModelConfig(
    features=Features(
        DefFeature("Years", int, 1),
        DefFeature("Expertise", int, 1),
        DefFeature("Trust", float, 1),
    ),
    predict="Salary",
)
model = DNNRegressionModel(model_config)

# Train, then assess accuracy, then predict. The unpacking requires the
# prediction step to produce exactly two rows.
Train(model=model, sources=[training_data])()
accuracy = Accuracy(model=model, sources=[test_data])()
row0, row1 = PredictAll(model=model, sources=[predict_data])()

print("Accuracy", accuracy)
from dffml.cli.ml import Train, Accuracy, PredictAll
from dffml.feature.feature import Features, DefFeature
from dffml.source.csv import CSVSource, CSVSourceConfig
from dffml_model_tensorflow.dnnr import (
    DNNRegressionModel,
    DNNRegressionModelConfig,
)

# One CSV source per stage, each with readwrite=False, created in the
# order training, test, predict.
training_data, test_data, predict_data = (
    CSVSource(CSVSourceConfig(filename=csv_name, readwrite=False))
    for csv_name in ("training.csv", "test.csv", "predict.csv")
)

# DNN regression model: three input features, predicting the float
# feature "Salary".
model_config = DNNRegressionModelConfig(
    features=Features(
        DefFeature("Years", int, 1),
        DefFeature("Expertise", int, 1),
        DefFeature("Trust", float, 1),
    ),
    predict=DefFeature("Salary", float, 1),
)
model = DNNRegressionModel(model_config)

# Train, then assess accuracy, then predict. The unpacking requires the
# prediction step to produce exactly two rows.
Train(model=model, sources=[training_data])()
accuracy = Accuracy(model=model, sources=[test_data])()
row0, row1 = PredictAll(model=model, sources=[predict_data])()

print("Accuracy", accuracy)