Esempio n. 1
0
 async def test_csv_tag(self):
     """
     Merge records into one CSV file under two tags and read each tag back.

     The JSON data in ``self.temp_filename`` is first merged into a fresh
     CSV file, then that CSV file is merged into itself with the
     destination tag set to ``"sometag"``. Afterwards both the default and
     the ``"sometag"`` views of the file must yield as many repos as
     ``self.repos`` holds, and the file text must mention both tag names
     after every read.
     """
     # Trailing flags that both merge invocations pass, in this order
     shared_flags = (
         "-source-src-allowempty",
         "-source-dest-allowempty",
         "-source-src-readwrite",
         "-source-dest-readwrite",
     )
     with non_existant_tempfile() as csv_tempfile:
         # Move the pre-populated json data to a csv source
         with self.subTest(json_to_csv=True):
             await Merge.cli(
                 "dest=csv",
                 "src=json",
                 "-source-dest-filename",
                 csv_tempfile,
                 "-source-src-filename",
                 self.temp_filename,
                 *shared_flags,
             )
         # Merge one tag to another within the same file
         with self.subTest(merge_same_file=True):
             await Merge.cli(
                 "dest=csv",
                 "src=csv",
                 "-source-dest-filename",
                 csv_tempfile,
                 "-source-dest-tag",
                 "sometag",
                 "-source-src-filename",
                 csv_tempfile,
                 *shared_flags,
             )
         # Both tag names must appear in the file once the merges are done
         file_text = Path(csv_tempfile).read_text()
         for tag_name in ("untagged", "sometag"):
             self.assertIn(tag_name, file_text)
         # Check the untagged source
         with self.subTest(tagged=None):
             untagged_config = CSVSourceConfig(filename=csv_tempfile)
             async with CSVSource(untagged_config) as source, source() as sctx:
                 found = [repo async for repo in sctx.repos()]
                 self.assertEqual(len(found), len(self.repos))
         # Reading the untagged view must leave both tag names in the file
         file_text = Path(csv_tempfile).read_text()
         for tag_name in ("sometag", "untagged"):
             self.assertIn(tag_name, file_text)
         # Check the tagged source
         with self.subTest(tagged="sometag"):
             tagged_config = CSVSourceConfig(
                 filename=csv_tempfile, tag="sometag"
             )
             async with CSVSource(tagged_config) as source, source() as sctx:
                 found = [repo async for repo in sctx.repos()]
                 self.assertEqual(len(found), len(self.repos))
         # Reading the tagged view must leave both tag names in the file too
         file_text = Path(csv_tempfile).read_text()
         for tag_name in ("sometag", "untagged"):
             self.assertIn(tag_name, file_text)
Esempio n. 2
0
 async def test_key(self):
     """
     Look records up by the values of a custom key column.

     A two-row CSV file with the header ``KeyHeader,ValueColumn`` is
     written to disk and loaded through ``CSVSource`` with
     ``key="KeyHeader"``. The records fetched as ``"a"`` and ``"b"`` must
     expose ``ValueColumn`` as ``42`` and ``420`` respectively.
     """
     # Function-scope import: keeps this test independent of the module's
     # import block.
     import os

     # The fixture lives in a temporary directory and is closed before the
     # source opens it. A NamedTemporaryFile that is still open cannot be
     # opened a second time by name on Windows.
     with tempfile.TemporaryDirectory() as testdir:
         testfile = os.path.join(testdir, "key_test.csv")
         # Binary mode keeps the "\n" line endings exactly as written
         with open(testfile, "wb") as fileobj:
             fileobj.write(b"KeyHeader,ValueColumn\n")
             fileobj.write(b"a,42\n")
             fileobj.write(b"b,420\n")
         async with CSVSource(
             CSVSourceConfig(filename=testfile, key="KeyHeader")
         ) as source:
             async with source() as sctx:
                 record_a = await sctx.record("a")
                 record_b = await sctx.record("b")
                 self.assertEqual(record_a.feature("ValueColumn"), 42)
                 self.assertEqual(record_b.feature("ValueColumn"), 420)
Esempio n. 3
0
 async def test_source(self):
     """
     Configure a CSV source over HTTP and check what gets registered.

     Posts the parsed ``--source-filename dataset.csv --source-readonly``
     arguments to ``/configure/source/csv/salary``, expects the ``OK``
     reply, and then checks that the app's ``"sources"`` mapping holds a
     ``"salary"`` entry whose config equals the expected
     ``CSVSourceConfig``.
     """
     source_args = parse_unknown(
         "--source-filename", "dataset.csv", "--source-readonly"
     )
     async with self.post(
         "/configure/source/csv/salary", json=source_args
     ) as response:
         reply = await response.json()
         self.assertEqual(reply, OK)
         self.assertIn("salary", self.cli.app["sources"])
         actual_config = self.cli.app["sources"]["salary"].config
         expected_config = CSVSourceConfig(
             filename="dataset.csv",
             label="unlabeled",
             readonly=True,
             key="src_url",
             label_column="label",
         )
         self.assertEqual(actual_config, expected_config)
Esempio n. 4
0
 async def test_key(self):
     """
     Look records up by the values of a custom key column.

     Writes a two-row CSV document with the header
     ``KeyHeader,ValueColumn`` into a randomly named file inside a
     temporary directory, loads it through ``CSVSource`` with
     ``key="KeyHeader"``, and checks that records ``"a"`` and ``"b"``
     expose ``ValueColumn`` as ``42`` and ``420``.
     """
     with tempfile.TemporaryDirectory() as testdir:
         # A random float rendered as text serves as the file name
         file_name = str(random.random())
         testfile = os.path.join(testdir, file_name)
         # cleandoc strips the literal's indentation and blank edge lines
         csv_text = inspect.cleandoc(
             """
                 KeyHeader,ValueColumn
                 a,42
                 b,420
                 """
         )
         pathlib.Path(testfile).write_text(csv_text)
         keyed_config = CSVSourceConfig(filename=testfile, key="KeyHeader")
         async with CSVSource(keyed_config) as source, source() as sctx:
             first = await sctx.record("a")
             second = await sctx.record("b")
             self.assertEqual(first.feature("ValueColumn"), 42)
             self.assertEqual(second.feature("ValueColumn"), 420)
Esempio n. 5
0
 async def test_source(self):
     """
     Configure a CSV source over HTTP, then create a context for it.

     Posts the parsed ``--source-filename dataset.csv -source-allowempty``
     arguments to ``/configure/source/csv/salary``, expects the ``OK``
     reply, and checks that the ``"salary"`` entry in the app's
     ``"sources"`` mapping carries the expected ``CSVSourceConfig``. While
     that response is still open, a GET on
     ``/context/source/salary/salaryctx`` must also answer ``OK`` and
     leave ``"salaryctx"`` in the app's ``"source_contexts"`` mapping.
     """
     source_args = parse_unknown(
         "--source-filename", "dataset.csv", "-source-allowempty"
     )
     async with self.post(
         "/configure/source/csv/salary", json=source_args
     ) as configure_response:
         configure_reply = await configure_response.json()
         self.assertEqual(configure_reply, OK)
         self.assertIn("salary", self.cli.app["sources"])
         actual_config = self.cli.app["sources"]["salary"].config
         expected_config = CSVSourceConfig(
             filename=pathlib.Path("dataset.csv"),
             tag="untagged",
             key="key",
             tagcol="tag",
             allowempty=True,
         )
         self.assertEqual(actual_config, expected_config)
         with self.subTest(context="salaryctx"):
             async with self.get(
                 "/context/source/salary/salaryctx"
             ) as context_response:
                 context_reply = await context_response.json()
                 self.assertEqual(context_reply, OK)
                 self.assertIn("salaryctx", self.cli.app["source_contexts"])
Esempio n. 6
0
 async def setUpSource(self):
     """
     Build the CSV source under test.

     Returns a ``CSVSource`` over ``self.testfile`` configured with
     ``allowempty=True`` and ``readwrite=True``.
     """
     source_config = CSVSourceConfig(
         filename=self.testfile,
         allowempty=True,
         readwrite=True,
     )
     return CSVSource(source_config)
Esempio n. 7
0
 async def setUpSource(self):
     """
     Build the CSV source under test.

     Returns a ``CSVSource`` over ``self.testfile``; every other
     ``CSVSourceConfig`` field is left at its default.
     """
     source_config = CSVSourceConfig(filename=self.testfile)
     return CSVSource(source_config)
Esempio n. 8
0
from dffml.cli.ml import Train, Accuracy, PredictAll
from dffml.feature.feature import Features, DefFeature
from dffml.source.csv import CSVSource, CSVSourceConfig
from dffml_model_tensorflow.dnnr import (
    DNNRegressionModel,
    DNNRegressionModelConfig,
)

# Wrap each CSV file in a source configured with readonly=True. The file
# names are listed in the same order as the names they are unpacked into.
training_data, test_data, predict_data = (
    CSVSource(CSVSourceConfig(filename=csv_name, readonly=True))
    for csv_name in ("training.csv", "test.csv", "predict.csv")
)

# Input features handed to the regression model's configuration
input_features = Features(
    DefFeature("Years", int, 1),
    DefFeature("Expertise", int, 1),
    DefFeature("Trust", float, 1),
)

# The model is configured to predict the "Salary" value
model = DNNRegressionModel(
    DNNRegressionModelConfig(features=input_features, predict="Salary")
)

# Each command object is built with the model and its sources, then called
Train(model=model, sources=[training_data])()

accuracy = Accuracy(model=model, sources=[test_data])()

# Unpacking into two names requires the result to hold exactly two items
row0, row1 = PredictAll(model=model, sources=[predict_data])()

print("Accuracy", accuracy)
Esempio n. 9
0
from dffml.cli.ml import Train, Accuracy, PredictAll
from dffml.feature.feature import Features, DefFeature
from dffml.source.csv import CSVSource, CSVSourceConfig
from dffml_model_tensorflow.dnnr import (
    DNNRegressionModel,
    DNNRegressionModelConfig,
)

# Wrap each CSV file in a source configured with readwrite=False. The file
# names are listed in the same order as the names they are unpacked into.
training_data, test_data, predict_data = (
    CSVSource(CSVSourceConfig(filename=csv_name, readwrite=False))
    for csv_name in ("training.csv", "test.csv", "predict.csv")
)

# Input features handed to the regression model's configuration
input_features = Features(
    DefFeature("Years", int, 1),
    DefFeature("Expertise", int, 1),
    DefFeature("Trust", float, 1),
)

# The prediction target is itself declared as a feature definition
model = DNNRegressionModel(
    DNNRegressionModelConfig(
        features=input_features,
        predict=DefFeature("Salary", float, 1),
    )
)

# Each command object is built with the model and its sources, then called
Train(model=model, sources=[training_data])()

accuracy = Accuracy(model=model, sources=[test_data])()

# Unpacking into two names requires the result to hold exactly two items
row0, row1 = PredictAll(model=model, sources=[predict_data])()

print("Accuracy", accuracy)