Example #1
    def test_predict(self):
        self.required_plugins("dffml-model-scikit")
        # Import SciKit modules
        dffml_model_scikit = importlib.import_module("dffml_model_scikit")
        # Instantiate the model
        model = dffml_model_scikit.LinearRegressionModel(
            directory=self.mktempdir(),
            predict=Feature("Salary", int, 1),
            features=Features(
                Feature("Years", int, 1),
                Feature("Expertise", int, 1),
                Feature("Trust", float, 1),
            ),
        )

        training_data = CSVSource(filename=self.train_filename)
        test_data = CSVSource(filename=self.test_filename)
        predict_data = CSVSource(filename=self.predict_filename)

        # Train the model
        train(model, training_data)
        # Assess accuracy
        accuracy(model, test_data)
        # Make prediction
        predictions = [
            prediction for prediction in predict(model, predict_data)
        ]
        self.assertEqual(round(predictions[0][2]["Salary"]["value"]), 70)
        self.assertEqual(round(predictions[1][2]["Salary"]["value"]), 80)
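A minimal set of imports that would make the synchronous variant above self-contained (a sketch, not taken from the original example; it assumes DFFML's blocking wrappers live in dffml.noasync and that Feature, Features, and CSVSource are importable from the top-level dffml package):

# Hypothetical imports for the blocking variant above (assumption:
# dffml.noasync exposes blocking train/accuracy/predict wrappers).
import importlib

from dffml import CSVSource, Feature, Features
from dffml.noasync import accuracy, predict, train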
Example #2
 async def test_csv_tag(self):
     with non_existant_tempfile() as csv_tempfile:
         # Move the pre-populated json data to a csv source
         with self.subTest(json_to_csv=True):
             await Merge.cli(
                 "dest=csv",
                 "src=json",
                 "-source-dest-filename",
                 csv_tempfile,
                 "-source-src-filename",
                 self.temp_filename,
                 "-source-src-allowempty",
                 "-source-dest-allowempty",
                 "-source-src-readwrite",
                 "-source-dest-readwrite",
             )
         # Merge one tag to another within the same file
         with self.subTest(merge_same_file=True):
             await Merge.cli(
                 "dest=csv",
                 "src=csv",
                 "-source-dest-filename",
                 csv_tempfile,
                 "-source-dest-tag",
                 "sometag",
                 "-source-src-filename",
                 csv_tempfile,
                 "-source-src-allowempty",
                 "-source-dest-allowempty",
                 "-source-src-readwrite",
                 "-source-dest-readwrite",
             )
         contents = Path(csv_tempfile).read_text()
         self.assertIn("untagged", contents)
         self.assertIn("sometag", contents)
         # Check the untagged source
         with self.subTest(tagged=None):
             async with CSVSource(
                 CSVSourceConfig(filename=csv_tempfile)
             ) as source:
                 async with source() as sctx:
                     repos = [repo async for repo in sctx.repos()]
                     self.assertEqual(len(repos), len(self.repos))
         contents = Path(csv_tempfile).read_text()
         self.assertIn("sometag", contents)
         self.assertIn("untagged", contents)
         # Check the tagged source
         with self.subTest(tagged="sometag"):
             async with CSVSource(
                 CSVSourceConfig(filename=csv_tempfile, tag="sometag")
             ) as source:
                 async with source() as sctx:
                     repos = [repo async for repo in sctx.repos()]
                     self.assertEqual(len(repos), len(self.repos))
         contents = Path(csv_tempfile).read_text()
         self.assertIn("sometag", contents)
         self.assertIn("untagged", contents)
Example #3
 def test_config_readonly_default(self):
     config = CSVSource.config(
         parse_unknown("--source-csv-filename", "feedface"))
     self.assertEqual(config.filename, "feedface")
     self.assertEqual(config.label, "unlabeled")
     self.assertEqual(config.key, None)
     self.assertFalse(config.readonly)
Example #4
 def test_config_default(self):
     config = CSVSource.config(
         parse_unknown("--source-csv-filename", "feedface"))
     self.assertEqual(config.filename, "feedface")
     self.assertEqual(config.tag, "untagged")
     self.assertEqual(config.tagcol, "tag")
     self.assertEqual(config.key, "key")
     self.assertFalse(config.readwrite)
     self.assertFalse(config.allowempty)
Example #5
 def test_config_default(self):
     config = CSVSource.config(
         parse_unknown("--source-csv-filename", "feedface"))
     self.assertEqual(config.filename, "feedface")
     self.assertEqual(config.label, "unlabeled")
     self.assertEqual(config.labelcol, "label")
     self.assertEqual(config.key, "src_url")
     self.assertFalse(config.readwrite)
     self.assertFalse(config.allowempty)
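For comparison, the same defaults can be exercised by constructing the config directly instead of parsing CLI-style arguments (a sketch; the import path matches Example #15, and whether the defaults use tag/tagcol/key or label/labelcol/src_url depends on the DFFML version, as Examples #3 through #5 show):

# Build the config in code rather than via parse_unknown (illustrative only).
from dffml.source.csv import CSVSource, CSVSourceConfig

config = CSVSourceConfig(filename="feedface")
source = CSVSource(config)
# Unset fields fall back to their defaults, e.g. the key and tag/label
# column settings asserted in the tests above.
print(config.filename)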
Example #6
    async def test_save_and_load(self):
        source = CSVSource(
            filename=self.save_and_load, allowempty=True, readwrite=True
        )
        await save(
            source,
            Record(
                "1",
                data={
                    "features": {"A": 0, "B": 1},
                    "prediction": {"C": {"value": 1, "confidence": 1.0}},
                },
            ),
            Record(
                "2",
                data={
                    "features": {"A": 3, "B": 4},
                    "prediction": {"C": {"value": 2, "confidence": 1.0}},
                },
            ),
        )
        # All records in source
        results = [record.export() async for record in load(source)]
        self.assertEqual(
            results,
            [
                {
                    "key": "1",
                    "features": {"A": 0, "B": 1},
                    "prediction": {"C": {"confidence": 1.0, "value": "1"}},
                    "extra": {},
                },
                {
                    "key": "2",
                    "features": {"A": 3, "B": 4},
                    "prediction": {"C": {"confidence": 1.0, "value": "2"}},
                    "extra": {},
                },
            ],
        )

        # For specific records in a source
        results = [record.export() async for record in load(source, "1")]
        self.assertEqual(
            results,
            [
                {
                    "key": "1",
                    "features": {"A": 0, "B": 1},
                    "prediction": {"C": {"confidence": 1.0, "value": "1"}},
                    "extra": {},
                }
            ],
        )
Example #7
    async def test_predict(self):
        self.required_plugins("dffml-model-scikit")
        # Import SciKit modules
        dffml_model_scikit = importlib.import_module("dffml_model_scikit")
        # Instantiate the model
        model = dffml_model_scikit.LinearRegressionModel(
            location=self.mktempdir(),
            predict=Feature("Salary", int, 1),
            features=Features(
                Feature("Years", int, 1),
                Feature("Expertise", int, 1),
                Feature("Trust", float, 1),
            ),
        )

        training_data = CSVSource(filename=self.train_filename)
        test_data = CSVSource(filename=self.test_filename)
        predict_data = CSVSource(filename=self.predict_filename)

        # Train the model
        await train(model, training_data)
        # Assess accuracy
        scorer = MeanSquaredErrorAccuracy()
        await score(model, scorer, Feature("Salary", int, 1), test_data)
        # Make prediction
        predictions = [
            prediction async for prediction in predict(model, predict_data)
        ]
        self.assertEqual(round(predictions[0][2]["Salary"]["value"]), 70)
        self.assertEqual(round(predictions[1][2]["Salary"]["value"]), 80)

        # Test input data as list
        await train(model, *self.train_data)
        await score(model, scorer, Feature("Salary", int, 1), *self.test_data)
        predictions = [
            prediction
            async for prediction in predict(model, *self.predict_data)
        ]
        self.assertEqual(round(predictions[0][2]["Salary"]["value"]), 70)
        self.assertEqual(round(predictions[1][2]["Salary"]["value"]), 80)
Example #8
 async def test_key(self):
     with tempfile.NamedTemporaryFile() as fileobj:
         fileobj.write(b"KeyHeader,ValueColumn\n")
         fileobj.write(b"a,42\n")
         fileobj.write(b"b,420\n")
         fileobj.seek(0)
         async with CSVSource(
                 CSVSourceConfig(filename=fileobj.name,
                                 key="KeyHeader")) as source:
             async with source() as sctx:
                 record_a = await sctx.record("a")
                 record_b = await sctx.record("b")
                 self.assertEqual(record_a.feature("ValueColumn"), 42)
                 self.assertEqual(record_b.feature("ValueColumn"), 420)
Example #9
 def test_config_readonly_set(self):
     config = CSVSource.config(
         parse_unknown(
             "--source-csv-filename",
             "feedface",
             "--source-csv-label",
             "default-label",
             "--source-csv-key",
             "SourceURLColumn",
             "--source-csv-readonly",
         ))
     self.assertEqual(config.filename, "feedface")
     self.assertEqual(config.label, "default-label")
     self.assertEqual(config.key, "SourceURLColumn")
     self.assertTrue(config.readonly)
Example #10
 async def test_key(self):
     with tempfile.TemporaryDirectory() as testdir:
         testfile = os.path.join(testdir, str(random.random()))
         pathlib.Path(testfile).write_text(
             inspect.cleandoc("""
                 KeyHeader,ValueColumn
                 a,42
                 b,420
                 """))
         async with CSVSource(
                 CSVSourceConfig(filename=testfile,
                                 key="KeyHeader")) as source:
             async with source() as sctx:
                 record_a = await sctx.record("a")
                 record_b = await sctx.record("b")
                 self.assertEqual(record_a.feature("ValueColumn"), 42)
                 self.assertEqual(record_b.feature("ValueColumn"), 420)
Example #11
async def my_training_dataset(
    url: str = "http://download.example.com/data/my_training.csv",
    expected_sha384_hash:
    str = "db9ec70abdc8b74bcf91a7399144dd15fc01e3dad91bbbe3c41fbbe33065b98a3e06e8e0ba053d850d7dc19e6837310e",
    cache_dir: pathlib.Path = (pathlib.Path("~", ".cache", "dffml", "datasets",
                                            "my").expanduser().resolve()),
):
    # Download the file from the given URL and place the downloaded file at
    # ~/.cache/dffml/datasets/my/training.csv. Ensure the SHA 384 hash
    # of the download's contents is equal to the expected value.
    filepath = await cached_download(
        url,
        cache_dir / "training.csv",
        expected_sha384_hash,
        protocol_allowlist=["http://"] + DEFAULT_PROTOCOL_ALLOWLIST,
    )
    # Create a source using downloaded file
    yield CSVSource(filename=str(filepath))
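A minimal consumption sketch for the dataset source above (the main() wrapper and direct iteration over the generator are assumptions for illustration; load is the DFFML high-level helper also used in the save/load example):

# Iterate the async generator to obtain the verified, cached CSVSource,
# then read its records (illustrative usage, not part of the original).
import asyncio

from dffml import load


async def main():
    async for source in my_training_dataset():
        async for record in load(source):
            print(record.export())


asyncio.run(main())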
Example #12
 def test_config_set(self):
     config = CSVSource.config(
         parse_unknown(
             "--source-csv-filename",
             "feedface",
             "--source-csv-tag",
             "default-tag",
             "--source-csv-tagcol",
             "dffml_tag",
             "--source-csv-key",
             "SourceURLColumn",
             "--source-csv-readwrite",
             "--source-csv-allowempty",
         ))
     self.assertEqual(config.filename, "feedface")
     self.assertEqual(config.tag, "default-tag")
     self.assertEqual(config.tagcol, "dffml_tag")
     self.assertEqual(config.key, "SourceURLColumn")
     self.assertTrue(config.readwrite)
     self.assertTrue(config.allowempty)
Example #13
 async def setUpSource(self):
     return CSVSource(
         CSVSourceConfig(filename=self.testfile,
                         allowempty=True,
                         readwrite=True))
Example #14
 async def setUpSource(self):
     return CSVSource(CSVSourceConfig(filename=self.testfile))
Example #15
from dffml.cli.ml import Train, Accuracy, PredictAll
from dffml.feature.feature import Features, DefFeature
from dffml.source.csv import CSVSource, CSVSourceConfig
from dffml_model_tensorflow.dnnr import (
    DNNRegressionModel,
    DNNRegressionModelConfig,
)

training_data = CSVSource(
    CSVSourceConfig(filename="training.csv", readonly=True))
test_data = CSVSource(CSVSourceConfig(filename="test.csv", readonly=True))
predict_data = CSVSource(
    CSVSourceConfig(filename="predict.csv", readonly=True))

model = DNNRegressionModel(
    DNNRegressionModelConfig(
        features=Features(
            DefFeature("Years", int, 1),
            DefFeature("Expertise", int, 1),
            DefFeature("Trust", float, 1),
        ),
        predict="Salary",
    ))

Train(model=model, sources=[training_data])()

accuracy = Accuracy(model=model, sources=[test_data])()

row0, row1 = PredictAll(model=model, sources=[predict_data])()

print("Accuracy", accuracy)
Example #16
from dffml.cli.ml import Train, Accuracy, PredictAll
from dffml.feature.feature import Features, DefFeature
from dffml.source.csv import CSVSource, CSVSourceConfig
from dffml_model_tensorflow.dnnr import (
    DNNRegressionModel,
    DNNRegressionModelConfig,
)

training_data = CSVSource(
    CSVSourceConfig(filename="training.csv", readwrite=False))
test_data = CSVSource(CSVSourceConfig(filename="test.csv", readwrite=False))
predict_data = CSVSource(
    CSVSourceConfig(filename="predict.csv", readwrite=False))

model = DNNRegressionModel(
    DNNRegressionModelConfig(
        features=Features(
            DefFeature("Years", int, 1),
            DefFeature("Expertise", int, 1),
            DefFeature("Trust", float, 1),
        ),
        predict=DefFeature("Salary", float, 1),
    ))

Train(model=model, sources=[training_data])()

accuracy = Accuracy(model=model, sources=[test_data])()

row0, row1 = PredictAll(model=model, sources=[predict_data])()

print("Accuracy", accuracy)