Esempio n. 1
0
    async def test_save_and_load(self):
        """
        Round-trip two records through a CSV source using save() and load().

        Both records are written to the file named by ``self.save_and_load``
        and then read back twice: once without a key filter and once asking
        only for key "1". The expected prediction values are the strings "1"
        and "2" although the integers 1 and 2 were saved (presumably a
        consequence of the CSV round trip -- TODO confirm).
        """
        # What export() is expected to return for each record once loaded
        expected_first = {
            "key": "1",
            "features": {"A": 0, "B": 1},
            "prediction": {"C": {"confidence": 1.0, "value": "1"}},
            "extra": {},
        }
        expected_second = {
            "key": "2",
            "features": {"A": 3, "B": 4},
            "prediction": {"C": {"confidence": 1.0, "value": "2"}},
            "extra": {},
        }

        csv_source = CSVSource(
            filename=self.save_and_load, allowempty=True, readwrite=True
        )
        record_one = Record(
            "1",
            data={
                "features": {"A": 0, "B": 1},
                "prediction": {"C": {"value": 1, "confidence": 1.0}},
            },
        )
        record_two = Record(
            "2",
            data={
                "features": {"A": 3, "B": 4},
                "prediction": {"C": {"value": 2, "confidence": 1.0}},
            },
        )
        await save(csv_source, record_one, record_two)

        # All records in source
        everything = []
        async for loaded in load(csv_source):
            everything.append(loaded.export())
        self.assertEqual(everything, [expected_first, expected_second])

        # For specific records in a source
        only_first = []
        async for loaded in load(csv_source, "1"):
            only_first.append(loaded.export())
        self.assertEqual(only_first, [expected_first])
Esempio n. 2
0
async def main():
    """
    Load the training dataset in two different ways and check the records.

    The dataset URL is taken from ``sys.argv[1]`` and the cache directory
    from ``sys.argv[2]``. The records are loaded first through the
    ``.source`` property of ``my_training_dataset`` and then by using
    ``my_training_dataset`` as an async context manager. Each time the
    number of records and the export of one record are compared against
    hard-coded expectations.

    Raises:
        IndexError: if fewer than two command line arguments were given.
        AssertionError: if the loaded records differ from the expectations.
    """
    # Grab arguments from command line
    url = sys.argv[1]
    cache_dir = pathlib.Path(sys.argv[2])

    # Create a test case to do comparisons
    tc = unittest.TestCase()

    # Usage via Source class set as property .source of function
    records = [
        record
        async for record in load(
            my_training_dataset.source(url=url, cache_dir=cache_dir)
        )
    ]

    tc.assertEqual(len(records), 5)
    tc.assertDictEqual(
        records[0].export(),
        {
            "key": "0",
            "features": {"feed": 0.0, "face": 0, "dead": 0, "beef": 0},
            "extra": {},
        },
    )

    # Usage as context manager to create source
    async with my_training_dataset(url=url, cache_dir=cache_dir) as source:
        # Single assignment; the original bound ``records`` twice in one
        # chained statement, which had no effect beyond the first binding.
        records = [record async for record in load(source)]
        tc.assertEqual(len(records), 5)
        tc.assertDictEqual(
            records[2].export(),
            {
                "key": "2",
                "features": {"feed": 0.2, "face": 2, "dead": 20, "beef": 200},
                "extra": {},
            },
        )
Esempio n. 3
0
async def lookup_population(self, city: str, state: str):
    """
    Yield the population recorded for ``city`` in the dataset for ``state``.

    The CSV file for the state is fetched with ``cached_download`` into
    ``~/.cache/dffml/datasets/population`` and then scanned record by
    record. For every record whose ``NAME`` feature equals ``city`` a
    dictionary ``{"population": <POPESTIMATE2019 feature>}`` is yielded.

    Args:
        city: Name matched against the ``NAME`` feature of each record.
        state: Key into ``population_dataset_urls`` selecting the dataset.

    Raises:
        Exception: if ``city`` is not in ``temperature_dataset_urls`` or
            ``state`` is not in ``population_dataset_urls``.
    """
    # NOTE(review): the city is validated against temperature_dataset_urls,
    # not against a population specific table -- confirm this is intended.
    if city not in temperature_dataset_urls:
        raise Exception(f"City: {city} not found in dataset")

    if state not in population_dataset_urls:
        raise Exception(f"State: {state} not found in dataset")

    # Look the dataset entry up once instead of once per field
    dataset = population_dataset_urls[state]

    cache_dir = (
        pathlib.Path("~", ".cache", "dffml", "datasets", "population")
        .expanduser()
        .resolve()
    )

    filepath = await cached_download(
        dataset["url"],
        cache_dir / f"{state}.csv",
        dataset["expected_sha384_hash"],
    )
    async for record in load(filepath):
        # Export each record a single time and reuse the features for both
        # the name comparison and the population lookup.
        features = export(record)["features"]
        if features["NAME"] == city:
            yield {"population": features["POPESTIMATE2019"]}
Esempio n. 4
0
    async def test_update(self):
        """
        Check that saving a record under an existing key updates it.

        The source starts from a one-row DataFrame with columns "B" and "C"
        configured as predictions. Keys "1" and "2" are saved, then key "2"
        is saved again with new values. The test expects three records in
        total, with the last one holding the values from the second save,
        "A" reported as the only feature and "B"/"C" reported as predictions
        with a confidence of 0.0.
        """

        def predicted(b_value, c_value):
            # Shape the test expects from Record.predictions() for B and C
            return {
                "B": {"confidence": 0.0, "value": b_value},
                "C": {"confidence": 0.0, "value": c_value},
            }

        frame = pd.DataFrame([{"A": 1, "B": 2, "C": 3}])
        config = DataFrameSourceConfig(dataframe=frame, predictions=["C", "B"])
        dataframe_source = DataFrameSource(config)

        # Save some data in the source
        await save(
            dataframe_source,
            Record("1", data={"features": {"A": 4, "B": 5, "C": 6}}),
            Record("2", data={"features": {"A": 7, "B": 8, "C": 9}}),
        )

        # Save key "2" a second time with different values
        await save(
            dataframe_source,
            Record("2", data={"features": {"A": 15, "B": 16, "C": 14}}),
        )

        loaded = []
        async for record in load(dataframe_source):
            loaded.append(record)
        self.assertEqual(len(loaded), 3)

        # (expected features, expected predictions) in load order
        expectations = (
            ({"A": 1}, predicted(2, 3)),
            ({"A": 4}, predicted(5, 6)),
            ({"A": 15}, predicted(16, 14)),
        )
        for record, (features, predictions) in zip(loaded, expectations):
            self.assertDictEqual(record.features(), features)
            self.assertDictEqual(record.predictions(), predictions)
Esempio n. 5
0
    async def test_dataframe(self):
        """
        Save two records into a DataFrame backed source and load all back.

        The source starts from a one-row DataFrame with column "C"
        configured as a prediction. After saving keys "1" and "2" the test
        expects three records, each reporting "A" and "B" as features and
        "C" as a prediction with a confidence of 0.0.
        """

        def make_record(key, a_value, b_value, c_value):
            # Record carrying features A/B and a prediction value for C
            return Record(
                key,
                data={
                    "features": {"A": a_value, "B": b_value},
                    "prediction": {"C": {"value": c_value}},
                },
            )

        frame = pd.DataFrame([{"A": 1, "B": 2, "C": 3}])
        config = DataFrameSourceConfig(dataframe=frame, predictions=["C"])
        dataframe_source = DataFrameSource(config)

        # Save some data in the source
        await save(
            dataframe_source,
            make_record("1", 4, 5, 6),
            make_record("2", 7, 8, 9),
        )

        # Load all the records
        loaded = []
        async for record in load(dataframe_source):
            loaded.append(record)

        self.assertIsInstance(loaded, list)
        self.assertEqual(len(loaded), 3)

        # (A, B, C) values expected for each record in load order
        expected_rows = ((1, 2, 3), (4, 5, 6), (7, 8, 9))
        for record, (a_value, b_value, c_value) in zip(loaded, expected_rows):
            self.assertDictEqual(
                record.features(), {"A": a_value, "B": b_value}
            )
            self.assertDictEqual(
                record.predictions(),
                {"C": {"confidence": 0.0, "value": c_value}},
            )