Exemple #1
0
    def test_parquet_mixed_ignores_none(self, tmpdir):
        """Writing a string column that also holds ``None`` must not raise."""
        target = tmpdir.join("output.pq")
        factory = LocalIoFactory(target)

        frame = pd.DataFrame({
            "wat": [None, "la", "dispute"],
            "yep": [1, 2, 3],
        })

        # No explicit assertion: the test passes as long as nothing raises.
        writer = DatasetWriter(factory, lambda x: None, None)
        with writer as f:
            f.parquet(frame)
Exemple #2
0
    def test_reads_raw(self, tmpdir):
        """``raw()`` yields an object whose ``read()`` returns the file's bytes."""
        source = tmpdir.join("input.bin")
        factory = LocalIoFactory(source)

        # Includes 0x00 and 0xff so the payload is clearly not text.
        payload = b"\x00\x01\x02\x03\xff"
        with open(source, "wb") as handle:
            handle.write(payload)

        stream = DatasetReader(factory).raw()
        payload_read = stream.read()

        assert payload == payload_read
Exemple #3
0
    def test_passes_extensions_to_callback(self, tmpdir):
        """Entries added through ``extensions()`` are handed to the callback.

        The callback must have been invoked exactly once, with the mutated
        extensions dict, by the time the ``with`` block has exited.
        """
        target = tmpdir.join("output.pq")
        factory = LocalIoFactory(target)

        initial_extensions = {}
        callback = Mock()

        with DatasetWriter(factory, callback, initial_extensions) as writer:
            writer.extensions()["foo"] = "bar"

        expected_calls = [call({"foo": "bar"})]
        assert callback.mock_calls == expected_calls
Exemple #4
0
    def test_reads_parquet(self, tmpdir):
        """``DatasetReader.parquet()`` returns the frame stored in the file."""
        path = tmpdir.join("input.pq")
        io_factory = LocalIoFactory(path)

        df = pd.DataFrame({
            "foo": [0, 1, 2],
            "bar": ["hello", "goodbye", "oh dear"],
        })
        # Write the fixture directly (not through DatasetWriter) so that only
        # the reader is exercised by this test.
        pq.write_table(pa.Table.from_pandas(df), path.strpath)

        df_read = DatasetReader(io_factory).parquet()

        # pd.testing is the public location of the assertion helpers;
        # pd.util.testing was deprecated and later removed from pandas.
        pd.testing.assert_frame_equal(df, df_read)
Exemple #5
0
    def test_reads_json_with_non_ascii_chars(self, tmpdir):
        """``DatasetReader.json()`` round-trips a value containing "€"."""
        path = tmpdir.join("input.json")
        io_factory = LocalIoFactory(path)

        data = {"foo": "€10"}

        # ensure_ascii=False writes the euro sign as a raw character, so the
        # file encoding matters. Pin it to UTF-8 instead of inheriting the
        # platform default, which would make the fixture locale-dependent
        # (or fail outright on an ASCII-only locale).
        with open(path, "w", encoding="utf-8") as f:
            # See https://stackoverflow.com/a/14870531/129570
            json.dump(data, f, ensure_ascii=False)

        data_read = DatasetReader(io_factory).json()

        assert data == data_read
Exemple #6
0
    def test_reads_csv_with_non_ascii_chars(self, tmpdir):
        """``DatasetReader.csv()`` preserves a cell containing "€"."""
        path = tmpdir.join("input.csv")
        io_factory = LocalIoFactory(path)

        # newline="" is what the csv module requires for files it writes to
        # (otherwise line endings get translated a second time on some
        # platforms). The encoding is pinned so the euro sign is written the
        # same way regardless of the locale the tests run under.
        with open(path, "w", encoding="utf-8", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=["item", "price"])
            writer.writeheader()
            writer.writerow({"item": "baguette", "price": "€10"})

        df_read = DatasetReader(io_factory).csv()

        # DataFrame.from_items no longer exists in current pandas; an explicit
        # ``columns`` list keeps the column order deterministic instead.
        expected = pd.DataFrame(
            {"item": ["baguette"], "price": ["€10"]},
            columns=["item", "price"],
        )
        # pd.testing replaces the removed pd.util.testing module.
        pd.testing.assert_frame_equal(expected, df_read)
Exemple #7
0
    def test_parquet_with_mixed_type_columns(self, tmpdir):
        """A mixed-type column makes ``parquet()`` fail with ``ValueError``.

        The error message must mention the offending column ("foo") and must
        not mention the homogeneous one ("bar").
        """
        target = tmpdir.join("output.pq")
        factory = LocalIoFactory(target)

        frame = pd.DataFrame({
            "foo": [0, "string", 2],  # Mixed type!
            "bar": [4, 5, 6],
        })

        with pytest.raises(ValueError) as excinfo:
            writer = DatasetWriter(factory, lambda x: None, None)
            with writer as f:
                f.parquet(frame)

        message = str(excinfo.value)
        assert "bar" not in message
        assert "foo" in message
Exemple #8
0
    def test_parquet_succeeds(self, tmpdir):
        """A frame written via ``DatasetWriter.parquet()`` reads back unchanged."""
        path = tmpdir.join("output.pq")
        io_factory = LocalIoFactory(path)

        df = pd.DataFrame({
            "foo": [0, 1, 2],
            "bar": ["hello", "goodbye", "oh dear"],
        })

        with DatasetWriter(io_factory, lambda x: None, None) as f:
            f.parquet(df)

        # Read the file back directly (not through DatasetReader) so that
        # only the writer is exercised by this test.
        df_written = pq.read_table(path.strpath).to_pandas()

        # pd.testing is the public location of the assertion helpers;
        # pd.util.testing was deprecated and later removed from pandas.
        pd.testing.assert_frame_equal(df, df_written)
Exemple #9
0
    def test_reads_csv(self, tmpdir):
        """``DatasetReader.csv()`` returns the rows of a CSV file as a frame."""
        path = tmpdir.join("input.csv")
        io_factory = LocalIoFactory(path)

        # newline="" is what the csv module requires for files it writes to;
        # without it line endings can be translated twice on some platforms.
        with open(path, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=["name", "age"])
            writer.writeheader()
            writer.writerow({"name": "Alice Foo", "age": 25})
            writer.writerow({"name": "Bob Bar", "age": 36})
            writer.writerow({"name": "Charlie Baz", "age": 49})

        df_read = DatasetReader(io_factory).csv()

        # DataFrame.from_items no longer exists in current pandas; an explicit
        # ``columns`` list keeps the column order deterministic instead.
        expected = pd.DataFrame(
            {
                "name": ["Alice Foo", "Bob Bar", "Charlie Baz"],
                "age": [25, 36, 49],
            },
            columns=["name", "age"],
        )
        # pd.testing replaces the removed pd.util.testing module.
        pd.testing.assert_frame_equal(expected, df_read)
Exemple #10
0
    def test_reads_json(self, tmpdir):
        """``DatasetReader.json()`` returns the decoded content of a JSON file.

        The fixture covers a list, a nested object, an integer, a boolean and
        a null value.
        """
        source = tmpdir.join("input.json")
        factory = LocalIoFactory(source)

        nested = {"hello": 5, "goodbye": True, "oh dear": None}
        expected = {"foo": [0, 1, 2], "bar": nested}

        with open(source, "w") as handle:
            json.dump(expected, handle)

        actual = DatasetReader(factory).json()

        assert expected == actual