Python FileReader.consumeの例

プログラミング言語: Python

名前空間/パッケージ名: butterfree.extract.readers

クラス/型: FileReader

メソッド/関数: consume

hotexamples.comのコード掲載数: 4

Python FileReader.consume - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのbutterfree.extract.readers.FileReader.consumeの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示非表示

FileReader(17)

_apply_transformations(4)

consume(4)

with_(4)

build(2)

transformations(1)

コード例 #1

ファイルを表示

ファイル: test_file_reader.py プロジェクト: zuston/butterfree

    def test_csv_file_with_schema_and_header(self):
        # given
        spark_client = SparkClient()
        schema_csv = StructType(
            [
                StructField("A", LongType()),
                StructField("B", DoubleType()),
                StructField("C", StringType()),
            ]
        )

        file = "tests/unit/butterfree/extract/readers/file-reader-test.csv"

        # when
        file_reader = FileReader(
            id="id",
            path=file,
            format="csv",
            schema=schema_csv,
            format_options={"header": True},
        )
        df = file_reader.consume(spark_client)

        # assert
        assert schema_csv == df.schema
        assert df.columns == ["A", "B", "C"]
        for value in range(3):
            assert df.first()[value] != ["A", "B", "C"][value]

コード例 #2

ファイルを表示

ファイル: test_file_reader.py プロジェクト: zuston/butterfree

    def test_consume_with_stream_without_schema(self, spark_client, target_df):
        # arrange
        path = "path/to/file.json"
        format = "json"
        schema = None
        format_options = None
        stream = True
        options = dict({"path": path})

        spark_client.read.return_value = target_df
        file_reader = FileReader(
            "test", path, format, schema, format_options, stream=stream
        )

        # act
        output_df = file_reader.consume(spark_client)

        # assert

        # assert call for schema infer
        spark_client.read.assert_any_call(format=format, options=options)
        # assert call for stream read
        # stream
        spark_client.read.assert_called_with(
            format=format, options=options, schema=output_df.schema, stream=stream
        )
        assert target_df.collect() == output_df.collect()

コード例 #3

ファイルを表示

ファイル: test_file_reader.py プロジェクト: zuston/butterfree

    def test_consume(
        self, path, format, schema, format_options, spark_client, target_df
    ):
        # arrange
        spark_client.read.return_value = target_df
        file_reader = FileReader("test", path, format, schema, format_options)

        # act
        output_df = file_reader.consume(spark_client)
        options = dict({"path": path}, **format_options if format_options else {})

        # assert
        spark_client.read.assert_called_once_with(
            format=format, options=options, schema=schema, stream=False
        )
        assert target_df.collect() == output_df.collect()

コード例 #4

ファイルを表示

ファイル: test_file_reader.py プロジェクト: zuston/butterfree

    def test_json_file_with_schema(self):
        # given
        spark_client = SparkClient()
        schema_json = StructType(
            [
                StructField("A", StringType()),
                StructField("B", DoubleType()),
                StructField("C", StringType()),
            ]
        )

        file = "tests/unit/butterfree/extract/readers/file-reader-test.json"

        # when
        file_reader = FileReader(id="id", path=file, format="json", schema=schema_json)
        df = file_reader.consume(spark_client)

        # assert
        assert schema_json == df.schema