コード例 #1
0
    def write(
        self,
        df: DataFrame,
        path,
        compression="snappy",
        index=None,
        partition_cols=None,
        **kwargs,
    ):
        """
        Validate ``df`` and hand it to ``self.api.write``.

        Parameters
        ----------
        df : DataFrame
            Frame to write; checked with ``self.validate_dataframe`` first.
        path
            Destination. It is resolved through ``get_filepath_or_buffer``;
            when ``is_s3_url`` or ``is_gcs_url`` accepts it, it is resolved
            with ``mode="wb"``.
        compression : default "snappy"
            Forwarded to ``self.api.write`` as ``compression``.
        index : default None
            Forwarded to ``self.api.write`` as ``write_index``.
        partition_cols : default None
            Forwarded to ``self.api.write`` as ``partition_on``. When it is
            not None, ``file_scheme`` is set to ``"hive"``.
        **kwargs
            Forwarded to ``self.api.write``. A ``partition_on`` entry is
            accepted as an alias for ``partition_cols``.

        Raises
        ------
        ValueError
            If ``partition_on`` is passed while ``partition_cols`` is not
            None.
        """
        self.validate_dataframe(df)

        # ``partition_on`` is an alias; it may not be combined with an
        # explicit ``partition_cols``.
        if "partition_on" in kwargs:
            if partition_cols is not None:
                raise ValueError(
                    "Cannot use both partition_on and "
                    "partition_cols. Use partition_cols for "
                    "partitioning data"
                )
            partition_cols = kwargs.pop("partition_on")

        if partition_cols is not None:
            kwargs["file_scheme"] = "hive"

        # s3:// and gs:// destinations need the file opened in 'wb' mode.
        # TODO: Support 'ab'
        is_remote = is_s3_url(path) or is_gcs_url(path)
        resolve_kwargs = {"mode": "wb"} if is_remote else {}
        target, _, _, _ = get_filepath_or_buffer(path, **resolve_kwargs)

        if is_remote:
            # Pass the already-opened file to the fastparquet internal impl:
            # the opener simply returns what it is given.
            def _use_opened(path, _):
                return path

            kwargs["open_with"] = _use_opened

        # Warnings emitted during the write are recorded instead of shown,
        # e.g. thriftpy/protocol/compact.py:339:
        # DeprecationWarning: tostring() is deprecated.
        # Use tobytes() instead.
        with catch_warnings(record=True):
            self.api.write(
                target,
                df,
                compression=compression,
                write_index=index,
                partition_on=partition_cols,
                **kwargs,
            )
コード例 #2
0
ファイル: test_gcs.py プロジェクト: TomAugspurger/pandas
def test_is_gcs_url():
    """Check that ``gcs://`` and ``gs://`` URLs are accepted and ``s3://`` is not."""
    accepted_urls = (
        "gcs://pandas/somethingelse.com",
        "gs://pandas/somethingelse.com",
    )
    for url in accepted_urls:
        assert is_gcs_url(url)

    # An S3 URL must not be mistaken for a GCS one.
    assert not is_gcs_url("s3://pandas/somethingelse.com")
コード例 #3
0
ファイル: test_gcs.py プロジェクト: danielmoreira12/BAProject
def test_is_gcs_url():
    """Check ``is_gcs_url`` against two GCS-scheme URLs and one S3 URL."""
    # (url, whether is_gcs_url should report it as a GCS URL)
    expectations = (
        ("gcs://pandas/somethingelse.com", True),
        ("gs://pandas/somethingelse.com", True),
        ("s3://pandas/somethingelse.com", False),
    )
    for url, expected in expectations:
        assert bool(is_gcs_url(url)) is expected