def write(
    self,
    df: DataFrame,
    path,
    compression="snappy",
    index=None,
    partition_cols=None,
    **kwargs,
):
    """
    Write ``df`` to ``path`` through ``self.api.write``.

    Parameters
    ----------
    df : DataFrame
        Frame to write; checked with ``self.validate_dataframe`` first.
    path
        Destination. It is resolved with ``get_filepath_or_buffer``; for
        s3 / gcs URLs it is resolved in ``"wb"`` mode.
    compression : default "snappy"
        Forwarded unchanged as ``compression``.
    index : default None
        Forwarded as ``write_index``.
    partition_cols : default None
        Forwarded as ``partition_on``. When not None, ``file_scheme`` is
        set to ``"hive"`` in the forwarded keyword arguments.
    **kwargs
        Extra keyword arguments forwarded to ``self.api.write``. A
        ``partition_on`` entry is accepted as an alias for
        ``partition_cols``.

    Raises
    ------
    ValueError
        If both ``partition_on`` (in ``kwargs``) and ``partition_cols``
        are supplied.
    """
    self.validate_dataframe(df)

    # ``partition_on`` is an alias for ``partition_cols``; only one of the
    # two may be supplied.
    if "partition_on" in kwargs:
        if partition_cols is not None:
            raise ValueError(
                "Cannot use both partition_on and "
                "partition_cols. Use partition_cols for "
                "partitioning data"
            )
        partition_cols = kwargs.pop("partition_on")

    if partition_cols is not None:
        kwargs["file_scheme"] = "hive"

    targets_remote_store = is_s3_url(path) or is_gcs_url(path)
    if targets_remote_store:
        # s3:// and gs:// paths have to be opened in 'wb' mode.
        # TODO: Support 'ab'
        destination, _, _, _ = get_filepath_or_buffer(path, mode="wb")
        # Hand the already-opened file to the fastparquet internal impl:
        # the opener simply returns whatever it is given.
        kwargs["open_with"] = lambda path, _: path
    else:
        destination, _, _, _ = get_filepath_or_buffer(path)

    # Warnings are recorded during the write, e.g.
    # thriftpy/protocol/compact.py:339:
    # DeprecationWarning: tostring() is deprecated.
    # Use tobytes() instead.
    with catch_warnings(record=True):
        self.api.write(
            destination,
            df,
            compression=compression,
            write_index=index,
            partition_on=partition_cols,
            **kwargs,
        )
def test_is_gcs_url():
    """``is_gcs_url`` accepts gcs:// and gs:// URLs and rejects s3:// URLs."""
    for accepted in (
        "gcs://pandas/somethingelse.com",
        "gs://pandas/somethingelse.com",
    ):
        assert is_gcs_url(accepted)

    assert not is_gcs_url("s3://pandas/somethingelse.com")
def test_is_gcs_url():
    """Both GCS schemes are recognised by ``is_gcs_url``; an S3 URL is not."""
    # NOTE(review): an identical ``test_is_gcs_url`` appears elsewhere in this
    # source; if both live in one module the later definition shadows the
    # earlier one -- confirm and deduplicate.
    long_scheme_url = "gcs://pandas/somethingelse.com"
    short_scheme_url = "gs://pandas/somethingelse.com"
    s3_url = "s3://pandas/somethingelse.com"

    assert is_gcs_url(long_scheme_url)
    assert is_gcs_url(short_scheme_url)
    assert not is_gcs_url(s3_url)