def write_dataframe(df, path):
    '''
    Write a pandas.DataFrame to Feather format

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are written one at a time, in positional order
    path : str
        Destination handed unchanged to ext.FeatherWriter

    Notes
    -----
    Column labels that are not strings are converted with str() before
    being passed to the writer.
    '''
    feather_writer = ext.FeatherWriter(path)

    # TODO(wesm): pipeline conversion to Arrow memory layout
    for position, label in enumerate(df.columns):
        # Select by position rather than by label
        values = df.iloc[:, position]
        column_name = (label if isinstance(label, six.string_types)
                       else str(label))
        feather_writer.write_array(column_name, values)

    feather_writer.close()
def _check_object_column(col, position, name):
    '''
    Raise ValueError if an object-dtype column cannot be serialized

    Parameters
    ----------
    col : pandas.Series
        Object-dtype column to inspect
    position : int
        Positional index of the column, used in the error message
    name : object
        Column label, used in the error message

    Raises
    ------
    ValueError
        If the inferred type is neither 'unicode' nor 'string', and the
        column is not a 'mixed' column whose non-null values infer to
        something other than 'mixed'
    '''
    inferred_type = pd.lib.infer_dtype(col)
    if inferred_type in ('unicode', 'string'):
        return

    if inferred_type == 'mixed':
        # allow columns with nulls + an inferable type
        # NOTE(review): any non-'mixed' result is accepted here, not only
        # string types -- confirm that is intended
        if pd.lib.infer_dtype(col[col.notnull()]) != 'mixed':
            return

    # The message reports the type inferred from the full column (nulls
    # included), and is only built once we know we are raising
    raise ValueError("cannot serialize column {n} "
                     "named {name} with dtype {dtype}".format(
                         n=position, name=name, dtype=inferred_type))


def write_dataframe(df, path):
    '''
    Write a pandas.DataFrame to Feather format

    The frame is fully validated before the writer is created, so a frame
    that is rejected never reaches ext.FeatherWriter.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.SparseDataFrame
        Frame to write; sparse frames are converted with to_dense() first
    path : object
        Destination; converted with unicode() before being handed to
        ext.FeatherWriter

    Raises
    ------
    ValueError
        If the column labels are not unique, or an object-dtype column
        holds values of an unsupported inferred type
    '''
    path = unicode(path)

    if isinstance(df, pd.SparseDataFrame):
        df = df.to_dense()

    if not df.columns.is_unique:
        raise ValueError("cannot serialize duplicate column names")

    # First pass: reject unsupported columns before anything is written
    for i, name in enumerate(df.columns):
        col = df.iloc[:, i]
        if pdapi.is_object_dtype(col):
            _check_object_column(col, i, name)

    # Only now create the writer
    # (presumably the constructor opens the output file -- TODO confirm)
    writer = ext.FeatherWriter(path)

    # TODO(wesm): pipeline conversion to Arrow memory layout
    for i, name in enumerate(df.columns):
        col = df.iloc[:, i]

        if not isinstance(name, six.string_types):
            name = str(name)

        writer.write_array(name, col)

    writer.close()