Beispiel #1
0
def write_dataframe(df, path):
    '''
    Write a pandas.DataFrame to Feather format

    Each column of `df` is handed, in positional order, to an
    ext.FeatherWriter opened on `path`. The writer is closed whether or
    not every column could be written.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are written one by one.
    path : object
        Destination, passed unchanged to ext.FeatherWriter.
    '''
    writer = ext.FeatherWriter(path)

    try:
        # TODO(wesm): pipeline conversion to Arrow memory layout
        for i, name in enumerate(df.columns):
            # Positional lookup: always the i-th column, whatever its label
            col = df.iloc[:, i]

            # Labels that are not strings are written under their str() form
            if not isinstance(name, six.string_types):
                name = str(name)

            writer.write_array(name, col)
    finally:
        # Runs on the error path too, so a failing column does not leave
        # the writer open
        writer.close()
Beispiel #2
0
def _validate_object_column(i, name, col):
    '''
    Raise ValueError if an object-dtype column cannot be serialized

    A column is accepted when its inferred type is 'unicode' or 'string',
    or when it infers as 'mixed' but its non-null values alone infer to
    something other than 'mixed'.
    '''
    inferred_type = pd.lib.infer_dtype(col)

    # The message reports the type inferred from the full column, nulls
    # included, even when the null-free re-inference below is what fails
    msg = ("cannot serialize column {n} "
           "named {name} with dtype {dtype}".format(
               n=i, name=name, dtype=inferred_type))

    if inferred_type in ['mixed']:

        # allow columns with nulls + an inferable type
        inferred_type = pd.lib.infer_dtype(col[col.notnull()])
        if inferred_type in ['mixed']:
            raise ValueError(msg)

    elif inferred_type not in ['unicode', 'string']:
        raise ValueError(msg)


def write_dataframe(df, path):
    '''
    Write a pandas.DataFrame to Feather format

    The frame is validated in full before the output is opened, so a
    rejected frame never creates or partially writes the file at `path`.
    Once opened, the writer is closed whether or not every column could
    be written.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to write. A pandas.SparseDataFrame is converted with
        to_dense() first.
    path : object
        Destination; converted with unicode() and passed to
        ext.FeatherWriter.

    Raises
    ------
    ValueError
        If the column names are not unique, or if an object-dtype column
        fails the check in _validate_object_column.
    '''
    # Cheap structural check first; nothing has been opened yet
    if not df.columns.is_unique:
        raise ValueError("cannot serialize duplicate column names")

    if isinstance(df, pd.SparseDataFrame):
        df = df.to_dense()

    # Check every column up front so a bad column in the middle of the
    # frame cannot leave a half-written file behind
    for i, name in enumerate(df.columns):
        col = df.iloc[:, i]
        if pdapi.is_object_dtype(col):
            _validate_object_column(i, name, col)

    path = unicode(path)
    writer = ext.FeatherWriter(path)

    try:
        # TODO(wesm): pipeline conversion to Arrow memory layout
        for i, name in enumerate(df.columns):
            col = df.iloc[:, i]

            # Labels that are not strings are written under their str() form
            if not isinstance(name, six.string_types):
                name = str(name)

            writer.write_array(name, col)
    finally:
        # Runs on the error path too, so a failing write does not leave
        # the writer open
        writer.close()