def dataframe_to_thrift_struct(df, name, roffset, coffset, rows, cols, format):
    """
    :type df: pandas.core.frame.DataFrame
    :type name: str
    :type coffset: int
    :type roffset: int
    :type rows: int
    :type cols: int
    :type format: str


    """
    dim = len(df.axes)
    num_rows = df.shape[0]
    num_cols = df.shape[1] if dim > 1 else 1
    array_chunk = GetArrayResponse()
    array_chunk.slice = name
    array_chunk.rows = num_rows
    array_chunk.cols = num_cols
    array_chunk.format = ""
    array_chunk.type = ""
    array_chunk.max = "0"
    array_chunk.min = "0"

    if (rows, cols) == (-1, -1):
        rows, cols = num_rows, num_cols

    rows = min(rows, MAXIMUM_ARRAY_SIZE)
    cols = min(cols, MAXIMUM_ARRAY_SIZE, num_cols)
    # need to precompute column bounds here before slicing!
    col_bounds = [None] * cols
    dtypes = [None] * cols
    if dim > 1:
        for col in range(cols):
            dtype = df.dtypes.iloc[coffset + col].kind
            dtypes[col] = dtype
            if dtype in "biufc":
                cvalues = df.iloc[:, coffset + col]
                bounds = (cvalues.min(), cvalues.max())
            else:
                bounds = (0, 0)
            col_bounds[col] = bounds
    else:
        dtype = df.dtype.kind
        dtypes[0] = dtype
        col_bounds[0] = (df.min(), df.max()) if dtype in "biufc" else (0, 0)

    df = df.iloc[roffset: roffset + rows, coffset: coffset + cols] if dim > 1 else df.iloc[roffset: roffset + rows]
    rows = df.shape[0]
    cols = df.shape[1] if dim > 1 else 1
    format = format.replace('%', '')

    def col_to_format(c):
        return format if dtypes[c] == 'f' and format else array_default_format(dtypes[c])

    array_chunk.headers = header_data_to_thrift_struct(rows, cols, dtypes, col_bounds, col_to_format, df, dim)
    array_chunk.data = array_data_to_thrift_struct(rows, cols,
                                                   lambda r: (("%" + col_to_format(c)) % (df.iat[r, c] if dim > 1 else df.iat[r])
                                                              for c in range(cols)))
    return array_chunk
Ejemplo n.º 2
0
def dataframe_to_thrift_struct(df, name, roffset, coffset, rows, cols, format):
    """
    :type df: pandas.core.frame.DataFrame
    :type name: str
    :type coffset: int
    :type roffset: int
    :type rows: int
    :type cols: int
    :type format: str


    """
    dim = len(df.axes)
    num_rows = df.shape[0]
    num_cols = df.shape[1] if dim > 1 else 1
    array_chunk = GetArrayResponse()
    array_chunk.slice = name
    array_chunk.rows = num_rows
    array_chunk.cols = num_cols
    array_chunk.format = ""
    array_chunk.type = ""
    array_chunk.max = "0"
    array_chunk.min = "0"

    if (rows, cols) == (-1, -1):
        rows, cols = num_rows, num_cols

    rows = min(rows, MAXIMUM_ARRAY_SIZE)
    cols = min(cols, MAXIMUM_ARRAY_SIZE, num_cols)
    # need to precompute column bounds here before slicing!
    col_bounds = [None] * cols
    dtypes = [None] * cols
    if dim > 1:
        for col in range(cols):
            dtype = df.dtypes.iloc[coffset + col].kind
            dtypes[col] = dtype
            if dtype in "biufc":
                cvalues = df.iloc[:, coffset + col]
                bounds = (cvalues.min(), cvalues.max())
            else:
                bounds = (0, 0)
            col_bounds[col] = bounds
    else:
        dtype = df.dtype.kind
        dtypes[0] = dtype
        col_bounds[0] = (df.min(), df.max()) if dtype in "biufc" else (0, 0)

    df = df.iloc[roffset:roffset + rows, coffset:coffset +
                 cols] if dim > 1 else df.iloc[roffset:roffset + rows]
    rows = df.shape[0]
    cols = df.shape[1] if dim > 1 else 1
    format = format.replace('%', '')

    def col_to_format(c):
        return format if dtypes[c] == 'f' and format else array_default_format(
            dtypes[c])

    array_chunk.headers = header_data_to_thrift_struct(rows, cols, dtypes,
                                                       col_bounds,
                                                       col_to_format, df, dim)
    array_chunk.data = array_data_to_thrift_struct(
        rows, cols, lambda r: (("%" + col_to_format(c)) %
                               (df.iat[r, c] if dim > 1 else df.iat[r])
                               for c in range(cols)))
    return array_chunk
Ejemplo n.º 3
0
def dataframe_to_thrift_struct(df, name, roffset, coffset, rows, cols, format):
    """
    :type df: pandas.core.frame.DataFrame
    :type name: str
    :type coffset: int
    :type roffset: int
    :type rows: int
    :type cols: int
    :type format: str


    """
    dim = len(df.axes)
    num_rows = df.shape[0]
    num_cols = df.shape[1] if dim > 1 else 1
    array_chunk = GetArrayResponse()
    array_chunk.slice = name
    array_chunk.rows = num_rows
    array_chunk.cols = num_cols
    array_chunk.type = ""
    array_chunk.max = "0"
    array_chunk.min = "0"
    format = format.replace("%", "")
    if not format:
        if num_rows > 0 and num_cols == 1:  # series or data frame with one column
            try:
                kind = df.dtype.kind
            except AttributeError:
                try:
                    kind = df.dtypes[0].kind
                except IndexError:
                    kind = "O"
            format = array_default_format(kind)
        else:
            format = array_default_format("f")
    array_chunk.format = "%" + format

    if (rows, cols) == (-1, -1):
        rows, cols = num_rows, num_cols

    rows = min(rows, MAXIMUM_ARRAY_SIZE)
    cols = min(cols, MAXIMUM_ARRAY_SIZE, num_cols)
    # need to precompute column bounds here before slicing!
    col_bounds = [None] * cols
    dtypes = [None] * cols
    if dim > 1:
        for col in range(cols):
            dtype = df.dtypes.iloc[coffset + col].kind
            dtypes[col] = dtype
            if dtype in NUMPY_NUMERIC_TYPES:
                cvalues = df.iloc[:, coffset + col]
                bounds = (cvalues.min(), cvalues.max())
            else:
                bounds = (0, 0)
            col_bounds[col] = bounds
    else:
        dtype = df.dtype.kind
        dtypes[0] = dtype
        col_bounds[0] = (df.min(), df.max()) if dtype in NUMPY_NUMERIC_TYPES else (0, 0)

    df = df.iloc[roffset: roffset + rows, coffset: coffset + cols] if dim > 1 else df.iloc[roffset: roffset + rows]
    rows = df.shape[0]
    cols = df.shape[1] if dim > 1 else 1

    def col_to_format(c):
        return format if dtypes[c] in NUMPY_NUMERIC_TYPES and format else array_default_format(dtypes[c])

    iat = df.iat if dim == 1 or len(df.columns.unique()) == len(df.columns) else df.iloc

    array_chunk.headers = header_data_to_thrift_struct(rows, cols, dtypes, col_bounds, col_to_format, df, dim)
    array_chunk.data = array_data_to_thrift_struct(rows, cols,
                                                   lambda r: (("%" + col_to_format(c)) % (iat[r, c] if dim > 1 else iat[r])
                                                              for c in range(cols)), format)
    return array_chunk