def dataframe_to_thrift_struct(df, name, roffset, coffset, rows, cols, format):
    """Build a ``GetArrayResponse`` for a rectangular window of a pandas object.

    The response's ``rows``/``cols`` report the full shape of ``df``, while the
    headers and data cover only the window that starts at
    ``(roffset, coffset)`` and spans at most ``rows`` x ``cols`` cells.

    :param df: two-dimensional frame, or a one-dimensional object exposing
        ``dtype``/``iat`` (handled by the ``len(df.axes) == 1`` branches)
    :param name: stored verbatim in the response's ``slice`` field
    :param roffset: index of the first row of the window
    :param coffset: index of the first column of the window (only used for
        two-dimensional input)
    :param rows: requested row count; ``(rows, cols) == (-1, -1)`` requests
        the whole object
    :param cols: requested column count, see ``rows``
    :param format: printf-style format; ``%`` characters are stripped and the
        result is applied to float (``'f'`` kind) columns only, every other
        column uses ``array_default_format`` for its dtype kind
    :return: the populated ``GetArrayResponse``

    :type df: pandas.core.frame.DataFrame
    :type name: str
    :type coffset: int
    :type roffset: int
    :type rows: int
    :type cols: int
    :type format: str
    """
    dim = len(df.axes)
    is_frame = dim > 1
    num_rows = df.shape[0]
    num_cols = df.shape[1] if is_frame else 1

    array_chunk = GetArrayResponse()
    array_chunk.slice = name
    array_chunk.rows = num_rows
    array_chunk.cols = num_cols
    array_chunk.format = ""
    array_chunk.type = ""
    array_chunk.max = "0"
    array_chunk.min = "0"

    if (rows, cols) == (-1, -1):
        rows, cols = num_rows, num_cols

    rows = min(rows, MAXIMUM_ARRAY_SIZE)
    cols = min(cols, MAXIMUM_ARRAY_SIZE, num_cols)
    if is_frame:
        # The window starts at ``coffset``, so only ``num_cols - coffset``
        # columns remain. Without this clamp the loop below would index past
        # the end of ``df.dtypes`` and raise IndexError.
        cols = min(cols, max(num_cols - coffset, 0))

    # need to precompute column bounds here before slicing!
    col_bounds = [None] * cols
    dtypes = [None] * cols
    if is_frame:
        for col in range(cols):
            kind = df.dtypes.iloc[coffset + col].kind
            dtypes[col] = kind
            if kind in "biufc":
                # Bounds are taken over the whole column, not just the window.
                cvalues = df.iloc[:, coffset + col]
                col_bounds[col] = (cvalues.min(), cvalues.max())
            else:
                col_bounds[col] = (0, 0)
    else:
        kind = df.dtype.kind
        dtypes[0] = kind
        col_bounds[0] = (df.min(), df.max()) if kind in "biufc" else (0, 0)

    if is_frame:
        df = df.iloc[roffset:roffset + rows, coffset:coffset + cols]
    else:
        df = df.iloc[roffset:roffset + rows]

    # The slice may be smaller than requested; use its real extent from here on.
    rows = df.shape[0]
    cols = df.shape[1] if is_frame else 1

    format = format.replace('%', '')

    def col_to_format(c):
        # A caller-supplied format only overrides the default for float columns.
        if dtypes[c] == 'f' and format:
            return format
        return array_default_format(dtypes[c])

    def row_to_cells(r):
        # Lazily yields the formatted cells of window row ``r``.
        return (
            ("%" + col_to_format(c)) % (df.iat[r, c] if is_frame else df.iat[r])
            for c in range(cols)
        )

    array_chunk.headers = header_data_to_thrift_struct(
        rows, cols, dtypes, col_bounds, col_to_format, df, dim)
    array_chunk.data = array_data_to_thrift_struct(rows, cols, row_to_cells)
    return array_chunk
def dataframe_to_thrift_struct(df, name, roffset, coffset, rows, cols, format):
    """Describe a window of a pandas object as a ``GetArrayResponse``.

    ``rows``/``cols`` on the response carry the full shape of ``df``; headers
    and data are produced only for the window anchored at
    ``(roffset, coffset)``.

    :param df: two-dimensional frame, or a one-dimensional object (the code
        then reads ``df.dtype`` and indexes ``df.iat`` with a single position)
    :param name: copied into the response's ``slice`` field
    :param roffset: first row of the window
    :param coffset: first column of the window (ignored for one-dimensional
        input)
    :param rows: number of rows wanted; passing ``-1`` for both ``rows`` and
        ``cols`` selects the full shape
    :param cols: number of columns wanted, see ``rows``
    :param format: printf-style format with or without ``%``; used for float
        (``'f'`` kind) columns when non-empty, otherwise
        ``array_default_format`` of the column's dtype kind is used
    :return: the populated ``GetArrayResponse``

    :type df: pandas.core.frame.DataFrame
    :type name: str
    :type coffset: int
    :type roffset: int
    :type rows: int
    :type cols: int
    :type format: str
    """
    dim = len(df.axes)
    two_dimensional = dim > 1
    num_rows = df.shape[0]
    num_cols = df.shape[1] if two_dimensional else 1

    array_chunk = GetArrayResponse()
    array_chunk.slice = name
    array_chunk.rows = num_rows
    array_chunk.cols = num_cols
    array_chunk.format = ""
    array_chunk.type = ""
    array_chunk.max = "0"
    array_chunk.min = "0"

    if (rows, cols) == (-1, -1):
        rows, cols = num_rows, num_cols

    rows = min(rows, MAXIMUM_ARRAY_SIZE)
    cols = min(cols, MAXIMUM_ARRAY_SIZE, num_cols)
    if two_dimensional:
        # Limit the window to the columns that actually exist to the right of
        # ``coffset``; otherwise ``df.dtypes.iloc[coffset + col]`` below raises
        # IndexError for a window that overhangs the last column.
        cols = min(cols, max(num_cols - coffset, 0))

    # need to precompute column bounds here before slicing!
    col_bounds = [None] * cols
    dtypes = [None] * cols
    if two_dimensional:
        for col in range(cols):
            source_col = coffset + col
            dtype = df.dtypes.iloc[source_col].kind
            dtypes[col] = dtype
            if dtype in "biufc":
                # min/max span every row of the column, not only the window.
                cvalues = df.iloc[:, source_col]
                bounds = (cvalues.min(), cvalues.max())
            else:
                bounds = (0, 0)
            col_bounds[col] = bounds
    else:
        dtype = df.dtype.kind
        dtypes[0] = dtype
        col_bounds[0] = (df.min(), df.max()) if dtype in "biufc" else (0, 0)

    if two_dimensional:
        df = df.iloc[roffset:roffset + rows, coffset:coffset + cols]
    else:
        df = df.iloc[roffset:roffset + rows]

    # Re-read the extent: slicing silently truncates at the object's edges.
    rows = df.shape[0]
    cols = df.shape[1] if two_dimensional else 1

    format = format.replace('%', '')

    def col_to_format(c):
        # Only float columns honour the caller's format; others use defaults.
        if dtypes[c] == 'f' and format:
            return format
        return array_default_format(dtypes[c])

    def format_row(r):
        # Generator over the formatted cells of row ``r`` of the window.
        return (
            ("%" + col_to_format(c))
            % (df.iat[r, c] if two_dimensional else df.iat[r])
            for c in range(cols)
        )

    array_chunk.headers = header_data_to_thrift_struct(
        rows, cols, dtypes, col_bounds, col_to_format, df, dim)
    array_chunk.data = array_data_to_thrift_struct(rows, cols, format_row)
    return array_chunk
def dataframe_to_thrift_struct(df, name, roffset, coffset, rows, cols, format):
    """Build a ``GetArrayResponse`` for a rectangular window of a pandas object.

    The response's ``rows``/``cols`` report the full shape of ``df`` and its
    ``format`` field carries the resolved format (with a leading ``%``).
    Headers and data cover only the window that starts at
    ``(roffset, coffset)``.

    :param df: two-dimensional frame, or a one-dimensional object exposing
        ``dtype``/``iat`` (handled by the ``len(df.axes) == 1`` branches)
    :param name: stored verbatim in the response's ``slice`` field
    :param roffset: index of the first row of the window
    :param coffset: index of the first column of the window (only used for
        two-dimensional input)
    :param rows: requested row count; ``(rows, cols) == (-1, -1)`` requests
        the whole object
    :param cols: requested column count, see ``rows``
    :param format: printf-style format; ``%`` characters are stripped. When
        empty, a default is derived from the dtype kind of the only column
        (non-empty, single-column input) or from kind ``'f'`` otherwise
    :return: the populated ``GetArrayResponse``

    :type df: pandas.core.frame.DataFrame
    :type name: str
    :type coffset: int
    :type roffset: int
    :type rows: int
    :type cols: int
    :type format: str
    """
    dim = len(df.axes)
    is_frame = dim > 1
    num_rows = df.shape[0]
    num_cols = df.shape[1] if is_frame else 1

    array_chunk = GetArrayResponse()
    array_chunk.slice = name
    array_chunk.rows = num_rows
    array_chunk.cols = num_cols
    array_chunk.type = ""
    array_chunk.max = "0"
    array_chunk.min = "0"

    format = format.replace("%", "")
    if not format:
        if num_rows > 0 and num_cols == 1:  # series or data frame with one column
            try:
                kind = df.dtype.kind
            except AttributeError:
                # A frame has no ``dtype``; take the dtype of its only column.
                # ``.iloc`` is positional, so this works for any column label
                # (``df.dtypes[0]`` would be a label lookup and raise KeyError
                # for e.g. a single column labelled 5).
                try:
                    kind = df.dtypes.iloc[0].kind
                except IndexError:
                    kind = "O"
            format = array_default_format(kind)
        else:
            format = array_default_format("f")
    array_chunk.format = "%" + format

    if (rows, cols) == (-1, -1):
        rows, cols = num_rows, num_cols

    rows = min(rows, MAXIMUM_ARRAY_SIZE)
    cols = min(cols, MAXIMUM_ARRAY_SIZE, num_cols)
    if is_frame:
        # The window starts at ``coffset``, so only ``num_cols - coffset``
        # columns remain. Without this clamp the loop below would index past
        # the end of ``df.dtypes`` and raise IndexError.
        cols = min(cols, max(num_cols - coffset, 0))

    # need to precompute column bounds here before slicing!
    col_bounds = [None] * cols
    dtypes = [None] * cols
    if is_frame:
        for col in range(cols):
            dtype = df.dtypes.iloc[coffset + col].kind
            dtypes[col] = dtype
            if dtype in NUMPY_NUMERIC_TYPES:
                # Bounds are taken over the whole column, not just the window.
                cvalues = df.iloc[:, coffset + col]
                bounds = (cvalues.min(), cvalues.max())
            else:
                bounds = (0, 0)
            col_bounds[col] = bounds
    else:
        dtype = df.dtype.kind
        dtypes[0] = dtype
        col_bounds[0] = (df.min(), df.max()) if dtype in NUMPY_NUMERIC_TYPES else (0, 0)

    if is_frame:
        df = df.iloc[roffset:roffset + rows, coffset:coffset + cols]
    else:
        df = df.iloc[roffset:roffset + rows]

    # The slice may be smaller than requested; use its real extent from here on.
    rows = df.shape[0]
    cols = df.shape[1] if is_frame else 1

    def col_to_format(c):
        # The resolved format applies to numeric columns; others use defaults.
        if dtypes[c] in NUMPY_NUMERIC_TYPES and format:
            return format
        return array_default_format(dtypes[c])

    # ``iloc`` is used instead of ``iat`` when column labels repeat.
    # NOTE(review): presumably ``iat`` misbehaves with duplicate labels - confirm.
    if dim == 1 or len(df.columns.unique()) == len(df.columns):
        iat = df.iat
    else:
        iat = df.iloc

    def row_to_cells(r):
        # Lazily yields the formatted cells of window row ``r``.
        return (
            ("%" + col_to_format(c)) % (iat[r, c] if is_frame else iat[r])
            for c in range(cols)
        )

    array_chunk.headers = header_data_to_thrift_struct(
        rows, cols, dtypes, col_bounds, col_to_format, df, dim)
    array_chunk.data = array_data_to_thrift_struct(rows, cols, row_to_cells, format)
    return array_chunk