def encode_matrix_fbs(matrix, row_idx=None, col_idx=None):
    """
    Given a 2D DataFrame, ndarray or sparse equivalent, create and return
    a Matrix flatbuffer.

    :param matrix: 2D DataFrame, ndarray or sparse equivalent
    :param row_idx: index for row dimension, Index or ndarray.
        Row indices are (currently) unsupported and must be None.
    :param col_idx: index for col dimension, Index or ndarray; serialized
        as the Matrix column index when provided
    :return: the finished buffer, as returned by ``builder.Output()``
    :raises ValueError: if ``row_idx`` is not None, or ``matrix`` is not 2D
    """
    if row_idx is not None:
        raise ValueError("row indexing not supported for FBS Matrix")
    if matrix.ndim != 2:
        raise ValueError("FBS Matrix must be 2D")

    n_rows, n_cols = matrix.shape

    # estimate size needed, so we don't unnecessarily realloc.
    builder = flatbuffers.Builder(guess_at_mem_needed(matrix))

    if isinstance(matrix, pd.DataFrame):
        # Select columns by position rather than by label: when column
        # labels are duplicated, matrix[label] yields a DataFrame holding
        # every matching column instead of a single Series.
        matrix_columns = tuple(matrix.iloc[:, pos] for pos in range(n_cols))
    else:
        matrix_columns = tuple(column for column in matrix.T)

    # Columns are serialized last-to-first. The resulting offsets are
    # prepended to the vector below in that same order, so the reversed
    # traversal is intended to leave the vector in the matrix's own
    # column order.
    column_offsets = []
    for column in reversed(matrix_columns):
        # serialize the typed array, then wrap it in the Column union
        typed_arr = serialize_typed_array(builder, column, column_encoding)
        column_offsets.append(serialize_column(builder, typed_arr))

    # Serialize Matrix.columns[]
    Matrix.MatrixStartColumnsVector(builder, n_cols)
    for offset in column_offsets:
        builder.PrependUOffsetTRelative(offset)
    matrix_column_vec = builder.EndVector(n_cols)

    # serialize the colIndex if provided
    cidx = None
    if col_idx is not None:
        cidx = serialize_typed_array(builder, col_idx, index_encoding)

    # Serialize Matrix; kept under its own name so the input parameter
    # is not rebound to the serialized offset.
    matrix_offset = serialize_matrix(
        builder, n_rows, n_cols, matrix_column_vec, cidx
    )

    builder.Finish(matrix_offset)
    return builder.Output()
def serialize_matrix(builder, n_rows, n_cols, columns, col_idx):
    """
    Serialize NetEncoding.Matrix

    Starts a Matrix table on ``builder``, adds the row count, column count
    and columns vector, and then, if a column index is supplied, its
    union type and value.

    :param builder: builder the Matrix table is written to
    :param n_rows: value passed to ``MatrixAddNRows``
    :param n_cols: value passed to ``MatrixAddNCols``
    :param columns: value passed to ``MatrixAddColumns``
    :param col_idx: None, or a 2-item ``(type, value)`` pair describing the
        column index
    :return: the result of ``Matrix.MatrixEnd(builder)``
    """
    Matrix.MatrixStart(builder)
    Matrix.MatrixAddNRows(builder, n_rows)
    Matrix.MatrixAddNCols(builder, n_cols)
    Matrix.MatrixAddColumns(builder, columns)

    # No column index supplied: close the table without the optional fields.
    if col_idx is None:
        return Matrix.MatrixEnd(builder)

    index_type, index_value = col_idx
    Matrix.MatrixAddColIndexType(builder, index_type)
    Matrix.MatrixAddColIndex(builder, index_value)
    return Matrix.MatrixEnd(builder)