Ejemplo n.º 1
0
def frame_block_to_pandas(sds: "SystemDSContext", fb: JavaObject):
    """Convert a frame block handle into a pandas DataFrame.

    Each column of ``fb`` is read according to the type reported by
    ``fb.getColumnType``:

    * ``"String"`` columns are fetched cell by cell with
      ``fb.getIndexAsBytes(column, row)`` and decoded with ``bytes.decode()``.
    * ``"Int"``, ``"Long"``, ``"Double"`` and ``"Boolean"`` columns are
      fetched in one call with ``fb.getColumnAsBytes(column)`` and
      reinterpreted as ``int32``, ``int64``, ``float64`` and bool arrays.

    :param sds: context object; not used by this function.
    :param fb: frame block handle exposing ``getNumRows``, ``getNumColumns``,
        ``getColumnType``, ``getColumnName``, ``getColumnAsBytes`` and
        ``getIndexAsBytes``.
    :return: a DataFrame with one column per frame column, named by
        ``fb.getColumnName``.
    :raises NotImplementedError: if a column reports any other type.
    """
    # (type name, numpy dtype) pairs for columns that arrive as one raw buffer.
    # Matching uses ``==`` rather than hashing so the comparison semantics of
    # whatever ``getColumnType`` returns stay exactly as before.
    buffer_types = (
        ("Int", np.int32),
        ("Long", np.int64),
        ("Double", np.float64),
        # TODO maybe it is more efficient to bit pack the booleans.
        # https://stackoverflow.com/questions/5602155/numpy-boolean-array-with-1-bit-entries
        ("Boolean", np.dtype("?")),
    )

    num_rows = fb.getNumRows()
    num_cols = fb.getNumColumns()

    # Collect every column first and build the frame once: inserting columns
    # one at a time into an existing DataFrame fragments it and makes pandas
    # emit a PerformanceWarning for wide frames.
    columns = {}
    for c_index in range(num_cols):
        d_type = fb.getColumnType(c_index)
        if d_type == "String":
            cells = (fb.getIndexAsBytes(c_index, row) for row in range(num_rows))
            # NOTE(review): any falsy cell (None, but also empty bytes) is
            # mapped to None - confirm that empty strings should be dropped.
            values = [cell.decode() if cell else None for cell in cells]
        else:
            np_dtype = next(
                (dtype for name, dtype in buffer_types if d_type == name),
                None,
            )
            if np_dtype is None:
                raise NotImplementedError(
                    f'Not Implemented {d_type} for systemds to pandas parsing')
            values = np.frombuffer(fb.getColumnAsBytes(c_index), dtype=np_dtype)
        columns[fb.getColumnName(c_index)] = values

    return pd.DataFrame(columns)