Ejemplo n.º 1
0
def matrix_block_to_numpy(jvm: JVMView, mb: JavaObject):
    """Build a 2-D numpy array from a matrix block object held in the JVM.

    :param jvm: JVM view used to reach ``Py4jConverterUtils``.
    :param mb: Java object exposing ``getNumRows`` / ``getNumColumns``; it is
        passed to ``convertMBtoPy4JDenseArr`` to obtain the raw buffer.
    :return: ``float64`` array of shape ``(rows, cols)`` built from that
        buffer.
    """
    shape = (mb.getNumRows(), mb.getNumColumns())
    converter = jvm.org.tugraz.sysds.runtime.compress.utils.Py4jConverterUtils
    raw = converter.convertMBtoPy4JDenseArr(mb)
    # Read exactly rows * cols doubles from the buffer, then reshape in
    # numpy's default (row-major) order.
    flat = np.frombuffer(raw, dtype=np.float64, count=shape[0] * shape[1])
    return flat.reshape(shape)
Ejemplo n.º 2
0
def frame_block_to_pandas(sds: "SystemDSContext", fb: JavaObject):
    """Copy the columns of a JVM frame block object into a pandas DataFrame.

    :param sds: SystemDS context; not used by this function.
    :param fb: Java object exposing ``getNumRows``, ``getNumColumns``,
        ``getColumnData`` and ``getColumnName``.
    :return: DataFrame whose columns are keyed by the frame's column names,
        filled in column-index order.
    """
    row_count = fb.getNumRows()
    frame = pd.DataFrame()
    for col in range(fb.getNumColumns()):
        # Keep only the first row_count entries of the column data.
        # NOTE(review): presumably the backing array can be longer than the
        # reported row count - confirm against the Java side.
        values = np.array(fb.getColumnData(col)[:row_count])
        frame[fb.getColumnName(col)] = values
    return frame
Ejemplo n.º 3
0
def matrix_block_to_numpy(jvm: JVMView, mb: JavaObject):
    """Turn a MatrixBlock living in the JVM into a numpy array.

    :param jvm: The JVM instance currently running systemds.
    :param mb: Reference to the MatrixBlock object inside the JVM.
    :return: A ``float64`` numpy array of shape ``(rows, columns)``.
    """
    n_rows, n_cols = mb.getNumRows(), mb.getNumColumns()
    utils = jvm.org.apache.sysds.runtime.util.Py4jConverterUtils
    dense_bytes = utils.convertMBtoPy4JDenseArr(mb)
    # Interpret the returned buffer as n_rows * n_cols doubles.
    values = np.frombuffer(
        dense_bytes, dtype=np.float64, count=n_rows * n_cols)
    return values.reshape((n_rows, n_cols))
Ejemplo n.º 4
0
def frame_block_to_pandas(sds: "SystemDSContext", fb: JavaObject):
    """Converts a frame block object in the JVM to a pandas DataFrame.

    Columns are converted one at a time, based on the value returned by
    ``fb.getColumnType``:

    * ``"String"`` columns are fetched cell by cell with
      ``getIndexAsBytes`` and decoded; falsy entries become ``None``.
    * ``"Int"``, ``"Long"``, ``"Double"`` and ``"Boolean"`` columns are
      fetched as a single buffer with ``getColumnAsBytes`` and interpreted
      with the matching numpy dtype.

    :param sds: SystemDS context; not used by this function.
    :param fb: Java object exposing ``getNumRows``, ``getNumColumns``,
        ``getColumnType``, ``getColumnName``, ``getIndexAsBytes`` and
        ``getColumnAsBytes``.
    :return: DataFrame whose columns are keyed by the frame's column names,
        filled in column-index order.
    :raises NotImplementedError: If a column has any other type.
    """
    # Column type name -> numpy dtype, for columns transferred as one buffer.
    # Kept as ordered pairs compared with ``==`` so the matching semantics
    # stay the same as an if/elif chain.
    buffer_dtypes = (
        ("Int", np.int32),
        ("Long", np.int64),
        ("Double", np.float64),
        # TODO maybe it is more efficient to bit pack the booleans.
        # https://stackoverflow.com/questions/5602155/numpy-boolean-array-with-1-bit-entries
        ("Boolean", np.dtype("?")),
    )

    num_rows = fb.getNumRows()
    num_cols = fb.getNumColumns()
    df = pd.DataFrame()

    for c_index in range(num_cols):
        d_type = fb.getColumnType(c_index)
        if d_type == "String":
            ret = []
            for row in range(num_rows):
                ent = fb.getIndexAsBytes(c_index, row)
                # Falsy entries (e.g. None or empty bytes) map to None.
                ret.append(ent.decode() if ent else None)
        else:
            np_dtype = next(
                (dtype for name, dtype in buffer_dtypes if d_type == name),
                None)
            if np_dtype is None:
                raise NotImplementedError(
                    f'Not Implemented {d_type} for systemds to pandas parsing')
            ret = np.frombuffer(fb.getColumnAsBytes(c_index), dtype=np_dtype)
        df[fb.getColumnName(c_index)] = ret

    return df