Esempio n. 1
0
def unbox_dataframe(typ, val, c):
    """Unbox a Python DataFrame object into a native DataFrame struct.

    All columns are unboxed eagerly here, so that no column unboxing has to
    occur later.

    Args:
        typ: DataFrame type; its ``columns``, ``data`` and ``index``
            attributes drive the unboxing.
        val: the Python DataFrame object being unboxed.
        c: unboxing context exposing ``context``, ``builder`` and ``pyapi``.

    Returns:
        NativeValue wrapping the populated DataFrame struct.

    Raises:
        TypeError: if a column's data type is neither a ``types.Array`` nor
            ``string_array_type``.
    """
    n_cols = len(typ.columns)
    column_strs = [
        numba.unicode.make_string_from_constant(c.context, c.builder,
                                                string_type, a)
        for a in typ.columns
    ]
    # create dataframe struct and store values
    dataframe = cgutils.create_struct_proxy(typ)(c.context, c.builder)

    column_tup = c.context.make_tuple(c.builder,
                                      types.UniTuple(string_type, n_cols),
                                      column_strs)

    # this unboxes all DF columns so that no column unboxing occurs later
    for col_ind, col_name in enumerate(typ.columns):
        ty_series = typ.data[col_ind]
        is_array = isinstance(ty_series, types.Array)
        if not is_array and ty_series != string_array_type:
            # Fail loudly instead of hitting an unbound (or stale, from the
            # previous column) ``native_val`` further down.
            raise TypeError(
                "unbox_dataframe: unsupported type {} for column {!r}".format(
                    ty_series, col_name))

        # NOTE(review): columns are fetched through attribute access, which
        # presumably only works for names that are valid, non-shadowed
        # DataFrame attributes -- confirm against callers.
        series_obj = c.pyapi.object_getattr_string(val, col_name)
        arr_obj = c.pyapi.object_getattr_string(series_obj, "values")
        if is_array:
            native_val = unbox_array(ty_series, arr_obj, c)
        else:
            native_val = unbox_str_series(string_array_type, series_obj, c)

        dataframe.data = c.builder.insert_value(dataframe.data,
                                                native_val.value, col_ind)

        # Release the references acquired by the attribute lookups above;
        # previously they were leaked for every column.
        c.pyapi.decref(arr_obj)
        c.pyapi.decref(series_obj)

    # TODO: support unboxing other index kinds
    if typ.index == types.none:
        dataframe.index = c.context.get_constant(types.none, None)
    if typ.index == string_array_type:
        index_obj = c.pyapi.object_getattr_string(val, "index")
        dataframe.index = unbox_str_series(string_array_type, index_obj,
                                           c).value
        c.pyapi.decref(index_obj)
    if isinstance(typ.index, types.Array):
        index_obj = c.pyapi.object_getattr_string(val, "index")
        index_data = c.pyapi.object_getattr_string(index_obj, "_data")
        dataframe.index = unbox_array(typ.index, index_data, c).value
        c.pyapi.decref(index_data)
        c.pyapi.decref(index_obj)

    dataframe.columns = column_tup
    # Keep the originating Python object on the struct.
    dataframe.parent = val

    # increase refcount of stored values
    if c.context.enable_nrt:
        # TODO: other objects?
        for var in column_strs:
            c.context.nrt.incref(c.builder, string_type, var)

    return NativeValue(dataframe._getvalue())
Esempio n. 2
0
def _unbox_index_data(index_typ, index_obj, c):
    """ Unboxes Pandas index object basing on the native type inferred previously.
        Params:
            index_typ: native Numba type the object is to be unboxed into
            index_obj: Python object to be unboxed
            c: LLVM context object
        Returns: LLVM instructions to generate native value
        Raises:
            AssertionError: if index_typ is not one of the supported index types
    """
    if isinstance(index_typ, RangeIndexType):
        return unbox_range_index(index_typ, index_obj, c)

    if isinstance(index_typ, Int64IndexType):
        return unbox_int64_index(index_typ, index_obj, c)

    if index_typ == string_array_type:
        return unbox_str_series(index_typ, index_obj, c)

    # this is still here only because of Float64Index represented as array
    # TO-DO: remove when it's added
    if isinstance(index_typ, types.Array):
        index_data = c.pyapi.object_getattr_string(index_obj, "_data")
        res = unbox_array(index_typ, index_data, c)
        # the "_data" attribute reference is only needed during unboxing
        c.pyapi.decref(index_data)
        return res

    if isinstance(index_typ, types.NoneType):
        return unbox_none(index_typ, index_obj, c)

    # Raise explicitly rather than ``assert False``: assert statements are
    # stripped under ``python -O``, which would make this function silently
    # return None for an unsupported index type.
    raise AssertionError(
        f"_unbox_index_data: unexpected index type({index_typ}) while unboxing")
Esempio n. 3
0
def _unbox_series_data(dtype, data_typ, arr_obj, c):
    """Pick the unboxing routine for a Series' data and apply it to arr_obj.

    String arrays, list-of-string arrays and categorical data each have a
    dedicated routine; anything else goes through the generic array unboxing.
    """
    # The order of the checks matters: data_typ is examined before dtype.
    if data_typ == string_array_type:
        result = unbox_str_series(string_array_type, arr_obj, c)
    elif data_typ == list_string_array_type:
        result = _unbox_array_list_str(arr_obj, c)
    elif isinstance(dtype, CategoricalDtypeType):
        result = unbox_Categorical(data_typ, arr_obj, c)
    else:
        # TODO: error handling like Numba callwrappers.py
        result = unbox_array(data_typ, arr_obj, c)
    return result
Esempio n. 4
0
def _unbox_series_data(dtype, data_typ, arr_obj, c):
    """Pick the unboxing routine for a Series' data and apply it to arr_obj.

    Checks are made in a fixed order: string array, datetime-date dtype,
    list-of-string array, string split view, categorical dtype. Anything
    else goes through the generic array unboxing.
    """
    if data_typ == string_array_type:
        return unbox_str_series(string_array_type, arr_obj, c)

    if dtype == datetime_date_type:
        return unbox_datetime_date_array(data_typ, arr_obj, c)

    if data_typ == list_string_array_type:
        return _unbox_array_list_str(arr_obj, c)

    if data_typ == string_array_split_view_type:
        # XXX dummy unboxing to avoid errors in _get_dataframe_data():
        # arr_obj is not read, a fresh helper struct is returned instead
        dummy_view = c.context.make_helper(c.builder,
                                           string_array_split_view_type)
        return NativeValue(dummy_view._getvalue())

    if isinstance(dtype, PDCategoricalDtype):
        return unbox_categorical_array(data_typ, arr_obj, c)

    # TODO: error handling like Numba callwrappers.py
    return unbox_array(data_typ, arr_obj, c)
Esempio n. 5
0
def unbox_dataframe(typ, val, c):
    """Unbox a Python DataFrame object into a native DataFrame struct.

    Column data is NOT unboxed here: the struct only records the column
    names, the index, the parent Python object and an all-zero ``unboxed``
    flag tuple with ``n_cols + 1`` entries. Columns will be extracted later
    if necessary.

    Args:
        typ: DataFrame type; its ``columns`` and ``index`` attributes are read.
        val: the Python DataFrame object being unboxed.
        c: unboxing context exposing ``context``, ``builder`` and ``pyapi``.

    Returns:
        NativeValue wrapping the DataFrame struct.
    """
    n_cols = len(typ.columns)
    column_strs = [
        numba.unicode.make_string_from_constant(c.context, c.builder,
                                                string_type, a)
        for a in typ.columns
    ]
    # create dataframe struct and store values
    dataframe = cgutils.create_struct_proxy(typ)(c.context, c.builder)

    column_tup = c.context.make_tuple(c.builder,
                                      types.UniTuple(string_type, n_cols),
                                      column_strs)
    # One int8 flag per column plus one extra slot, all starting at zero.
    # NOTE(review): the extra slot presumably tracks the index -- confirm.
    zero = c.context.get_constant(types.int8, 0)
    unboxed_tup = c.context.make_tuple(c.builder,
                                       types.UniTuple(types.int8, n_cols + 1),
                                       [zero] * (n_cols + 1))

    # TODO: support unboxing other index kinds
    if typ.index == types.none:
        dataframe.index = c.context.get_constant(types.none, None)
    if typ.index == string_array_type:
        index_obj = c.pyapi.object_getattr_string(val, "index")
        dataframe.index = unbox_str_series(string_array_type, index_obj,
                                           c).value
        # release the reference acquired by the attribute lookup
        c.pyapi.decref(index_obj)
    if isinstance(typ.index, types.Array):
        index_obj = c.pyapi.object_getattr_string(val, "index")
        index_data = c.pyapi.object_getattr_string(index_obj, "_data")
        dataframe.index = unbox_array(typ.index, index_data, c).value
        # release the references acquired by the attribute lookups
        c.pyapi.decref(index_data)
        c.pyapi.decref(index_obj)

    dataframe.columns = column_tup
    dataframe.unboxed = unboxed_tup
    # Keep the originating Python object on the struct.
    dataframe.parent = val

    # increase refcount of stored values
    if c.context.enable_nrt:
        # TODO: other objects?
        for var in column_strs:
            c.context.nrt.incref(c.builder, string_type, var)

    return NativeValue(dataframe._getvalue())
Esempio n. 6
0
def unbox_series(typ, val, c):
    """Unbox a Python Series object into a native Series struct.

    The data is taken from the object's ``values`` attribute. The index is
    unboxed only when ``typ.index`` is ``string_array_type`` or a
    ``types.Array``; the name is unboxed only when ``typ.is_named`` is set.

    Args:
        typ: Series type; ``dtype``, ``data``, ``index`` and ``is_named``
            are read.
        val: the Python Series object being unboxed.
        c: unboxing context exposing ``context``, ``builder`` and ``pyapi``.

    Returns:
        NativeValue wrapping the Series struct.
    """
    arr_obj = c.pyapi.object_getattr_string(val, "values")
    series = cgutils.create_struct_proxy(typ)(c.context, c.builder)
    series.data = _unbox_series_data(typ.dtype, typ.data, arr_obj, c).value
    # TODO: other indices
    if typ.index == string_array_type:
        index_obj = c.pyapi.object_getattr_string(val, "index")
        series.index = unbox_str_series(string_array_type, index_obj, c).value
        # release the reference acquired by the attribute lookup
        c.pyapi.decref(index_obj)

    if isinstance(typ.index, types.Array):
        index_obj = c.pyapi.object_getattr_string(val, "index")
        index_data = c.pyapi.object_getattr_string(index_obj, "_data")
        series.index = unbox_array(typ.index, index_data, c).value
        # release the references acquired by the attribute lookups
        c.pyapi.decref(index_data)
        c.pyapi.decref(index_obj)

    if typ.is_named:
        name_obj = c.pyapi.object_getattr_string(val, "name")
        series.name = numba.unicode.unbox_unicode_str(string_type, name_obj,
                                                      c).value
        # release the reference acquired by the attribute lookup
        c.pyapi.decref(name_obj)

    c.pyapi.decref(arr_obj)
    return NativeValue(series._getvalue())
Esempio n. 7
0
def unbox_dataframe(typ, val, c):
    """Unbox a Python DataFrame object into a native DataFrame struct.

    Column names are stored in a native list of strings. Column data is
    unboxed eagerly and grouped by column type: for every type listed by
    ``get_structure_maps`` a native list holding the columns of that type is
    allocated and stored in ``dataframe.data`` at that type's id.

    Args:
        typ: DataFrame type; ``columns``, ``data`` and ``index`` are read.
        val: the Python DataFrame object being unboxed.
        c: unboxing context exposing ``context``, ``builder`` and ``pyapi``.

    Returns:
        NativeValue wrapping the struct, with ``is_error`` set when any of
        the native list allocations failed.

    Raises:
        TypeError: if a column's data type is neither a ``types.Array`` nor
            ``string_array_type``.
    """
    n_cols = len(typ.columns)
    column_strs = [
        numba.cpython.unicode.make_string_from_constant(
            c.context, c.builder, string_type, a) for a in typ.columns
    ]
    # create dataframe struct and store values
    dataframe = cgutils.create_struct_proxy(typ)(c.context, c.builder)

    # Runtime flag, initially false; set to true when an allocation fails.
    errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit)

    col_list_type = types.List(string_type)
    ok, inst = listobj.ListInstance.allocate_ex(c.context, c.builder,
                                                col_list_type, n_cols)

    with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
        with if_ok:
            inst.size = c.context.get_constant(types.intp, n_cols)
            for i, column_str in enumerate(column_strs):
                inst.setitem(c.context.get_constant(types.intp, i),
                             column_str,
                             incref=False)
            dataframe.columns = inst.value

        with if_not_ok:
            c.builder.store(cgutils.true_bit, errorptr)

    # If an error occurred, drop the whole native list
    with c.builder.if_then(c.builder.load(errorptr)):
        c.context.nrt.decref(c.builder, col_list_type, inst.value)

    # data_typs_map: column type -> (slot in dataframe.data, indices of the
    # columns having that type); types_order: the types to iterate over.
    _, data_typs_map, types_order = get_structure_maps(typ.data, typ.columns)

    for col_typ in types_order:
        type_id, col_indices = data_typs_map[col_typ]
        n_type_cols = len(col_indices)
        list_type = types.List(col_typ)
        ok, inst = listobj.ListInstance.allocate_ex(c.context, c.builder,
                                                    list_type, n_type_cols)

        with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
            with if_ok:
                inst.size = c.context.get_constant(types.intp, n_type_cols)
                for i, col_idx in enumerate(col_indices):
                    ty_series = typ.data[col_idx]
                    is_array = isinstance(ty_series, types.Array)
                    if not is_array and ty_series != string_array_type:
                        # Fail loudly instead of hitting an unbound (or
                        # stale, from a previous column) ``native_val``.
                        raise TypeError(
                            "unbox_dataframe: unsupported type {} for "
                            "column {!r}".format(ty_series,
                                                 typ.columns[col_idx]))

                    series_obj = c.pyapi.object_getattr_string(
                        val, typ.columns[col_idx])
                    arr_obj = c.pyapi.object_getattr_string(
                        series_obj, "values")
                    if is_array:
                        native_val = unbox_array(ty_series, arr_obj, c)
                    else:
                        native_val = unbox_str_series(string_array_type,
                                                      series_obj, c)

                    inst.setitem(c.context.get_constant(types.intp, i),
                                 native_val.value,
                                 incref=False)

                    # Release the references acquired by the attribute
                    # lookups; previously leaked for every column.
                    c.pyapi.decref(arr_obj)
                    c.pyapi.decref(series_obj)

                dataframe.data = c.builder.insert_value(
                    dataframe.data, inst.value, type_id)

            with if_not_ok:
                c.builder.store(cgutils.true_bit, errorptr)

        # If an error occurred, drop the whole native list
        with c.builder.if_then(c.builder.load(errorptr)):
            c.context.nrt.decref(c.builder, list_type, inst.value)

    index_obj = c.pyapi.object_getattr_string(val, "index")
    dataframe.index = _unbox_index_data(typ.index, index_obj, c).value
    c.pyapi.decref(index_obj)

    # Keep the originating Python object on the struct.
    dataframe.parent = val

    # increase refcount of stored values
    if c.context.enable_nrt:
        # TODO: other objects?
        for var in column_strs:
            c.context.nrt.incref(c.builder, string_type, var)

    return NativeValue(dataframe._getvalue(),
                       is_error=c.builder.load(errorptr))