Exemple #1
0
def _box_series_data(dtype, data_typ, val, c):
    """Box native Series/index data into a Python array-like object.

    The boxing routine is selected from ``dtype`` (element type) and
    ``data_typ`` (container type). A tuple ``dtype`` is first mapped to the
    equivalent aligned NumPy record dtype; record data is boxed as a regular
    array and then converted to an object array via ``astype("O")``.

    Parameters
    ----------
    dtype : element type of the data being boxed.
    data_typ : type of the native container being boxed.
    val : native value to box.
    c : boxing context exposing ``context``, ``builder`` and ``pyapi``.

    Returns
    -------
    The boxed Python object (the value produced by the selected boxing
    routine, or by ``astype`` for record data).
    """

    if isinstance(dtype, types.BaseTuple):
        # Describe the tuple as a comma-separated, aligned NumPy record dtype.
        np_dtype = np.dtype(','.join(str(t) for t in dtype.types), align=True)
        dtype = numba.numpy_support.from_dtype(np_dtype)

    if dtype == string_type:
        arr = box_str_arr(string_array_type, val, c)
    elif dtype == datetime_date_type:
        arr = box_datetime_date_array(data_typ, val, c)
    elif isinstance(dtype, PDCategoricalDtype):
        arr = box_categorical_array(data_typ, val, c)
    elif data_typ == string_array_split_view_type:
        arr = box_str_arr_split_view(data_typ, val, c)
    elif dtype == types.List(string_type):
        arr = box_list(list_string_array_type, val, c)
    else:
        arr = box_array(data_typ, val, c)

    if isinstance(dtype, types.Record):
        # Equivalent of ``arr = arr.astype("O")`` on the boxed array.
        o_str = c.context.insert_const_string(c.builder.module, "O")
        o_obj = c.pyapi.string_from_string(o_str)
        obj_arr = c.pyapi.call_method(arr, "astype", (o_obj, ))
        # ``astype`` hands back a new object, so the temporary "O" string and
        # the intermediate record array are no longer needed; release them
        # instead of leaking one reference each per call.
        c.pyapi.decref(o_obj)
        c.pyapi.decref(arr)
        arr = obj_arr

    return arr
Exemple #2
0
def box_dataframe(typ, val, c):
    """Box a native DataFrame struct into a ``pandas.DataFrame`` object.

    An empty ``pandas.DataFrame`` is created first; every column is then
    boxed and assigned by name (``df[name] = boxed_column``). If the
    DataFrame type has an index (``typ.index != types.none``), the boxed
    index is assigned to ``df.index``.

    Parameters
    ----------
    typ : DataFrame type; ``columns``, ``data`` and ``index`` are read.
    val : native DataFrame value.
    c : boxing context exposing ``context``, ``builder`` and ``pyapi``.

    Returns
    -------
    The Python DataFrame object.
    """
    context, builder, pyapi = c.context, c.builder, c.pyapi

    def _box_column(col_typ, col_val):
        # Choose the boxing routine for one column; the order of the checks
        # matters and mirrors the element/container type precedence.
        elem_typ = col_typ.dtype  # TODO: check Categorical
        if elem_typ == string_type:
            return box_str_arr(col_typ, col_val, c)
        if isinstance(elem_typ, PDCategoricalDtype):
            return box_categorical_array(col_typ, col_val, c)
        if col_typ == string_array_split_view_type:
            return box_str_arr_split_view(col_typ, col_val, c)
        if elem_typ == types.List(string_type):
            return box_list(list_string_array_type, col_val, c)
        return box_array(col_typ, col_val, c)

    column_count = len(typ.columns)
    df_struct = cgutils.create_struct_proxy(typ)(context, builder, value=val)

    # Pull every native column out of the struct's data tuple up front.
    native_cols = []
    for pos in range(column_count):
        native_cols.append(builder.extract_value(df_struct.data, pos))

    # Non-null parent means the df was unboxed from Python.
    # NOTE: computed but not used further in this function.
    has_parent = cgutils.is_not_null(builder, df_struct.parent)

    # df = pandas.DataFrame()
    pandas_name = context.insert_const_string(builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(pandas_name)
    df_obj = pyapi.call_method(pandas_mod, "DataFrame", ())

    # TODO: datetime.date, DatetimeIndex?
    for col_name, col_val, col_typ in zip(typ.columns, native_cols, typ.data):
        name_const = context.insert_const_string(builder.module, col_name)
        name_obj = pyapi.string_from_string(name_const)

        boxed_col = _box_column(col_typ, col_val)

        # df[col_name] = boxed_col
        pyapi.object_setitem(df_obj, name_obj, boxed_col)

        # NOTE: only the name object is released here; the boxed column
        # object is intentionally left as-is (ownership still marked TODO).
        pyapi.decref(name_obj)

    # Attach the index only when the DataFrame type actually has one.
    if typ.index != types.none:
        index_obj = _box_series_data(
            typ.index.dtype, typ.index, df_struct.index, c)
        pyapi.object_setattr_string(df_obj, 'index', index_obj)

    pyapi.decref(pandas_mod)
    return df_obj
Exemple #3
0
def box_dataframe(typ, val, c):
    """Box a native DataFrame struct into a ``pandas.DataFrame`` object.

    The native data is a tuple of lists, one list per column type, e.g.
    ``([array(int64, 1d, C), array(int64, 1d, C)], [array(float64, 1d, C)])``.
    Each list is boxed at most once; the columns are then looked up in the
    boxed lists, collected in a dict keyed by column name, and passed to
    ``pandas.DataFrame(dict)``. If the DataFrame type has an index, it is
    boxed and assigned to ``df.index``.

    Parameters
    ----------
    typ : DataFrame type; ``columns``, ``data``, ``column_loc`` and ``index``
        are read.
    val : native DataFrame value.
    c : boxing context exposing ``context``, ``builder`` and ``pyapi``.

    Returns
    -------
    The Python DataFrame object.
    """
    context, builder, pyapi = c.context, c.builder, c.pyapi

    df_struct = cgutils.create_struct_proxy(typ)(context, builder, value=val)

    pandas_name = context.insert_const_string(builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(pandas_name)
    columns_dict = pyapi.dict_new()

    # type_id -> boxed Python list holding all columns of that type
    boxed_lists = {}
    # TODO: datetime.date, DatetimeIndex?
    for col_name, col_typ in zip(typ.columns, typ.data):
        name_const = context.insert_const_string(builder.module, col_name)
        name_obj = pyapi.string_from_string(name_const)

        location = typ.column_loc[col_name]
        type_id = location.type_id
        col_id = location.col_id

        if type_id not in boxed_lists:
            # First column of this type: take its list out of the data tuple
            # and box the whole list once.
            native_list = builder.extract_value(df_struct.data, type_id)
            boxed_lists[type_id] = box_list(
                types.List(col_typ), native_list, c)

        # PyList_GetItem returns a borrowed reference, so the item itself
        # is not decref'ed here.
        col_obj = pyapi.list_getitem(boxed_lists[type_id], col_id)
        pyapi.dict_setitem(columns_dict, name_obj, col_obj)

        pyapi.decref(name_obj)

    # df = pandas.DataFrame(columns_dict)
    df_obj = pyapi.call_method(pandas_mod, "DataFrame", (columns_dict,))
    pyapi.decref(columns_dict)

    # Attach the index only when the DataFrame type actually has one.
    if typ.index != types.none:
        index_obj = _box_index_data(typ.index, df_struct.index, c)
        pyapi.object_setattr_string(df_obj, 'index', index_obj)
        pyapi.decref(index_obj)

    # Release the boxed per-type lists created above.
    for boxed_list in boxed_lists.values():
        pyapi.decref(boxed_list)

    pyapi.decref(pandas_mod)
    return df_obj
Exemple #4
0
def unbox_dataframe(typ, val, c):
    """Unbox a Python DataFrame object into the native DataFrame struct.

    Columns are grouped by type (via ``get_structure_maps``); for every type
    a native list is allocated, filled with the unboxed columns of that type
    and stored in the struct's data tuple at the type's position. The index
    is unboxed from ``val.index`` and ``val`` is recorded as the parent.

    Parameters
    ----------
    typ : DataFrame type; ``columns``, ``data`` and ``index`` are read.
    val : Python DataFrame object.
    c : unboxing context exposing ``context``, ``builder`` and ``pyapi``.

    Returns
    -------
    NativeValue wrapping the struct; ``is_error`` is set when a native list
    allocation failed.
    """
    context, builder, pyapi = c.context, c.builder, c.pyapi

    # Native struct to be filled in, plus a flag recording allocation errors.
    df_struct = cgutils.create_struct_proxy(typ)(context, builder)
    error_flag = cgutils.alloca_once_value(builder, cgutils.false_bit)

    _, cols_by_type, ordered_types = get_structure_maps(typ.data, typ.columns)

    for elem_typ in ordered_types:
        type_id, member_cols = cols_by_type[elem_typ]
        member_count = len(member_cols)
        native_list_typ = types.List(elem_typ)
        allocated, native_list = listobj.ListInstance.allocate_ex(
            context, builder, native_list_typ, member_count)

        with builder.if_else(allocated, likely=True) as (on_ok, on_fail):
            with on_ok:
                native_list.size = context.get_constant(
                    types.intp, member_count)
                for slot, col_pos in enumerate(member_cols):
                    # column_values = getattr(val, <column name>).values
                    series_obj = pyapi.object_getattr_string(
                        val, typ.columns[col_pos])
                    values_obj = pyapi.object_getattr_string(
                        series_obj, "values")
                    col_typ = typ.data[col_pos]

                    # FIXME: CategoricalType has wrong dtype attribute value
                    # (i.e. dtype of codes); pd_dtype is offered for this
                    # purpose, so use it for Categorical columns.
                    if isinstance(col_typ, Categorical):
                        col_dtype = col_typ.pd_dtype
                    else:
                        col_dtype = col_typ.dtype
                    unboxed = _unbox_series_data(
                        col_dtype, col_typ, values_obj, c)

                    slot_index = context.get_constant(types.intp, slot)
                    native_list.setitem(
                        slot_index, unboxed.value, incref=False)
                    pyapi.decref(values_obj)
                    pyapi.decref(series_obj)

                df_struct.data = builder.insert_value(
                    df_struct.data, native_list.value, type_id)

            with on_fail:
                builder.store(cgutils.true_bit, error_flag)

        # If an error occurred, drop the whole native list
        with builder.if_then(builder.load(error_flag)):
            context.nrt.decref(builder, native_list_typ, native_list.value)

    index_obj = pyapi.object_getattr_string(val, "index")
    df_struct.index = _unbox_index_data(typ.index, index_obj, c).value
    pyapi.decref(index_obj)

    df_struct.parent = val

    return NativeValue(df_struct._getvalue(),
                       is_error=builder.load(error_flag))
Exemple #5
0
def _infer_series_list_dtype(S):
    """Infer the list type of a Series whose elements are Python lists.

    Elements are inspected in positional order. Empty lists are skipped;
    the first non-empty list decides the result. Only lists whose first
    item is a ``str`` are supported, yielding ``types.List(string_type)``.

    Raises
    ------
    ValueError
        If an element is not a list, if the first non-empty list does not
        start with a ``str``, or if no non-empty list is found at all.
    """

    def _unsupported():
        # Built lazily so S.name is only read when an error is reported.
        return ValueError(
            "data type for column {} not supported".format(S.name))

    for item in (S.iloc[pos] for pos in range(len(S))):
        if not isinstance(item, list):
            raise _unsupported()
        if len(item) == 0:
            # Nothing to learn from an empty list; look at the next element.
            continue
        # TODO: support more types
        if isinstance(item[0], str):
            return types.List(string_type)
        raise _unsupported()

    raise _unsupported()
Exemple #6
0
def unbox_dataframe(typ, val, c):
    """Unbox a Python DataFrame object into the native DataFrame struct.

    Two things are stored in the struct besides the index and the parent:

    * ``columns``: a native list of the column names as native strings;
    * ``data``: for each column type (grouped via ``get_structure_maps``) a
      native list holding the unboxed columns of that type, stored at the
      type's position in the data tuple.

    Only ``types.Array`` columns and ``string_array_type`` columns are
    supported.

    Parameters
    ----------
    typ : DataFrame type; ``columns``, ``data`` and ``index`` are read.
    val : Python DataFrame object.
    c : unboxing context exposing ``context``, ``builder`` and ``pyapi``.

    Returns
    -------
    NativeValue wrapping the struct; ``is_error`` is set when a native list
    allocation failed.

    Raises
    ------
    NotImplementedError
        At compile time, if a column has a type this function cannot unbox.
    """
    n_cols = len(typ.columns)
    column_strs = [
        numba.cpython.unicode.make_string_from_constant(
            c.context, c.builder, string_type, a) for a in typ.columns
    ]
    # create dataframe struct and store values
    dataframe = cgutils.create_struct_proxy(typ)(c.context, c.builder)

    errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit)

    col_list_type = types.List(string_type)
    ok, inst = listobj.ListInstance.allocate_ex(c.context, c.builder,
                                                col_list_type, n_cols)

    with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
        with if_ok:
            inst.size = c.context.get_constant(types.intp, n_cols)
            for i, column_str in enumerate(column_strs):
                inst.setitem(c.context.get_constant(types.intp, i),
                             column_str,
                             incref=False)
            dataframe.columns = inst.value

        with if_not_ok:
            c.builder.store(cgutils.true_bit, errorptr)

    # If an error occurred, drop the whole native list
    with c.builder.if_then(c.builder.load(errorptr)):
        c.context.nrt.decref(c.builder, col_list_type, inst.value)

    _, data_typs_map, types_order = get_structure_maps(typ.data, typ.columns)

    for col_typ in types_order:
        type_id, col_indices = data_typs_map[col_typ]
        n_type_cols = len(col_indices)
        list_type = types.List(col_typ)
        ok, inst = listobj.ListInstance.allocate_ex(c.context, c.builder,
                                                    list_type, n_type_cols)

        with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
            with if_ok:
                inst.size = c.context.get_constant(types.intp, n_type_cols)
                for i, col_idx in enumerate(col_indices):
                    # column_values = getattr(val, <column name>).values
                    series_obj = c.pyapi.object_getattr_string(
                        val, typ.columns[col_idx])
                    arr_obj = c.pyapi.object_getattr_string(
                        series_obj, "values")
                    ty_series = typ.data[col_idx]
                    if isinstance(ty_series, types.Array):
                        native_val = unbox_array(ty_series, arr_obj, c)
                    elif ty_series == string_array_type:
                        native_val = unbox_str_series(string_array_type,
                                                      series_obj, c)
                    else:
                        # Fail loudly rather than hitting an unbound (or
                        # stale, from a previous column) native_val below.
                        raise NotImplementedError(
                            "unboxing of DataFrame column '{}' with type {} "
                            "is not supported".format(
                                typ.columns[col_idx], ty_series))

                    inst.setitem(c.context.get_constant(types.intp, i),
                                 native_val.value,
                                 incref=False)
                    # Both attribute lookups returned new references that
                    # are no longer needed once the column is unboxed.
                    c.pyapi.decref(arr_obj)
                    c.pyapi.decref(series_obj)

                dataframe.data = c.builder.insert_value(
                    dataframe.data, inst.value, type_id)

            with if_not_ok:
                c.builder.store(cgutils.true_bit, errorptr)

        # If an error occurred, drop the whole native list
        with c.builder.if_then(c.builder.load(errorptr)):
            c.context.nrt.decref(c.builder, list_type, inst.value)

    index_obj = c.pyapi.object_getattr_string(val, "index")
    dataframe.index = _unbox_index_data(typ.index, index_obj, c).value
    c.pyapi.decref(index_obj)

    dataframe.parent = val

    # increase refcount of stored values (the column-name strings were put
    # into the list with incref=False above)
    if c.context.enable_nrt:
        # TODO: other objects?
        for var in column_strs:
            c.context.nrt.incref(c.builder, string_type, var)

    return NativeValue(dataframe._getvalue(),
                       is_error=c.builder.load(errorptr))