def unbox_categorical_array(typ, val, c):
    """Unbox a categorical array by unboxing its integer ``codes`` attribute.

    Params:
        typ: native type of the categorical array; ``typ.dtype`` is handed to
            ``get_categories_int_type`` to select the element type of the codes
        val: Python object to be unboxed (its ``codes`` attribute is read)
        c: unboxing context object

    Returns:
        whatever ``unbox_array`` returns for the codes, typed as a
        one-dimensional 'C'-layout array
    """
    codes_obj = c.pyapi.object_getattr_string(val, "codes")
    codes_typ = types.Array(get_categories_int_type(typ.dtype), 1, 'C')
    unboxed_codes = unbox_array(codes_typ, codes_obj, c)
    # release the reference obtained from the attribute lookup
    c.pyapi.decref(codes_obj)
    return unboxed_codes
def _unbox_index_data(index_typ, index_obj, c):
    """
    Unboxes Pandas index object basing on the native type inferred previously.

    Params:
        index_typ: native Numba type the object is to be unboxed into
        index_obj: Python object to be unboxed
        c: LLVM context object

    Returns:
        LLVM instructions to generate native value

    Raises:
        AssertionError: if index_typ is none of the index types handled here
    """
    if isinstance(index_typ, RangeIndexType):
        return unbox_range_index(index_typ, index_obj, c)

    if isinstance(index_typ, Int64IndexType):
        return unbox_int64_index(index_typ, index_obj, c)

    if index_typ == string_array_type:
        return unbox_str_series(index_typ, index_obj, c)

    # this is still here only because of Float64Index represented as array
    # TO-DO: remove when it's added
    if isinstance(index_typ, types.Array):
        index_data = c.pyapi.object_getattr_string(index_obj, "_data")
        res = unbox_array(index_typ, index_data, c)
        c.pyapi.decref(index_data)
        return res

    if isinstance(index_typ, types.NoneType):
        return unbox_none(index_typ, index_obj, c)

    # Raised explicitly instead of `assert False`: assert statements are
    # removed under `python -O`, which would make this function silently
    # return None for an unsupported index type.
    raise AssertionError(
        f"_unbox_index_data: unexpected index type({index_typ}) while unboxing")
def unbox_dataframe(typ, val, c):
    """Unbox a Python DataFrame into the native dataframe struct.

    Every column is unboxed here, so no column unboxing occurs later.

    Params:
        typ: native dataframe type; ``typ.columns`` (column names),
            ``typ.data`` (per-column native types) and ``typ.index`` are read
        val: Python DataFrame object to be unboxed; also stored in the struct
            as ``parent``
        c: unboxing context object

    Returns:
        NativeValue wrapping the populated dataframe struct

    Raises:
        NotImplementedError: if a column's native type is neither a
            ``types.Array`` nor ``string_array_type``
    """
    n_cols = len(typ.columns)
    column_strs = [
        numba.cpython.unicode.make_string_from_constant(
            c.context, c.builder, string_type, col_name)
        for col_name in typ.columns
    ]
    # create dataframe struct and store values
    dataframe = cgutils.create_struct_proxy(typ)(c.context, c.builder)

    column_tup = c.context.make_tuple(
        c.builder, types.UniTuple(string_type, n_cols), column_strs)

    # this unboxes all DF columns so that no column unboxing occurs later
    for col_ind, col_name in enumerate(typ.columns):
        series_obj = c.pyapi.object_getattr_string(val, col_name)
        arr_obj = c.pyapi.object_getattr_string(series_obj, "values")
        ty_series = typ.data[col_ind]
        if isinstance(ty_series, types.Array):
            native_val = unbox_array(ty_series, arr_obj, c)
        elif ty_series == string_array_type:
            native_val = unbox_str_series(string_array_type, series_obj, c)
        else:
            # fail loudly instead of reusing the previous column's value
            # (or hitting an unbound local on the first column)
            raise NotImplementedError(
                f"unbox_dataframe: unsupported column type({ty_series}) "
                f"for column '{col_name}'")

        dataframe.data = c.builder.insert_value(
            dataframe.data, native_val.value, col_ind)

        # release the references obtained from the attribute lookups above
        c.pyapi.decref(arr_obj)
        c.pyapi.decref(series_obj)

    # TODO: support unboxing index
    if typ.index == types.none:
        dataframe.index = c.context.get_constant(types.none, None)
    if typ.index == string_array_type:
        index_obj = c.pyapi.object_getattr_string(val, "index")
        dataframe.index = unbox_str_series(string_array_type, index_obj, c).value
        c.pyapi.decref(index_obj)
    if isinstance(typ.index, types.Array):
        index_obj = c.pyapi.object_getattr_string(val, "index")
        index_data = c.pyapi.object_getattr_string(index_obj, "_data")
        dataframe.index = unbox_array(typ.index, index_data, c).value
        c.pyapi.decref(index_data)
        c.pyapi.decref(index_obj)

    dataframe.columns = column_tup
    dataframe.parent = val

    # increase refcount of stored values
    if c.context.enable_nrt:
        # TODO: other objects?
        for var in column_strs:
            c.context.nrt.incref(c.builder, string_type, var)

    return NativeValue(dataframe._getvalue())
def _unbox_series_data(dtype, data_typ, arr_obj, c):
    """Unbox series data with the unboxer that matches its native type.

    Params:
        dtype: series dtype; only checked for being a CategoricalDtypeType
        data_typ: native type of the series data
        arr_obj: Python object holding the data to be unboxed
        c: unboxing context object

    Returns:
        the result of the selected unboxing routine
    """
    if data_typ == string_array_type:
        return unbox_str_series(string_array_type, arr_obj, c)

    if data_typ == list_string_array_type:
        return _unbox_array_list_str(arr_obj, c)

    if isinstance(dtype, CategoricalDtypeType):
        return unbox_Categorical(data_typ, arr_obj, c)

    # everything else goes through the generic array unboxing
    # TODO: error handling like Numba callwrappers.py
    return unbox_array(data_typ, arr_obj, c)
def unbox_int64_index(typ, val, c):
    """Unbox an index object into its native struct (data and optional name).

    Params:
        typ: native index type; ``typ.data`` and ``typ.is_named`` are read
        val: Python index object; its ``_data`` attribute (and ``name`` when
            the type is named) is unboxed
        c: unboxing context object

    Returns:
        NativeValue of the index struct, with ``is_error`` set when a Python
        error is pending after unboxing
    """
    # TODO: support index unboxing with reference to parent in Numba?
    pyapi = c.pyapi
    builder = c.builder
    index_struct = cgutils.create_struct_proxy(typ)(c.context, builder)

    data_obj = pyapi.object_getattr_string(val, "_data")
    unboxed_data = unbox_array(typ.data, data_obj, c)
    index_struct.data = unboxed_data.value
    pyapi.decref(data_obj)

    if typ.is_named:
        name_obj = pyapi.object_getattr_string(val, "name")
        unboxed_name = numba.cpython.unicode.unbox_unicode_str(
            types.unicode_type, name_obj, c)
        index_struct.name = unboxed_name.value
        pyapi.decref(name_obj)

    error_flag = cgutils.is_not_null(builder, pyapi.err_occurred())
    return NativeValue(index_struct._getvalue(), is_error=error_flag)
def unbox_int64_index(typ, val, c):
    """Unbox an index object exposing ``levels`` and ``codes`` into its native struct.

    NOTE(review): despite its name, this function reads ``levels``/``codes``
    rather than ``_data`` -- confirm the intended function name.

    Params:
        typ: native index type; ``typ.levels``, ``typ.levels_types``,
            ``typ.codes``, ``typ.codes_types`` and ``typ.is_named`` are read
        val: Python index object to be unboxed
        c: unboxing context object

    Returns:
        NativeValue of the index struct, with ``is_error`` set when a Python
        error is pending after unboxing
    """
    nlevels = len(typ.levels)
    levels_types = typ.levels_types
    codes_types = typ.codes_types
    multi_index = cgutils.create_struct_proxy(typ)(c.context, c.builder)

    py_levels_data = c.pyapi.object_getattr_string(val, "levels")
    native_levels_data = []
    for i in range(nlevels):
        idx = c.pyapi.long_from_ulonglong(
            c.context.get_constant(types.int64, i))
        level_data = c.pyapi.object_getitem(py_levels_data, idx)
        # the Python int is only needed for the lookup; release it right away
        c.pyapi.decref(idx)
        native_levels_data.append(
            _unbox_index_data(levels_types[i], level_data, c).value)
        c.pyapi.decref(level_data)
    c.pyapi.decref(py_levels_data)
    multi_index.levels = c.context.make_tuple(
        c.builder, typ.levels, native_levels_data)

    py_codes_data = c.pyapi.object_getattr_string(val, "codes")
    native_codes_data = []
    for i in range(nlevels):
        idx = c.pyapi.long_from_ulonglong(
            c.context.get_constant(types.int64, i))
        code_data = c.pyapi.object_getitem(py_codes_data, idx)
        # the Python int is only needed for the lookup; release it right away
        c.pyapi.decref(idx)
        native_codes_data.append(
            unbox_array(codes_types[i], code_data, c).value)
        c.pyapi.decref(code_data)
    c.pyapi.decref(py_codes_data)
    multi_index.codes = c.context.make_tuple(
        c.builder, typ.codes, native_codes_data)

    if typ.is_named:
        name_obj = c.pyapi.object_getattr_string(val, "name")
        multi_index.name = numba.cpython.unicode.unbox_unicode_str(
            types.unicode_type, name_obj, c).value
        c.pyapi.decref(name_obj)

    is_error = cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
    return NativeValue(multi_index._getvalue(), is_error=is_error)
def unbox_series(typ, val, c):
    """Unbox a Python Series into the native series struct.

    Params:
        typ: native series type; ``typ.dtype``, ``typ.data``, ``typ.index``
            and ``typ.is_named`` are read
        val: Python Series object; its ``values`` attribute (plus ``index``
            and ``name`` where the type requires them) is unboxed
        c: unboxing context object

    Returns:
        NativeValue wrapping the populated series struct
    """
    arr_obj = c.pyapi.object_getattr_string(val, "values")
    series = cgutils.create_struct_proxy(typ)(c.context, c.builder)
    series.data = _unbox_series_data(typ.dtype, typ.data, arr_obj, c).value

    # TODO: other indices
    if typ.index == string_array_type:
        index_obj = c.pyapi.object_getattr_string(val, "index")
        series.index = unbox_str_series(string_array_type, index_obj, c).value
        # release the reference obtained from the attribute lookup
        c.pyapi.decref(index_obj)

    if isinstance(typ.index, types.Array):
        index_obj = c.pyapi.object_getattr_string(val, "index")
        index_data = c.pyapi.object_getattr_string(index_obj, "_data")
        series.index = unbox_array(typ.index, index_data, c).value
        # release the references obtained from the attribute lookups
        c.pyapi.decref(index_data)
        c.pyapi.decref(index_obj)

    if typ.is_named:
        name_obj = c.pyapi.object_getattr_string(val, "name")
        series.name = numba.cpython.unicode.unbox_unicode_str(
            string_type, name_obj, c).value
        c.pyapi.decref(name_obj)

    c.pyapi.decref(arr_obj)
    return NativeValue(series._getvalue())
def unbox_dataframe(typ, val, c):
    """Unbox a Python DataFrame into the native dataframe struct.

    Column names are stored in a native list of strings. Column data is
    stored in one native list per column type, in the order and at the
    positions given by ``get_structure_maps(typ.data, typ.columns)``.
    All columns are unboxed here.

    Params:
        typ: native dataframe type; ``typ.columns``, ``typ.data`` and
            ``typ.index`` are read
        val: Python DataFrame object to be unboxed; also stored in the struct
            as ``parent``
        c: unboxing context object

    Returns:
        NativeValue of the dataframe struct, with ``is_error`` set when any
        of the native list allocations failed

    Raises:
        NotImplementedError: if a column's native type is neither a
            ``types.Array`` nor ``string_array_type``
    """
    n_cols = len(typ.columns)
    column_strs = [
        numba.cpython.unicode.make_string_from_constant(
            c.context, c.builder, string_type, a)
        for a in typ.columns
    ]
    # create dataframe struct and store values
    dataframe = cgutils.create_struct_proxy(typ)(c.context, c.builder)

    # set to true as soon as any native list allocation fails
    errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit)

    col_list_type = types.List(string_type)
    ok, inst = listobj.ListInstance.allocate_ex(
        c.context, c.builder, col_list_type, n_cols)

    with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
        with if_ok:
            inst.size = c.context.get_constant(types.intp, n_cols)
            for i, column_str in enumerate(column_strs):
                inst.setitem(c.context.get_constant(types.intp, i),
                             column_str, incref=False)
            dataframe.columns = inst.value

        with if_not_ok:
            c.builder.store(cgutils.true_bit, errorptr)

    # If an error occurred, drop the whole native list
    with c.builder.if_then(c.builder.load(errorptr)):
        c.context.nrt.decref(c.builder, col_list_type, inst.value)

    _, data_typs_map, types_order = get_structure_maps(typ.data, typ.columns)

    for col_typ in types_order:
        type_id, col_indices = data_typs_map[col_typ]
        n_type_cols = len(col_indices)
        list_type = types.List(col_typ)
        ok, inst = listobj.ListInstance.allocate_ex(
            c.context, c.builder, list_type, n_type_cols)

        with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
            with if_ok:
                inst.size = c.context.get_constant(types.intp, n_type_cols)
                for i, col_idx in enumerate(col_indices):
                    series_obj = c.pyapi.object_getattr_string(
                        val, typ.columns[col_idx])
                    arr_obj = c.pyapi.object_getattr_string(
                        series_obj, "values")
                    ty_series = typ.data[col_idx]
                    if isinstance(ty_series, types.Array):
                        native_val = unbox_array(ty_series, arr_obj, c)
                    elif ty_series == string_array_type:
                        native_val = unbox_str_series(
                            string_array_type, series_obj, c)
                    else:
                        # fail loudly instead of reusing a previous column's
                        # value (or hitting an unbound local)
                        raise NotImplementedError(
                            f"unbox_dataframe: unsupported column "
                            f"type({ty_series}) for column "
                            f"'{typ.columns[col_idx]}'")

                    inst.setitem(c.context.get_constant(types.intp, i),
                                 native_val.value, incref=False)

                    # release the references obtained from the attribute
                    # lookups above
                    c.pyapi.decref(arr_obj)
                    c.pyapi.decref(series_obj)

                dataframe.data = c.builder.insert_value(
                    dataframe.data, inst.value, type_id)

            with if_not_ok:
                c.builder.store(cgutils.true_bit, errorptr)

        # If an error occurred, drop the whole native list
        with c.builder.if_then(c.builder.load(errorptr)):
            c.context.nrt.decref(c.builder, list_type, inst.value)

    index_obj = c.pyapi.object_getattr_string(val, "index")
    dataframe.index = _unbox_index_data(typ.index, index_obj, c).value
    c.pyapi.decref(index_obj)

    dataframe.parent = val

    # increase refcount of stored values
    if c.context.enable_nrt:
        # TODO: other objects?
        for var in column_strs:
            c.context.nrt.incref(c.builder, string_type, var)

    return NativeValue(dataframe._getvalue(),
                       is_error=c.builder.load(errorptr))
def unbox_Categorical(typ, val, c):
    """Unbox a Categorical by unboxing its ``codes`` attribute as an array.

    Params:
        typ: native categorical type; ``typ.codes`` is the array type the
            codes are unboxed into
        val: Python object to be unboxed (its ``codes`` attribute is read)
        c: unboxing context object

    Returns:
        the result of ``boxing.unbox_array`` for the codes
    """
    codes_obj = c.pyapi.object_getattr_string(val, "codes")
    unboxed_codes = boxing.unbox_array(typ.codes, codes_obj, c)
    # release the reference obtained from the attribute lookup
    c.pyapi.decref(codes_obj)
    return unboxed_codes