def _box_series_data(dtype, data_typ, val, c):
    """Box native series data `val` of native type `data_typ` into a Python
    array object, dispatching on the element dtype.

    Returns the new Python object (as an LLVM value) produced via c.pyapi.
    """
    # Tuple element types are modeled as a structured (record) numpy dtype.
    if isinstance(dtype, types.BaseTuple):
        rec_descr = ','.join(str(t) for t in dtype.types)
        dtype = numba.numpy_support.from_dtype(np.dtype(rec_descr, align=True))

    # Dispatch order matters: dtype-based checks first, then the
    # split-view container check, then the generic array fallback.
    if dtype == string_type:
        arr_obj = box_str_arr(string_array_type, val, c)
    elif dtype == datetime_date_type:
        arr_obj = box_datetime_date_array(data_typ, val, c)
    elif isinstance(dtype, PDCategoricalDtype):
        arr_obj = box_categorical_array(data_typ, val, c)
    elif data_typ == string_array_split_view_type:
        arr_obj = box_str_arr_split_view(data_typ, val, c)
    elif dtype == types.List(string_type):
        arr_obj = box_list(list_string_array_type, val, c)
    else:
        arr_obj = box_array(data_typ, val, c)

    # Record dtypes are handed back to Python as object arrays:
    # equivalent to `arr = arr.astype("O")` at runtime.
    if isinstance(dtype, types.Record):
        dtype_cstr = c.context.insert_const_string(c.builder.module, "O")
        dtype_obj = c.pyapi.string_from_string(dtype_cstr)
        arr_obj = c.pyapi.call_method(arr_obj, "astype", (dtype_obj,))

    return arr_obj
def box_dataframe(typ, val, c):
    """Box a native dataframe `val` of type `typ` into a pandas.DataFrame.

    Builds an empty DataFrame and assigns each boxed column via
    df[cname] = arr, then sets df.index if the dataframe type has one.
    Returns the new Python object (LLVM value).
    """
    context = c.context
    builder = c.builder
    n_cols = len(typ.columns)
    col_names = typ.columns
    arr_typs = typ.data
    dtypes = [a.dtype for a in arr_typs]  # TODO: check Categorical
    dataframe = cgutils.create_struct_proxy(typ)(context, builder, value=val)
    # native column arrays live in a flat tuple; pull each one out
    col_arrs = [builder.extract_value(dataframe.data, i) for i in range(n_cols)]
    # df unboxed from Python
    # NOTE(review): has_parent is computed but never read in this function
    has_parent = cgutils.is_not_null(builder, dataframe.parent)
    pyapi = c.pyapi
    # gil_state = pyapi.gil_ensure()  # acquire GIL
    mod_name = context.insert_const_string(c.builder.module, "pandas")
    class_obj = pyapi.import_module_noblock(mod_name)
    # start from an empty DataFrame and add columns one by one
    df_obj = pyapi.call_method(class_obj, "DataFrame", ())
    for i, cname, arr, arr_typ, dtype in zip(range(n_cols), col_names, col_arrs, arr_typs, dtypes):
        # df['cname'] = boxed_arr
        # TODO: datetime.date, DatetimeIndex?
        name_str = context.insert_const_string(c.builder.module, cname)
        cname_obj = pyapi.string_from_string(name_str)
        # per-dtype boxing, mirroring the dispatch in _box_series_data
        if dtype == string_type:
            arr_obj = box_str_arr(arr_typ, arr, c)
        elif isinstance(dtype, PDCategoricalDtype):
            arr_obj = box_categorical_array(arr_typ, arr, c)
            # context.nrt.incref(builder, arr_typ, arr)
        elif arr_typ == string_array_split_view_type:
            arr_obj = box_str_arr_split_view(arr_typ, arr, c)
        elif dtype == types.List(string_type):
            arr_obj = box_list(list_string_array_type, arr, c)
            # context.nrt.incref(builder, arr_typ, arr)  # TODO required?
            # pyapi.print_object(arr_obj)
        else:
            arr_obj = box_array(arr_typ, arr, c)
            # TODO: is incref required?
            # context.nrt.incref(builder, arr_typ, arr)
        pyapi.object_setitem(df_obj, cname_obj, arr_obj)
        # NOTE(review): arr_obj is not decref'd after object_setitem —
        # possible reference leak of the boxed column; verify intent
        # pyapi.decref(arr_obj)
        pyapi.decref(cname_obj)
    # set df.index if necessary
    if typ.index != types.none:
        arr_obj = _box_series_data(typ.index.dtype, typ.index, dataframe.index, c)
        pyapi.object_setattr_string(df_obj, 'index', arr_obj)
    pyapi.decref(class_obj)
    # pyapi.gil_release(gil_state)  # release GIL
    return df_obj
def box_dataframe(typ, val, c):
    """Box a native dataframe `val` of type `typ` into a pandas.DataFrame.

    Unlike the per-column-assignment variant, this builds a dict of
    {column name: boxed array} and constructs the DataFrame in one call.
    Column arrays are located through typ.column_loc, which maps a column
    name to (type_id, col_id) inside the tuple-of-lists data layout.
    Returns the new Python object (LLVM value).
    """
    context = c.context
    builder = c.builder
    col_names = typ.columns
    arr_typs = typ.data
    dataframe = cgutils.create_struct_proxy(typ)(context, builder, value=val)
    pyapi = c.pyapi
    # gil_state = pyapi.gil_ensure()  # acquire GIL
    mod_name = context.insert_const_string(c.builder.module, "pandas")
    class_obj = pyapi.import_module_noblock(mod_name)
    df_dict = pyapi.dict_new()
    # cache of boxed per-type lists, keyed by type_id, so each homogeneous
    # list of arrays is boxed at most once
    arrays_list_objs = {}
    for cname, arr_typ in zip(col_names, arr_typs):
        # df['cname'] = boxed_arr
        # TODO: datetime.date, DatetimeIndex?
        name_str = context.insert_const_string(c.builder.module, cname)
        cname_obj = pyapi.string_from_string(name_str)
        col_loc = typ.column_loc[cname]
        type_id, col_id = col_loc.type_id, col_loc.col_id
        # dataframe.data looks like a tuple(list(array))
        # e.g. ([array(int64, 1d, C), array(int64, 1d, C)], [array(float64, 1d, C)])
        arrays_list_obj = arrays_list_objs.get(type_id)
        if arrays_list_obj is None:
            list_typ = types.List(arr_typ)
            # extracting list from the tuple
            list_val = builder.extract_value(dataframe.data, type_id)
            # getting array from the list to box it then
            arrays_list_obj = box_list(list_typ, list_val, c)
            arrays_list_objs[type_id] = arrays_list_obj
        # PyList_GetItem returns borrowed reference
        arr_obj = pyapi.list_getitem(arrays_list_obj, col_id)
        # dict_setitem increfs the value, so the borrowed arr_obj needs
        # no decref here
        pyapi.dict_setitem(df_dict, cname_obj, arr_obj)
        pyapi.decref(cname_obj)
    df_obj = pyapi.call_method(class_obj, "DataFrame", (df_dict,))
    pyapi.decref(df_dict)
    # set df.index if necessary
    if typ.index != types.none:
        index_obj = _box_index_data(typ.index, dataframe.index, c)
        pyapi.object_setattr_string(df_obj, 'index', index_obj)
        pyapi.decref(index_obj)
    # release the boxed per-type lists now that the DataFrame owns the arrays
    for arrays_list_obj in arrays_list_objs.values():
        pyapi.decref(arrays_list_obj)
    pyapi.decref(class_obj)
    # pyapi.gil_release(gil_state)  # release GIL
    return df_obj
def unbox_dataframe(typ, val, c):
    """unbox dataframe to an empty DataFrame struct
    columns will be extracted later if necessary.

    Groups columns by native type via get_structure_maps and fills one
    native list per distinct column type; each list becomes one element of
    the dataframe struct's data tuple. Sets errorptr if any list
    allocation fails and reports it through NativeValue.is_error.
    """
    # NOTE(review): n_cols is computed but not used below
    n_cols = len(typ.columns)
    # create dataframe struct and store values
    dataframe = cgutils.create_struct_proxy(typ)(c.context, c.builder)
    errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit)
    _, data_typs_map, types_order = get_structure_maps(typ.data, typ.columns)
    for col_typ in types_order:
        type_id, col_indices = data_typs_map[col_typ]
        n_type_cols = len(col_indices)
        list_type = types.List(col_typ)
        # allocate the native list that will hold all columns of this type
        ok, inst = listobj.ListInstance.allocate_ex(c.context, c.builder, list_type, n_type_cols)
        with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
            with if_ok:
                inst.size = c.context.get_constant(types.intp, n_type_cols)
                for i, col_idx in enumerate(col_indices):
                    # fetch df.<column>.values from the Python object
                    series_obj = c.pyapi.object_getattr_string(val, typ.columns[col_idx])
                    arr_obj = c.pyapi.object_getattr_string(series_obj, "values")
                    ty_series = typ.data[col_idx]
                    # FIXME: CategoricalType has wrong dtype attribute value (i.e. dtype of codes)
                    # current implementation offers pd_dtype for this purpose, so use it
                    column_dtype = ty_series.pd_dtype if isinstance(ty_series, Categorical) else ty_series.dtype
                    native_val = _unbox_series_data(column_dtype, ty_series, arr_obj, c)
                    # list takes ownership of the unboxed value (incref=False)
                    inst.setitem(c.context.get_constant(types.intp, i), native_val.value, incref=False)
                    c.pyapi.decref(arr_obj)
                    c.pyapi.decref(series_obj)
                # store the filled list into the data tuple at its type slot
                dataframe.data = c.builder.insert_value(dataframe.data, inst.value, type_id)
            with if_not_ok:
                c.builder.store(cgutils.true_bit, errorptr)
        # If an error occurred, drop the whole native list
        with c.builder.if_then(c.builder.load(errorptr)):
            c.context.nrt.decref(c.builder, list_type, inst.value)
    index_obj = c.pyapi.object_getattr_string(val, "index")
    dataframe.index = _unbox_index_data(typ.index, index_obj, c).value
    c.pyapi.decref(index_obj)
    # keep a link to the original Python object for lazy column extraction
    dataframe.parent = val
    return NativeValue(dataframe._getvalue(), is_error=c.builder.load(errorptr))
def _infer_series_list_dtype(S): for i in range(len(S)): first_val = S.iloc[i] if not isinstance(first_val, list): raise ValueError("data type for column {} not supported".format( S.name)) if len(first_val) > 0: # TODO: support more types if isinstance(first_val[0], str): return types.List(string_type) else: raise ValueError( "data type for column {} not supported".format(S.name)) raise ValueError("data type for column {} not supported".format(S.name))
def unbox_dataframe(typ, val, c):
    """unbox dataframe to an empty DataFrame struct
    columns will be extracted later if necessary.

    This variant also materializes the column names as a native list of
    strings (dataframe.columns), then fills one native list of arrays per
    distinct column type, as located by get_structure_maps. Allocation
    failures set errorptr, surfaced via NativeValue.is_error.
    """
    n_cols = len(typ.columns)
    # build native unicode constants for every column name
    column_strs = [
        numba.cpython.unicode.make_string_from_constant(
            c.context, c.builder, string_type, a) for a in typ.columns
    ]
    # create dataframe struct and store values
    dataframe = cgutils.create_struct_proxy(typ)(c.context, c.builder)
    errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit)
    col_list_type = types.List(string_type)
    # native list holding the column-name strings
    ok, inst = listobj.ListInstance.allocate_ex(c.context, c.builder, col_list_type, n_cols)
    with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
        with if_ok:
            inst.size = c.context.get_constant(types.intp, n_cols)
            for i, column_str in enumerate(column_strs):
                # list takes ownership (incref=False); refcounts are bumped
                # at the end of this function under enable_nrt
                inst.setitem(c.context.get_constant(types.intp, i), column_str, incref=False)
            dataframe.columns = inst.value
        with if_not_ok:
            c.builder.store(cgutils.true_bit, errorptr)
    # If an error occurred, drop the whole native list
    with c.builder.if_then(c.builder.load(errorptr)):
        c.context.nrt.decref(c.builder, col_list_type, inst.value)
    _, data_typs_map, types_order = get_structure_maps(typ.data, typ.columns)
    for col_typ in types_order:
        type_id, col_indices = data_typs_map[col_typ]
        n_type_cols = len(col_indices)
        list_type = types.List(col_typ)
        # one native list per distinct column type
        ok, inst = listobj.ListInstance.allocate_ex(c.context, c.builder, list_type, n_type_cols)
        with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
            with if_ok:
                inst.size = c.context.get_constant(types.intp, n_type_cols)
                for i, col_idx in enumerate(col_indices):
                    # fetch df.<column>.values from the Python object
                    series_obj = c.pyapi.object_getattr_string(
                        val, typ.columns[col_idx])
                    arr_obj = c.pyapi.object_getattr_string(
                        series_obj, "values")
                    ty_series = typ.data[col_idx]
                    # NOTE(review): if ty_series is neither types.Array nor
                    # string_array_type, native_val is unbound below —
                    # verify all reachable column types are covered
                    if isinstance(ty_series, types.Array):
                        native_val = unbox_array(typ.data[col_idx], arr_obj, c)
                    elif ty_series == string_array_type:
                        native_val = unbox_str_series(string_array_type, series_obj, c)
                    # NOTE(review): series_obj/arr_obj are not decref'd here,
                    # unlike the other unbox_dataframe variant — possible leak
                    inst.setitem(c.context.get_constant(types.intp, i),
                                 native_val.value, incref=False)
                # store the filled list into the data tuple at its type slot
                dataframe.data = c.builder.insert_value(
                    dataframe.data, inst.value, type_id)
            with if_not_ok:
                c.builder.store(cgutils.true_bit, errorptr)
        # If an error occurred, drop the whole native list
        with c.builder.if_then(c.builder.load(errorptr)):
            c.context.nrt.decref(c.builder, list_type, inst.value)
    index_obj = c.pyapi.object_getattr_string(val, "index")
    dataframe.index = _unbox_index_data(typ.index, index_obj, c).value
    c.pyapi.decref(index_obj)
    # keep a link to the original Python object for lazy column extraction
    dataframe.parent = val
    # increase refcount of stored values
    if c.context.enable_nrt:
        # TODO: other objects?
        for var in column_strs:
            c.context.nrt.incref(c.builder, string_type, var)
    return NativeValue(dataframe._getvalue(), is_error=c.builder.load(errorptr))