def _sdc_take_list_str_impl(data, indexes):
    """Gather strings from ``data`` at the positions listed in several index chunks.

    The chunks in ``indexes`` are handled in order and their selections are
    written back to back into one result, so the result length is the sum of
    all chunk lengths.  Positions for which ``isna(data, idx)`` is true are
    marked in the result with ``str_arr_set_na``.

    Args:
        data: string container indexed by the values stored in ``indexes``.
        indexes: sequence of index collections; ``indexes[i][j]`` is a
            position in ``data``.

    Returns:
        The array created by ``pre_alloc_string_array`` holding the selected
        strings.
    """
    n_chunks = len(indexes)

    # Start offset of every chunk inside the flattened result.  Computing the
    # prefix sums once here replaces re-slicing ``indexes[0:i]`` and re-adding
    # all preceding chunk lengths inside each iteration of both loops below.
    chunk_starts = numpy.zeros(n_chunks, dtype=numpy.int64)
    res_size = 0
    for i in range(n_chunks):
        chunk_starts[i] = res_size
        res_size += len(indexes[i])

    # First pass: total size for the character buffer and the NA positions.
    nan_mask = numpy.zeros(res_size, dtype=numpy.bool_)
    num_total_bytes = 0
    for i in numba.prange(n_chunks):
        current_pos = chunk_starts[i]
        for j in range(len(indexes[i])):
            num_total_bytes += get_utf8_size(data[indexes[i][j]])
            if isna(data, indexes[i][j]):
                nan_mask[current_pos] = True
            current_pos += 1

    # Second pass: copy the strings; every chunk writes only to its own
    # range [chunk_starts[i], chunk_starts[i] + len(indexes[i])).
    res_arr = pre_alloc_string_array(res_size, num_total_bytes)
    for i in numba.prange(n_chunks):
        current_pos = chunk_starts[i]
        for j in range(len(indexes[i])):
            res_arr[current_pos] = data[indexes[i][j]]
            if nan_mask[current_pos]:
                str_arr_set_na(res_arr, current_pos)
            current_pos += 1

    return res_arr
def sdc_fillna_str_impl(self, inplace=False, value=None):
    """Build a new string array in which NA entries of ``self`` are replaced by ``value``.

    Args:
        self: string container to read from.
        inplace: accepted for interface compatibility; not used by this
            implementation, which always returns a new array.
        value: replacement stored wherever ``sdc.hiframes.api.isna`` reports NA.

    Returns:
        The array created by ``pre_alloc_string_array`` with one entry per
        element of ``self``.
    """
    size = len(self)

    # get total chars in new array
    total_chars = 0
    for idx in prange(size):
        current = self[idx]
        if sdc.hiframes.api.isna(self, idx):
            total_chars += get_utf8_size(value)
        else:
            total_chars += get_utf8_size(current)

    # Fill the pre-sized result: replacement for NA slots, original otherwise.
    result = pre_alloc_string_array(size, total_chars)
    for idx in prange(size):
        if sdc.hiframes.api.isna(self, idx):
            result[idx] = value
        else:
            result[idx] = self[idx]

    return result
def _str_replace_noregex_impl(str_arr, pat, val):
    """Apply ``str.replace(pat, val)`` to every element of ``str_arr``.

    The replaced strings are first collected in a list from
    ``sdc.str_ext.alloc_str_list`` while their sizes are accumulated, then
    copied into an array pre-allocated for exactly that total size.

    Args:
        str_arr: string container whose elements are processed.
        pat: substring to search for (plain text, no regex handling here).
        val: replacement substring.

    Returns:
        The result of ``sdc.hiframes.api.init_series`` on the filled array.
    """
    numba.parfor.init_prange()
    size = len(str_arr)
    total_chars = 0
    replaced = sdc.str_ext.alloc_str_list(size)
    for idx in numba.parfor.internal_prange(size):
        new_str = str_arr[idx].replace(pat, val)
        replaced[idx] = new_str
        total_chars += get_utf8_size(new_str)

    # Second parallel region: move the intermediate strings into the result.
    numba.parfor.init_prange()
    result = pre_alloc_string_array(size, total_chars)
    for idx in numba.parfor.internal_prange(size):
        result[idx] = replaced[idx]

    return sdc.hiframes.api.init_series(result)
def sdc_astype_number_to_string_impl(self, dtype):
    """Convert every element of ``self`` to its ``str()`` form in a string array.

    Args:
        self: container of values to convert.
        dtype: requested target type; not read by this implementation.

    Returns:
        The array created by ``pre_alloc_string_array`` holding ``str(self[i])``
        for each position.
    """
    size = len(self)

    # Get total bytes for new array
    total_bytes = 0
    for idx in prange(size):
        total_bytes += get_utf8_size(str(self[idx]))

    result = pre_alloc_string_array(size, total_bytes)
    for idx in range(size):
        result[idx] = str(self[idx])  # TODO: check NA

    return result
def _sdc_take_str_arr_impl(data, indexes):
    """Gather strings from ``data`` at the positions listed in ``indexes``.

    Positions for which ``isna(data, idx)`` is true are marked in the result
    with ``str_arr_set_na``.

    Args:
        data: string container indexed by the values of ``indexes``.
        indexes: collection of positions in ``data``; one output element each.

    Returns:
        The array created by ``pre_alloc_string_array`` with
        ``len(indexes)`` entries.
    """
    out_len = len(indexes)
    na_flags = numpy.zeros(out_len, dtype=numpy.bool_)

    # First pass: total size for the character buffer and the NA positions.
    total_bytes = 0
    for pos in numba.prange(out_len):
        total_bytes += get_utf8_size(data[indexes[pos]])
        if isna(data, indexes[pos]):
            na_flags[pos] = True

    # Second pass: copy the strings and restore the NA markers.
    result = pre_alloc_string_array(out_len, total_bytes)
    for pos in numpy.arange(out_len):
        result[pos] = data[indexes[pos]]
        if na_flags[pos]:
            str_arr_set_na(result, pos)

    return result
def sdc_astype_number_to_string_impl(self, dtype):
    """Convert every element of ``self`` to its ``str()`` form in a string array.

    Args:
        self: container of values to convert.
        dtype: requested target type; not read by this implementation.

    Returns:
        The array created by ``pre_alloc_string_array`` holding ``str(self[i])``
        for each position.
    """
    size = len(self)

    # Get total bytes for new array
    total_bytes = 0
    # FIXME_Numba#6969: prange segfaults, use it when resolved
    for idx in np.arange(size):
        total_bytes += get_utf8_size(str(self[idx]))

    result = pre_alloc_string_array(size, total_bytes)
    for idx in range(size):
        result[idx] = str(self[idx])  # TODO: check NA

    return result