Esempio n. 1
0
        def _sdc_take_list_str_impl(data, indexes):
            """Gather strings from ``data`` for a sequence of index sequences.

            The result is flat: the elements selected by ``indexes[0]`` come
            first, then those selected by ``indexes[1]``, and so on, i.e. output
            position ``offsets[i] + j`` receives ``data[indexes[i][j]]``.
            Positions whose source element is reported by ``isna`` are marked
            in the result with ``str_arr_set_na``.

            Returns the array obtained from ``pre_alloc_string_array``.
            """
            n_chunks = len(indexes)

            # offsets[i] is the output position of the first element of chunk i.
            # One serial prefix-sum pass replaces slicing ``indexes[0:i]`` and
            # re-summing every preceding chunk length inside each parallel
            # iteration, which was quadratic in the number of chunks.
            offsets = numpy.zeros(n_chunks, dtype=numpy.int64)
            res_size = 0
            for i in range(n_chunks):
                offsets[i] = res_size
                res_size += len(indexes[i])

            # Pass 1: accumulate the total size reported by get_utf8_size and
            # record NA positions; both are needed before/after allocation.
            nan_mask = numpy.zeros(res_size, dtype=numpy.bool_)
            num_total_bytes = 0
            for i in numba.prange(n_chunks):
                for j in range(len(indexes[i])):
                    num_total_bytes += get_utf8_size(data[indexes[i][j]])
                    if isna(data, indexes[i][j]):
                        nan_mask[offsets[i] + j] = True

            res_arr = pre_alloc_string_array(res_size, num_total_bytes)

            # Pass 2: copy the selected strings and re-apply the NA flags.
            for i in numba.prange(n_chunks):
                for j in range(len(indexes[i])):
                    pos = offsets[i] + j
                    res_arr[pos] = data[indexes[i][j]]
                    if nan_mask[pos]:
                        str_arr_set_na(res_arr, pos)

            return res_arr
Esempio n. 2
0
            def sdc_fillna_str_impl(self, inplace=False, value=None):
                """Return a string array with the NA entries of ``self`` replaced by ``value``.

                ``inplace`` is accepted but never read here; the result is always
                a freshly allocated array.
                """
                length = len(self)

                # First pass: add up get_utf8_size of every element the output
                # will hold (``value`` for NA entries, the original otherwise).
                total_size = 0
                for pos in prange(length):
                    current = self[pos]
                    if sdc.hiframes.api.isna(self, pos):
                        item_size = get_utf8_size(value)
                    else:
                        item_size = get_utf8_size(current)
                    total_size += item_size

                # Second pass: populate the pre-allocated output.
                result = pre_alloc_string_array(length, total_size)
                for pos in prange(length):
                    if not sdc.hiframes.api.isna(self, pos):
                        result[pos] = self[pos]
                    else:
                        result[pos] = value
                return result
Esempio n. 3
0
def _str_replace_noregex_impl(str_arr, pat, val):
    """Apply ``str.replace(pat, val)`` to every element of ``str_arr``.

    The replaced strings are staged in a temporary string list while their
    sizes (per ``get_utf8_size``) are summed, then copied into an array
    allocated with that total.  The array is wrapped with
    ``sdc.hiframes.api.init_series`` before being returned.
    """
    numba.parfor.init_prange()
    length = len(str_arr)
    replaced = sdc.str_ext.alloc_str_list(length)

    # Pass 1: perform the replacement and accumulate the output size.
    total_chars = 0
    for idx in numba.parfor.internal_prange(length):
        new_str = str_arr[idx].replace(pat, val)
        replaced[idx] = new_str
        total_chars += get_utf8_size(new_str)

    # Pass 2: move the staged strings into the final array.
    numba.parfor.init_prange()
    result = pre_alloc_string_array(length, total_chars)
    for idx in numba.parfor.internal_prange(length):
        result[idx] = replaced[idx]

    return sdc.hiframes.api.init_series(result)
Esempio n. 4
0
        def sdc_astype_number_to_string_impl(self, dtype):
            """Convert every element of ``self`` to its ``str()`` form.

            ``dtype`` is not read by this implementation.  Returns the array
            obtained from ``pre_alloc_string_array`` filled with the strings.
            """
            length = len(self)

            # Size the output first: sum get_utf8_size over all converted items.
            total_bytes = 0
            for pos in prange(length):
                total_bytes += get_utf8_size(str(self[pos]))

            result = pre_alloc_string_array(length, total_bytes)

            # Fill the output sequentially.
            for pos in range(length):
                result[pos] = str(self[pos])  # TODO: check NA

            return result
Esempio n. 5
0
        def _sdc_take_str_arr_impl(data, indexes):
            """Gather ``data[indexes[i]]`` for every ``i`` into a new string array.

            Elements whose source position is reported by ``isna`` are marked
            in the result with ``str_arr_set_na``.
            """
            n_taken = len(indexes)

            # Pass 1 (parallel): total size per get_utf8_size plus NA flags.
            total_bytes = 0
            na_flags = numpy.zeros(n_taken, dtype=numpy.bool_)
            for pos in numba.prange(n_taken):
                src = indexes[pos]
                total_bytes += get_utf8_size(data[src])
                if isna(data, src):
                    na_flags[pos] = True

            # Pass 2 (sequential): copy the strings and re-apply the NA flags.
            taken = pre_alloc_string_array(n_taken, total_bytes)
            for pos in numpy.arange(n_taken):
                taken[pos] = data[indexes[pos]]
                if na_flags[pos]:
                    str_arr_set_na(taken, pos)

            return taken
Esempio n. 6
0
        def sdc_astype_number_to_string_impl(self, dtype):
            """Convert every element of ``self`` to its ``str()`` form.

            ``dtype`` is not read by this implementation.  Returns the array
            obtained from ``pre_alloc_string_array`` filled with the strings.
            """
            length = len(self)

            # Size the output first: sum get_utf8_size over all converted items.
            # FIXME_Numba#6969: prange segfaults, use it when resolved
            total_bytes = 0
            for pos in np.arange(length):
                total_bytes += get_utf8_size(str(self[pos]))

            result = pre_alloc_string_array(length, total_bytes)

            # Fill the output sequentially.
            for pos in range(length):
                result[pos] = str(self[pos])  # TODO: check NA

            return result