def sdc_array_equal_str_arr_impl(A, B):
    """Return True when string arrays A and B hold the same elements.

    Two arrays are considered equal when they have the same length, the
    same total character count (as reported by ``num_total_chars``), and
    every position holds equal strings with matching NA flags (as reported
    by ``str_arr_is_na``).

    Parameters
    ----------
    A, B : string arrays supporting ``len``, integer indexing,
        ``num_total_chars`` and ``str_arr_is_na``.

    Returns
    -------
    bool
    """
    # Reject on the cheap structural checks before touching any element.
    # Without this guard the loop below would index B with positions taken
    # from A, reading past the end of B whenever A is the longer array.
    if len(A) != len(B) or num_total_chars(A) != num_total_chars(B):
        return False

    for i in range(len(A)):
        # The NA flag is compared separately from the string value so that
        # an NA slot is never treated as equal to a non-NA slot.
        if A[i] != B[i] or str_arr_is_na(A, i) != str_arr_is_na(B, i):
            return False

    return True
def _sdc_str_arr_operator_comp_binop_impl(self, other): if len(self) != len(other): raise ValueError("Mismatch of String Arrays sizes in operator.comp_binop") n = len(self) out_list = [False] * n for i in numba.prange(n): out_list[i] = (self[i] < other[i] and not (str_arr_is_na(self, i) or str_arr_is_na(other, i))) return out_list
def sdc_check_indexes_equal_string_impl(A, B):
    """Return True when string index arrays A and B hold the same elements.

    Two indexes are considered equal when they have the same length, the
    same total character count (as reported by ``num_total_chars``), and
    every position holds equal strings with matching NA flags (as reported
    by ``str_arr_is_na``).

    Parameters
    ----------
    A, B : string arrays supporting ``len``, integer indexing,
        ``num_total_chars`` and ``str_arr_is_na``.

    Returns
    -------
    bool
    """
    # TODO: replace with StringArrays comparison

    # Reject on the cheap structural checks before touching any element.
    # Without this guard the loop below would index B with positions taken
    # from A, reading past the end of B whenever A is the longer array.
    if len(A) != len(B) or num_total_chars(A) != num_total_chars(B):
        return False

    for i in range(len(A)):
        # The NA flag is compared separately from the string value so that
        # an NA slot is never treated as equal to a non-NA slot.
        if A[i] != B[i] or str_arr_is_na(A, i) != str_arr_is_na(B, i):
            return False

    return True
def _sdc_str_arr_operator_comp_binop_impl(self, other):
    """Compare ``self`` with ``<`` against every element of ``other``.

    ``self`` is used as a single value (it is never indexed here) —
    presumably a scalar string; confirm against the caller. Position ``i``
    of the result is True only when ``self < other[i]`` and ``other[i]``
    is not flagged NA by ``str_arr_is_na``.

    Returns
    -------
    list of bool with ``len(other)`` entries.
    """
    size = len(other)
    result = [False] * size

    for idx in numba.prange(size):
        # The comparison is evaluated first, then the NA flag, so an NA
        # element always yields False.
        is_less = self < other[idx]
        result[idx] = is_less and not str_arr_is_na(other, idx)

    return result
def sdc_join_series_indexes_impl(left, right):
    """Join two string index arrays, keeping every value from both sides.

    Both inputs are sorted (through ``pandas.Series.sort_values`` with
    ``kind='mergesort'``) and then merged. Values present on one side only
    produce one output row; runs of equal values present on both sides
    produce one row for every (left, right) pairing of the two runs.

    Parameters
    ----------
    left, right : string arrays supporting ``len``, integer indexing and
        ``str_arr_is_na``.

    Returns
    -------
    tuple ``(joined, lidx, ridx)``
        ``joined`` is the string array of joined index values.
        ``lidx`` / ``ridx`` are int64 arrays giving, for each row of
        ``joined``, the source position in ``left`` / ``right``, or -1
        when that side did not contribute to the row. The indexer arrays
        are returned as allocated and are not trimmed here, so they may be
        longer than ``joined``; only the first ``len(joined)`` entries are
        filled by this function.
    """

    # allocate result arrays
    lsize = len(left)
    rsize = len(right)
    est_total_size = int(1.1 * (lsize + rsize))

    lidx = numpy.empty(est_total_size, numpy.int64)
    ridx = numpy.empty(est_total_size, numpy.int64)

    # use Series.sort_values since argsort for StringArrays not implemented
    original_left_series = pandas.Series(left)
    original_right_series = pandas.Series(right)

    # sort arrays saving the old positions
    left_series = original_left_series.sort_values(kind='mergesort')
    right_series = original_right_series.sort_values(kind='mergesort')
    sorted_left = left_series._index
    sorted_right = right_series._index

    # i / j walk the sorted left / right positions, k is the output cursor
    i, j, k = 0, 0, 0
    while (i < lsize and j < rsize):
        lidx = _hpat_ensure_array_capacity(k + 1, lidx)
        ridx = _hpat_ensure_array_capacity(k + 1, ridx)

        left_index = left[sorted_left[i]]
        right_index = right[sorted_right[j]]

        if (left_index < right_index):
            lidx[k] = sorted_left[i]
            ridx[k] = -1
            i += 1
            k += 1
        elif (left_index > right_index):
            lidx[k] = -1
            ridx[k] = sorted_right[j]
            j += 1
            k += 1
        else:
            # find ends of sequences of equal index values in left and right
            ni, nj = i, j
            while (ni < lsize and left[sorted_left[ni]] == left_index):
                ni += 1
            while (nj < rsize and right[sorted_right[nj]] == right_index):
                nj += 1

            # The right-hand block is the same for every left element of
            # the run, so build it once instead of once per left element.
            # The comprehension uses its own variable name so it cannot be
            # confused with the output cursor k.
            block_size = nj - j
            to_ridx = numpy.array(
                [sorted_right[pos] for pos in numpy.arange(j, nj, 1)],
                numpy.int64)

            # join the blocks found into results
            for s in numpy.arange(i, ni, 1):
                to_lidx = numpy.repeat(sorted_left[s], block_size)

                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)

                lidx[k:k + block_size] = to_lidx
                ridx[k:k + block_size] = to_ridx
                k += block_size
            i = ni
            j = nj

    # fill the end of joined with remaining part of left or right
    if i < lsize:
        block_size = lsize - i
        lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
        ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
        ridx[k:k + block_size] = numpy.repeat(-1, block_size)
        while i < lsize:
            lidx[k] = sorted_left[i]
            i += 1
            k += 1

    elif j < rsize:
        block_size = rsize - j
        lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
        ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
        lidx[k:k + block_size] = numpy.repeat(-1, block_size)
        while j < rsize:
            ridx[k] = sorted_right[j]
            j += 1
            k += 1

    # count total number of characters and allocate joined array
    total_joined_size = k
    num_chars_in_joined = 0
    for i in numpy.arange(total_joined_size):
        if lidx[i] != -1:
            num_chars_in_joined += len(left[lidx[i]])
        elif ridx[i] != -1:
            num_chars_in_joined += len(right[ridx[i]])

    joined = pre_alloc_string_array(total_joined_size, num_chars_in_joined)

    # iterate over joined and fill it with indexes using lidx and ridx indexers
    for i in numpy.arange(total_joined_size):
        if lidx[i] != -1:
            joined[i] = left[lidx[i]]
            # carry the NA flag over from the source element
            if (str_arr_is_na(left, lidx[i])):
                str_arr_set_na(joined, i)
        elif ridx[i] != -1:
            joined[i] = right[ridx[i]]
            if (str_arr_is_na(right, ridx[i])):
                str_arr_set_na(joined, i)
        else:
            # neither side contributed to this row: mark it NA
            str_arr_set_na(joined, i)

    return joined, lidx, ridx