def _sdc_take_list_str_impl(data, indexes):
    """Gather strings from `data` for a list of index groups into one flat string array.

    The result is laid out group by group: the items selected by ``indexes[0]``
    come first, then those selected by ``indexes[1]``, and so on. Positions of
    `data` flagged by ``isna`` are flagged as NA in the result as well.

    Parameters
    ----------
    data
        Source string array; indexed by position and queried with ``isna``.
    indexes
        Sequence of index groups; each ``indexes[i]`` is itself a sequence of
        positions into `data`.

    Returns
    -------
    String array obtained from ``pre_alloc_string_array`` whose length is the
    total number of positions over all groups.
    """
    n_groups = len(indexes)

    # Exclusive prefix sums of the group lengths: offsets[i] is the position in
    # the flattened result where the items of indexes[i] start. This is computed
    # once and sequentially, so the parallel loops below no longer re-slice
    # `indexes` and re-sum all preceding lengths on every iteration.
    offsets = numpy.zeros(n_groups + 1, dtype=numpy.int64)
    for i in range(n_groups):
        offsets[i + 1] = offsets[i] + len(indexes[i])
    res_size = int(offsets[n_groups])

    # First pass: accumulate the byte size reported by get_utf8_size for every
    # selected string (a prange reduction) and remember which result slots are NA.
    nan_mask = numpy.zeros(res_size, dtype=numpy.bool_)
    num_total_bytes = 0
    for i in numba.prange(n_groups):
        current_pos = offsets[i]
        for j in range(len(indexes[i])):
            num_total_bytes += get_utf8_size(data[indexes[i][j]])
            if isna(data, indexes[i][j]):
                nan_mask[current_pos] = True
            current_pos += 1

    # Second pass: copy the strings into the pre-allocated array and restore NA flags.
    res_arr = pre_alloc_string_array(res_size, num_total_bytes)
    for i in numba.prange(n_groups):
        current_pos = offsets[i]
        for j in range(len(indexes[i])):
            res_arr[current_pos] = data[indexes[i][j]]
            if nan_mask[current_pos]:
                str_arr_set_na(res_arr, current_pos)
            current_pos += 1

    return res_arr
def _series_comp_binop_none_indexes_impl(self, other, fill_value=None):
    """Element-wise ``self < other`` for two Series, returning a Series without an explicit index.

    Raises ValueError when the two data arrays differ in length. When the
    outer-scope flag ``fill_value_is_none`` is set, the data arrays are compared
    directly. Otherwise a value flagged by ``isna`` on either side is replaced
    with `fill_value` before comparing, and a position that is NA on both sides
    yields False.
    """
    n_left = len(self._data)
    n_right = len(other._data)
    if n_left != n_right:
        raise ValueError("Can only compare identically-labeled Series objects")

    if fill_value_is_none == True:  # noqa
        mask = self._data < other._data
    else:
        mask = numpy.empty(n_left, dtype=types.bool_)
        for pos in numba.prange(n_left):
            lhs_missing = isna(self._data, pos)
            rhs_missing = isna(other._data, pos)
            if lhs_missing and rhs_missing:
                # nothing to compare on either side
                mask[pos] = False
            else:
                lhs = fill_value if lhs_missing else self._data[pos]
                rhs = fill_value if rhs_missing else other._data[pos]
                mask[pos] = lhs < rhs

    return pandas.Series(mask)
def sdc_reindex_series_impl(arr, index, name, by_index):
    """Build a Series holding the values of `arr` reordered to follow `by_index`.

    Parameters
    ----------
    arr
        Data values, positionally aligned with `index`.
    index
        Current labels of `arr`; must not contain duplicates.
    name
        Name passed through to the resulting Series.
    by_index
        Labels that define the order (and index) of the result.

    Returns
    -------
    pandas.Series with ``index=by_index`` and ``name=name``. If `index` and
    `by_index` are the same object, or compare equal on the range/int64 paths,
    `arr` is used as the data without any reordering.

    Raises
    ------
    ValueError
        If `index` contains a duplicate label.
    IndexingError
        If at least one label of `by_index` is not present in `index`.

    Notes
    -----
    ``range_indexes``, ``int64_indexes``, ``data_is_str_arr``, ``data_dtype``
    and ``index_dtype`` are resolved outside this function.
    """
    # no reindexing is needed if indexes are equal
    if range_indexes == True:  # noqa
        equal_indexes = numpy_like.array_equal(index, by_index)
    elif int64_indexes == True:  # noqa
        equal_indexes = numpy_like.array_equal(index, by_index)
    else:
        equal_indexes = False
    if (index is by_index or equal_indexes):
        return pandas.Series(data=arr, index=by_index, name=name)

    if data_is_str_arr == True:  # noqa
        # strings are collected in a list first; NA flags are tracked separately
        # and applied after the list is converted to a string array
        _res_data = [''] * len(by_index)
        res_data_nan_mask = numpy.zeros(len(by_index), dtype=types.bool_)
    else:
        _res_data = numpy.empty(len(by_index), dtype=data_dtype)

    # build a dict of self.index values to their positions:
    map_index_to_position = Dict.empty(key_type=index_dtype, value_type=types.int32)
    for i, value in enumerate(index):
        if value in map_index_to_position:
            raise ValueError("cannot reindex from a duplicate axis")
        else:
            map_index_to_position[value] = i

    # count labels of by_index that are missing in index (prange reduction)
    index_mismatch = 0
    for i in numba.prange(len(by_index)):
        val = by_index[i]
        if val in map_index_to_position:
            pos_in_self = map_index_to_position[val]
            _res_data[i] = arr[pos_in_self]
            if data_is_str_arr == True:  # noqa
                # the NA flag has to come from the same source position as the
                # value itself, not from the destination position i
                res_data_nan_mask[i] = isna(arr, pos_in_self)
        else:
            index_mismatch += 1
    if index_mismatch:
        msg = "Unalignable boolean Series provided as indexer " + \
              "(index of the boolean Series and of the indexed object do not match)."
        raise IndexingError(msg)

    if data_is_str_arr == True:  # noqa
        res_data = create_str_arr_from_list(_res_data)
        str_arr_set_na_by_mask(res_data, res_data_nan_mask)
    else:
        res_data = _res_data

    return pandas.Series(data=res_data, index=by_index, name=name)
def _series_lt_common_impl(self, other, fill_value=None):
    """Element-wise ``self < other`` for two Series that must carry matching indexes.

    Raises ValueError unless both indexes are the same object or compare equal
    (via ``equals`` when the outer-scope flag ``index_api_supported`` is set,
    via ``numpy_like.array_equal`` otherwise). When ``fill_value_is_none`` is
    set the data arrays are compared directly; otherwise values flagged by
    ``isna`` are replaced with `fill_value`, and a position that is NA on both
    sides yields False. The result index is produced by ``sdc_unify_index_types``.
    """
    left_index = self.index
    right_index = other.index

    if index_api_supported == True:  # noqa
        same_labels = left_index is right_index or left_index.equals(right_index)
        if not same_labels:
            raise ValueError("Can only compare identically-labeled Series objects")
    else:
        same_labels = left_index is right_index or numpy_like.array_equal(left_index, right_index)
        if not same_labels:
            raise ValueError("Can only compare identically-labeled Series objects")

    n_items = len(left_index)
    if fill_value_is_none == True:  # noqa
        is_less = self._data < other._data
    else:
        is_less = numpy.empty(n_items, dtype=types.bool_)
        for pos in numba.prange(n_items):
            lhs_missing = isna(self._data, pos)
            rhs_missing = isna(other._data, pos)
            if lhs_missing and rhs_missing:
                # both operands are NA: the comparison is defined as False
                is_less[pos] = False
            else:
                lhs = fill_value if lhs_missing else self._data[pos]
                rhs = fill_value if rhs_missing else other._data[pos]
                is_less[pos] = lhs < rhs

    unified_index = sdc_unify_index_types(left_index, right_index)
    return pandas.Series(is_less, index=unified_index)
def _sdc_take_str_arr_impl(data, indexes):
    """Gather the strings of `data` found at the positions listed in `indexes`.

    Returns a string array of ``len(indexes)`` items obtained from
    ``pre_alloc_string_array``; element ``k`` is ``data[indexes[k]]`` and is
    flagged NA whenever ``isna`` reports the source position as NA.
    """
    n_taken = len(indexes)
    is_missing = numpy.zeros(n_taken, dtype=numpy.bool_)

    # Pass 1 (parallel): add up the sizes reported by get_utf8_size for the
    # selected strings and record which of them are NA in the source.
    total_utf8_bytes = 0
    for k in numba.prange(n_taken):
        src_pos = indexes[k]
        total_utf8_bytes += get_utf8_size(data[src_pos])
        if isna(data, src_pos):
            is_missing[k] = True

    # Pass 2 (sequential): fill the pre-allocated array and re-apply NA flags.
    taken = pre_alloc_string_array(n_taken, total_utf8_bytes)
    for k in numpy.arange(n_taken):
        taken[k] = data[indexes[k]]
        if is_missing[k]:
            str_arr_set_na(taken, k)

    return taken
def getitem_by_mask_impl(arr, idx):
    """Select the elements of `arr` at positions where the mask `idx` is truthy.

    The work is split into the chunks returned by ``parallel_chunks(len(arr))``.
    A first parallel pass counts the selected elements per chunk; a second one
    writes each chunk's selection at the offset given by the counts of all
    preceding chunks, so the original order is kept.

    ``is_str_arr``, ``is_range`` and ``res_dtype`` are resolved outside this
    function. On the ``is_range`` path values are computed as
    ``arr.start + arr.step * j`` instead of being read from `arr`. On the
    ``is_str_arr`` path the result is built from a list of strings and NA flags
    taken from ``isna(arr, j)`` are applied afterwards.
    """
    chunks = parallel_chunks(len(arr))
    n_chunks = len(chunks)

    # Pass 1: number of selected elements per chunk, plus the overall total
    # (the total is accumulated as a prange reduction).
    hits_per_chunk = numpy.empty(n_chunks, dtype=numpy.int64)
    n_selected = 0
    for c in prange(n_chunks):
        span = chunks[c]
        hits = 0
        for j in range(span.start, span.stop):
            if idx[j]:
                hits += 1
        n_selected += hits
        hits_per_chunk[c] = hits

    if is_str_arr == True:  # noqa
        result_data = [''] * n_selected
        result_nan_mask = numpy.empty(shape=n_selected, dtype=types.bool_)
    else:
        result_data = numpy.empty(shape=n_selected, dtype=res_dtype)

    # Pass 2: every chunk writes into its own slice of the result.
    for c in prange(n_chunks):
        span = chunks[c]
        # output offset of this chunk = number of hits in all earlier chunks
        write_pos = int(sum(hits_per_chunk[:c]))
        for j in range(span.start, span.stop):
            if idx[j]:
                if is_range == True:  # noqa
                    value = arr.start + arr.step * j
                else:
                    value = arr[j]
                result_data[write_pos] = value
                if is_str_arr == True:  # noqa
                    result_nan_mask[write_pos] = isna(arr, j)
                write_pos += 1

    if is_str_arr == True:  # noqa
        str_result = create_str_arr_from_list(result_data)
        str_arr_set_na_by_mask(str_result, result_nan_mask)
        return str_result
    else:
        return result_data