Exemple #1
0
        def _sdc_take_list_str_impl(data, indexes):
            """Gather strings from `data` at the positions listed in `indexes`.

            `indexes` is a sequence of index sequences. The result is a single
            flat array (allocated with `pre_alloc_string_array`) that holds
            `data[indexes[0][0]]`, `data[indexes[0][1]]`, ... in that order.
            Positions whose source element is NA are marked NA in the result.
            """
            n_chunks = len(indexes)

            # Start offset of every chunk inside the flattened result.
            # The running total is computed once here instead of being
            # re-summed (through an `indexes[0:i]` slice) inside every
            # iteration of both parallel loops, which was quadratic in the
            # number of chunks.
            chunk_starts = numpy.empty(n_chunks, dtype=numpy.int64)
            res_size = 0
            for i in range(n_chunks):
                chunk_starts[i] = res_size
                res_size += len(indexes[i])

            # First pass: count the bytes needed and remember NA positions.
            nan_mask = numpy.zeros(res_size, dtype=numpy.bool_)
            num_total_bytes = 0
            for i in numba.prange(n_chunks):
                current_pos = chunk_starts[i]
                for j in range(len(indexes[i])):
                    num_total_bytes += get_utf8_size(data[indexes[i][j]])
                    if isna(data, indexes[i][j]):
                        nan_mask[current_pos] = True
                    current_pos += 1

            # Second pass: copy the values into the pre-sized result.
            # NOTE(review): this keeps the original parallel fill; confirm that
            # item assignment into the pre-allocated string array is safe when
            # performed from parallel iterations.
            res_arr = pre_alloc_string_array(res_size, num_total_bytes)
            for i in numba.prange(n_chunks):
                current_pos = chunk_starts[i]
                for j in range(len(indexes[i])):
                    res_arr[current_pos] = data[indexes[i][j]]
                    if nan_mask[current_pos]:
                        str_arr_set_na(res_arr, current_pos)
                    current_pos += 1

            return res_arr
Exemple #2
0
            def _series_comp_binop_none_indexes_impl(self,
                                                     other,
                                                     fill_value=None):
                """Positional `self < other` for two Series, returned as a Series.

                Only the underlying data arrays are compared; the result is
                built without an explicit index.

                Raises:
                    ValueError: the two data arrays differ in length.
                """
                n_left = len(self._data)
                n_right = len(other._data)
                if n_left != n_right:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

                if fill_value_is_none == True:  # noqa
                    # No substitution requested: plain array comparison.
                    comparison = self._data < other._data
                else:
                    comparison = numpy.empty(n_left, dtype=types.bool_)
                    for pos in numba.prange(n_left):
                        lhs_missing = isna(self._data, pos)
                        rhs_missing = isna(other._data, pos)
                        # A missing operand is replaced by fill_value.
                        lhs = fill_value if lhs_missing else self._data[pos]
                        rhs = fill_value if rhs_missing else other._data[pos]
                        if lhs_missing and rhs_missing:
                            # Both sides missing: the result is always False.
                            comparison[pos] = False
                        else:
                            comparison[pos] = lhs < rhs

                return pandas.Series(comparison)
Exemple #3
0
    def sdc_reindex_series_impl(arr, index, name, by_index):
        """Build a Series holding `arr` re-ordered from `index` to `by_index`.

        Args:
            arr: data values, labelled position-by-position by `index`.
            index: current labels of `arr`.
            name: name given to the resulting Series.
            by_index: labels defining the order of the result.

        Returns:
            pandas.Series indexed by `by_index`; position `i` holds the element
            of `arr` whose label in `index` equals `by_index[i]`.

        Raises:
            ValueError: `index` contains a repeated label (only checked when
                reindexing is actually required).
            IndexingError: at least one label of `by_index` is absent from
                `index`.
        """

        # no reindexing is needed if indexes are equal
        if range_indexes == True:  # noqa
            equal_indexes = numpy_like.array_equal(index, by_index)
        elif int64_indexes == True:  # noqa
            equal_indexes = numpy_like.array_equal(index, by_index)
        else:
            equal_indexes = False
        if (index is by_index or equal_indexes):
            return pandas.Series(data=arr, index=by_index, name=name)

        if data_is_str_arr == True:  # noqa
            # Strings are collected in a list and converted at the end; NA
            # positions are tracked separately in a boolean mask.
            _res_data = [''] * len(by_index)
            res_data_nan_mask = numpy.zeros(len(by_index), dtype=types.bool_)
        else:
            _res_data = numpy.empty(len(by_index), dtype=data_dtype)

        # build a dict of self.index values to their positions:
        map_index_to_position = Dict.empty(key_type=index_dtype,
                                           value_type=types.int32)

        for i, value in enumerate(index):
            if value in map_index_to_position:
                raise ValueError("cannot reindex from a duplicate axis")
            else:
                map_index_to_position[value] = i

        # Count missing labels instead of raising inside the parallel loop.
        index_mismatch = 0
        for i in numba.prange(len(by_index)):
            val = by_index[i]
            if val in map_index_to_position:
                pos_in_self = map_index_to_position[val]
                _res_data[i] = arr[pos_in_self]
                if data_is_str_arr == True:  # noqa
                    # The NA flag must come from the same source position as
                    # the value. The destination position `i` would give the
                    # wrong flag and may lie outside `arr`.
                    res_data_nan_mask[i] = isna(arr, pos_in_self)
            else:
                index_mismatch += 1
        if index_mismatch:
            msg = "Unalignable boolean Series provided as indexer " + \
                  "(index of the boolean Series and of the indexed object do not match)."
            raise IndexingError(msg)

        if data_is_str_arr == True:  # noqa
            res_data = create_str_arr_from_list(_res_data)
            str_arr_set_na_by_mask(res_data, res_data_nan_mask)
        else:
            res_data = _res_data

        return pandas.Series(data=res_data, index=by_index, name=name)
        def _series_lt_common_impl(self, other, fill_value=None):
            """Element-wise `self < other` for two Series with matching indexes.

            Returns:
                pandas.Series of booleans whose index is the result of
                `sdc_unify_index_types` applied to both input indexes.

            Raises:
                ValueError: the two indexes are neither the same object nor
                    equal.
            """
            lhs_index = self.index
            rhs_index = other.index

            # The equality test depends on whether the index type offers
            # `.equals`; otherwise the indexes are compared as arrays.
            if index_api_supported == True:  # noqa
                indexes_match = lhs_index is rhs_index or lhs_index.equals(rhs_index)
            else:
                indexes_match = lhs_index is rhs_index or numpy_like.array_equal(lhs_index, rhs_index)
            if not indexes_match:
                raise ValueError("Can only compare identically-labeled Series objects")

            n_items = len(lhs_index)
            if fill_value_is_none == True:  # noqa
                # No substitution requested: plain array comparison.
                comparison = self._data < other._data
            else:
                comparison = numpy.empty(n_items, dtype=types.bool_)
                for pos in numba.prange(n_items):
                    lhs_missing = isna(self._data, pos)
                    rhs_missing = isna(other._data, pos)
                    # A missing operand is replaced by fill_value.
                    lhs = fill_value if lhs_missing else self._data[pos]
                    rhs = fill_value if rhs_missing else other._data[pos]
                    if lhs_missing and rhs_missing:
                        # Both sides missing: the result is always False.
                        comparison[pos] = False
                    else:
                        comparison[pos] = lhs < rhs

            unified_index = sdc_unify_index_types(lhs_index, rhs_index)
            return pandas.Series(comparison, index=unified_index)
Exemple #5
0
        def _sdc_take_str_arr_impl(data, indexes):
            """Return `data[indexes[0]], data[indexes[1]], ...` as a new array.

            The result is allocated with `pre_alloc_string_array`; positions
            whose source element is NA are marked NA in the result.
            """
            n_taken = len(indexes)
            na_flags = numpy.zeros(n_taken, dtype=numpy.bool_)

            # First pass (parallel): count bytes and remember NA positions.
            total_bytes = 0
            for pos in numba.prange(n_taken):
                src = indexes[pos]
                total_bytes += get_utf8_size(data[src])
                if isna(data, src):
                    na_flags[pos] = True

            # Second pass: fill the pre-sized result, one position at a time.
            taken = pre_alloc_string_array(n_taken, total_bytes)
            for pos in numpy.arange(n_taken):
                src = indexes[pos]
                taken[pos] = data[src]
                if na_flags[pos]:
                    str_arr_set_na(taken, pos)

            return taken
Exemple #6
0
    def getitem_by_mask_impl(arr, idx):
        """Select the elements of `arr` at the positions where `idx` is true.

        The work is split into the chunks returned by `parallel_chunks`.
        A first pass counts the selected elements per chunk, so that a second
        pass can write each chunk's selection at its own offset in the result.

        Returns:
            The selected values in their original order: a string array built
            by `create_str_arr_from_list` (NA flags preserved) when
            `is_str_arr` is set, otherwise a numpy array of `res_dtype`.
        """
        chunks = parallel_chunks(len(arr))
        n_chunks = len(chunks)
        hits_per_chunk = numpy.empty(n_chunks, dtype=numpy.int64)
        total_hits = 0

        # Pass 1: count the selected positions in every chunk.
        for c in prange(n_chunks):
            span = chunks[c]
            hits = 0
            for src in range(span.start, span.stop):
                if idx[src]:
                    hits += 1
            total_hits += hits
            hits_per_chunk[c] = hits

        if is_str_arr == True:  # noqa
            selected = [''] * total_hits
            selected_na = numpy.empty(shape=total_hits, dtype=types.bool_)
        else:
            selected = numpy.empty(shape=total_hits, dtype=res_dtype)

        # Pass 2: each chunk writes from the offset given by the number of
        # selected elements in all preceding chunks.
        for c in prange(n_chunks):
            span = chunks[c]
            dst = int(sum(hits_per_chunk[0:c]))

            for src in range(span.start, span.stop):
                if idx[src]:
                    if is_range == True:  # noqa
                        # Range-like input: compute the value from start/step.
                        item = arr.start + arr.step * src
                    else:
                        item = arr[src]
                    selected[dst] = item
                    if is_str_arr == True:  # noqa
                        selected_na[dst] = isna(arr, src)
                    dst += 1

        if is_str_arr == True:  # noqa
            selected_as_str_arr = create_str_arr_from_list(selected)
            str_arr_set_na_by_mask(selected_as_str_arr, selected_na)
            return selected_as_str_arr
        else:
            return selected