Beispiel #1
0
    def hpat_pandas_stringmethods_casefold_impl(self):
        """Return a Series whose elements are the casefolded elements of ``self._data``.

        The NaN mask is taken from the source data with ``get_nan_mask`` before
        the transformation and handed to ``str_arr_set_na_by_mask`` together
        with the transformed string array. The result reuses the index and
        name of ``self._data``.
        """
        source = self._data._data
        na_mask = get_nan_mask(source)
        n_items = len(self._data)

        # pre-sized list: each prange iteration writes only to its own slot
        folded = [''] * n_items
        for pos in numba.prange(n_items):
            folded[pos] = source[pos].casefold()

        folded_arr = create_str_arr_from_list(folded)
        masked_arr = str_arr_set_na_by_mask(folded_arr, na_mask)
        return pandas.Series(masked_arr, self._data._index, name=self._data._name)
Beispiel #2
0
        def test_impl(x):
            """Round-trip ``x`` through a string array and a string view.

            Returns a tuple ``(rebuilt, matches)``: the string obtained by
            converting the view back with ``str``, and whether it equals
            element 0 of the string array it was created from.
            """
            # put x into a one-element string array (native encoded unicode string)
            single_item_arr = create_str_arr_from_list([x])
            raw_ptr, view_size = get_str_view_data(single_item_arr, 0)

            # the part under test: wrap the raw data in a string view, then turn it into unicode
            view = string_view_create_with_data(raw_ptr, view_size)
            rebuilt = str(view)
            # reading the array here also extends its lifetime past the use of the view
            matches = rebuilt == single_item_arr[0]
            return rebuilt, matches
Beispiel #3
0
    def sdc_reindex_series_impl(arr, index, name, by_index):
        """Build a Series holding the values of ``arr`` reordered to follow ``by_index``.

        ``arr`` is labelled by ``index``; for every label in ``by_index`` the
        value stored under the same label in ``index`` is copied to the result.
        ``range_indexes``, ``int64_indexes``, ``data_is_str_arr``, ``data_dtype``
        and ``index_dtype`` are not parameters; they are resolved from the
        enclosing scope, which is outside this block.

        Parameters:
            arr: data to reorder.
            index: labels of ``arr``.
            name: name given to the returned Series.
            by_index: labels defining the order (and index) of the result.

        Returns:
            ``pandas.Series`` with index ``by_index``. When ``index`` and
            ``by_index`` are the same object or compare equal, ``arr`` is used
            as the data without copying.

        Raises:
            ValueError: ``index`` contains a duplicate value.
            IndexingError: some value of ``by_index`` is not present in ``index``.
        """

        # no reindexing is needed if indexes are equal
        if range_indexes == True:  # noqa
            equal_indexes = numpy_like.array_equal(index, by_index)
        elif int64_indexes == True:  # noqa
            equal_indexes = numpy_like.array_equal(index, by_index)
        else:
            equal_indexes = False
        if (index is by_index or equal_indexes):
            return pandas.Series(data=arr, index=by_index, name=name)

        if data_is_str_arr == True:  # noqa
            # strings are gathered into a list first; NA flags are tracked separately
            _res_data = [''] * len(by_index)
            res_data_nan_mask = numpy.zeros(len(by_index), dtype=types.bool_)
        else:
            _res_data = numpy.empty(len(by_index), dtype=data_dtype)

        # build a dict of self.index values to their positions:
        map_index_to_position = Dict.empty(key_type=index_dtype,
                                           value_type=types.int32)

        for i, value in enumerate(index):
            if value in map_index_to_position:
                raise ValueError("cannot reindex from a duplicate axis")
            map_index_to_position[value] = i

        # count labels that cannot be found instead of raising inside the prange loop
        index_mismatch = 0
        for i in numba.prange(len(by_index)):
            val = by_index[i]
            if val in map_index_to_position:
                pos_in_self = map_index_to_position[val]
                _res_data[i] = arr[pos_in_self]
                if data_is_str_arr == True:  # noqa
                    # the NA flag must come from the same source position as the value
                    res_data_nan_mask[i] = isna(arr, pos_in_self)
            else:
                index_mismatch += 1
        if index_mismatch:
            msg = "Unalignable boolean Series provided as indexer " + \
                  "(index of the boolean Series and of the indexed object do not match)."
            raise IndexingError(msg)

        if data_is_str_arr == True:  # noqa
            res_data = create_str_arr_from_list(_res_data)
            str_arr_set_na_by_mask(res_data, res_data_nan_mask)
        else:
            res_data = _res_data

        return pandas.Series(data=res_data, index=by_index, name=name)
    def hpat_pandas_stringmethods_upper_impl(self):
        """Return a Series whose elements are the upper-cased elements of ``self._data``.

        The NaN mask is taken from the source data with ``get_nan_mask`` before
        the transformation and handed to ``str_arr_set_na_by_mask`` together
        with the transformed string array. The result reuses the index and
        name of ``self._data``.
        """
        source = self._data._data
        na_mask = get_nan_mask(source)
        n_items = len(self._data)
        upper_items = [''] * n_items

        for pos in numba.prange(n_items):
            text = source[pos]
            # empty strings are stored as they are, without calling upper()
            upper_items[pos] = text.upper() if len(text) > 0 else text

        upper_arr = create_str_arr_from_list(upper_items)
        masked_arr = str_arr_set_na_by_mask(upper_arr, na_mask)

        return pandas.Series(masked_arr, self._data._index, name=self._data._name)
    def impl(self, to_strip=None):
        """Apply ``usecase(item, to_strip)`` to every non-empty element of ``self._data``.

        ``usecase`` is resolved from the enclosing scope (outside this block).
        Empty strings are copied through unchanged. The NaN mask of the source
        data is handed to ``str_arr_set_na_by_mask`` with the transformed
        string array, and the result reuses the index and name of ``self._data``.

        Parameters:
            to_strip: forwarded unchanged as the second argument of ``usecase``.
        """
        source = self._data._data
        na_mask = get_nan_mask(source)
        n_items = len(self._data)
        processed = [''] * n_items

        for pos in range(n_items):
            text = source[pos]
            if len(text) == 0:
                # nothing to process in an empty string
                processed[pos] = text
            else:
                processed[pos] = usecase(text, to_strip)

        processed_arr = create_str_arr_from_list(processed)
        masked_arr = str_arr_set_na_by_mask(processed_arr, na_mask)

        return pandas.Series(masked_arr, self._data._index, name=self._data._name)
Beispiel #6
0
    def getitem_by_mask_impl(arr, idx):
        """Collect the elements of ``arr`` at positions where ``idx`` is truthy.

        Works in two passes over the chunks returned by ``parallel_chunks``:
        the first counts the selected positions per chunk, the second copies
        the selected elements into the output, each chunk starting at the
        offset given by the counts of all earlier chunks.

        ``is_str_arr``, ``is_range`` and ``res_dtype`` are resolved from the
        enclosing scope (outside this block). When ``is_range`` holds, values
        are computed as ``arr.start + arr.step * position`` instead of being
        read from ``arr``. When ``is_str_arr`` holds, the result is produced by
        ``create_str_arr_from_list`` and the per-element ``isna`` flags are
        passed to ``str_arr_set_na_by_mask``; otherwise a numpy array of
        ``res_dtype`` is returned.
        """
        chunks = parallel_chunks(len(arr))
        n_chunks = len(chunks)
        selected_per_chunk = numpy.empty(n_chunks, dtype=numpy.int64)
        total_selected = 0

        # pass 1: count the selected positions of every chunk
        for c in prange(n_chunks):
            part = chunks[c]
            hits = 0
            for pos in range(part.start, part.stop):
                if idx[pos]:
                    hits += 1
            selected_per_chunk[c] = hits
            total_selected += hits

        if is_str_arr == True:  # noqa
            out_items = [''] * total_selected
            out_na_mask = numpy.empty(shape=total_selected, dtype=types.bool_)
        else:
            out_items = numpy.empty(shape=total_selected, dtype=res_dtype)

        # pass 2: copy the selected elements, chunk by chunk
        for c in prange(n_chunks):
            part = chunks[c]
            # this chunk writes after everything selected by the chunks before it
            write_pos = int(sum(selected_per_chunk[0:c]))

            for pos in range(part.start, part.stop):
                if idx[pos]:
                    if is_range == True:  # noqa
                        picked = arr.start + arr.step * pos
                    else:
                        picked = arr[pos]
                    out_items[write_pos] = picked
                    if is_str_arr == True:  # noqa
                        out_na_mask[write_pos] = isna(arr, pos)
                    write_pos += 1

        if is_str_arr == True:  # noqa
            out_str_arr = create_str_arr_from_list(out_items)
            str_arr_set_na_by_mask(out_str_arr, out_na_mask)
            return out_str_arr
        else:
            return out_items
Beispiel #7
0
 def _sdc_asarray_impl(data):
     """Return the result of ``create_str_arr_from_list(data)``."""
     converted = create_str_arr_from_list(data)
     return converted