Beispiel #1
0
def test_pyarrow_not_installed_raises():
    msg = re.escape("pyarrow>=1.0.0 is required for PyArrow backed")

    with pytest.raises(ImportError, match=msg):
        StringDtype(storage="pyarrow")

    with pytest.raises(ImportError, match=msg):
        ArrowStringArray([])

    with pytest.raises(ImportError, match=msg):
        ArrowStringArray._from_sequence(["a", None, "b"])
Beispiel #2
0
    def __from_arrow__(
            self,
            array: pyarrow.Array | pyarrow.ChunkedArray) -> BaseStringArray:
        """
        Construct StringArray from pyarrow Array/ChunkedArray.
        """
        if self.storage == "pyarrow":
            from pandas.core.arrays.string_arrow import ArrowStringArray

            return ArrowStringArray(array)
        else:

            import pyarrow

            if isinstance(array, pyarrow.Array):
                chunks = [array]
            else:
                # pyarrow.ChunkedArray
                chunks = array.chunks

            results = []
            for arr in chunks:
                # using _from_sequence to ensure None is converted to NA
                str_arr = StringArray._from_sequence(np.array(arr))
                results.append(str_arr)

        if results:
            return StringArray._concat_same_type(results)
        else:
            return StringArray(np.array([], dtype="object"))
Beispiel #3
0
def test_setitem(multiple_chunks, key, value, expected):
    import pyarrow as pa

    result = pa.array(list("abcde"))
    expected = pa.array(expected)

    if multiple_chunks:
        result = pa.chunked_array([result[:3], result[3:]])
        expected = pa.chunked_array([expected[:3], expected[3:]])

    result = ArrowStringArray(result)
    expected = ArrowStringArray(expected)

    result[key] = value
    tm.assert_equal(result, expected)
    assert result._data.num_chunks == expected._data.num_chunks
Beispiel #4
0
def test_constructor_not_string_type_raises(array, chunked):
    arr = array.array([1, 2, 3])
    if chunked:
        if array is np:
            pytest.skip("chunked not applicable to numpy array")
        arr = pa.chunked_array(arr)
    if array is np:
        msg = "Unsupported type '<class 'numpy.ndarray'>' for ArrowStringArray"
    else:
        msg = re.escape(
            "ArrowStringArray requires a PyArrow (chunked) array of string type"
        )
    with pytest.raises(ValueError, match=msg):
        ArrowStringArray(arr)
Beispiel #5
0
def test_setitem_invalid_indexer_raises():
    import pyarrow as pa

    arr = ArrowStringArray(pa.array(list("abcde")))

    with pytest.raises(IndexError, match=None):
        arr[5] = "foo"

    with pytest.raises(IndexError, match=None):
        arr[-6] = "foo"

    with pytest.raises(IndexError, match=None):
        arr[[0, 5]] = "foo"

    with pytest.raises(IndexError, match=None):
        arr[[0, -6]] = "foo"

    with pytest.raises(IndexError, match=None):
        arr[[True, True, False]] = "foo"

    with pytest.raises(ValueError, match=None):
        arr[[0, 1]] = ["foo", "bar", "baz"]
Beispiel #6
0
def test_from_sequence_wrong_dtype_raises():
    with pd.option_context("string_storage", "python"):
        ArrowStringArray._from_sequence(["a", None, "c"], dtype="string")

    with pd.option_context("string_storage", "pyarrow"):
        ArrowStringArray._from_sequence(["a", None, "c"], dtype="string")

    with pytest.raises(AssertionError, match=None):
        ArrowStringArray._from_sequence(["a", None, "c"],
                                        dtype="string[python]")

    ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]")

    with pytest.raises(AssertionError, match=None):
        with pd.option_context("string_storage", "python"):
            ArrowStringArray._from_sequence(["a", None, "c"],
                                            dtype=StringDtype())

    with pd.option_context("string_storage", "pyarrow"):
        ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())

    with pytest.raises(AssertionError, match=None):
        ArrowStringArray._from_sequence(["a", None, "c"],
                                        dtype=StringDtype("python"))

    ArrowStringArray._from_sequence(["a", None, "c"],
                                    dtype=StringDtype("pyarrow"))

    with pd.option_context("string_storage", "python"):
        StringArray._from_sequence(["a", None, "c"], dtype="string")

    with pd.option_context("string_storage", "pyarrow"):
        StringArray._from_sequence(["a", None, "c"], dtype="string")

    StringArray._from_sequence(["a", None, "c"], dtype="string[python]")

    with pytest.raises(AssertionError, match=None):
        StringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]")

    with pd.option_context("string_storage", "python"):
        StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())

    with pytest.raises(AssertionError, match=None):
        with pd.option_context("string_storage", "pyarrow"):
            StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())

    StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python"))

    with pytest.raises(AssertionError, match=None):
        StringArray._from_sequence(["a", None, "c"],
                                   dtype=StringDtype("pyarrow"))