예제 #1
0
def test_replace_inplace():
    """Compare ``replace`` on ``Series``/``DataFrame`` with pandas.

    Every scenario builds a pandas object together with its ``Series`` /
    ``DataFrame`` counterpart, issues the same ``replace`` call on both and
    compares the outcome through ``assert_eq``.  Where copies are taken
    before the call, those copies are compared again afterwards.
    """

    def check_pair_and_copies(left, right, left_copy, right_copy):
        # Compare the live objects first, then the copies taken earlier.
        assert_eq(left, right)
        assert_eq(left_copy, right_copy)

    values = np.array([5, 1, 2, 3, 4])

    # Scenario 1: scalar -> scalar replacement with inplace=True.
    actual = Series(values)
    expected = pd.Series(values)
    actual_snapshot = actual.copy()
    expected_snapshot = expected.copy()

    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)
    actual.replace(5, 0, inplace=True)
    expected.replace(5, 0, inplace=True)
    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)

    # Scenario 2: dict mapping without inplace (return values are discarded),
    # followed by a ``replace()`` call that receives no arguments at all.
    actual = Series(values)
    expected = pd.Series(values)
    actual_snapshot = actual.copy()
    expected_snapshot = expected.copy()

    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)
    actual.replace({5: 0, 3: -5})
    expected.replace({5: 0, 3: -5})
    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)
    actual_noarg = actual.replace()
    expected_noarg = expected.replace()
    assert_eq(actual_noarg, expected_noarg)

    # Scenario 3: in-place replacement on a categorical series.
    expected = pd.Series(["one", "two", "three"], dtype="category")
    actual = Series.from_pandas(expected)
    actual_snapshot = actual.copy()
    expected_snapshot = expected.copy()

    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)
    actual.replace("one", "two", inplace=True)
    expected.replace("one", "two", inplace=True)
    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)

    # Scenario 4: in-place scalar replacement on a two-column frame.
    expected_frame = pd.DataFrame(
        {"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]}
    )
    actual_frame = DataFrame.from_pandas(expected_frame)
    expected_frame_snapshot = expected_frame.copy()
    actual_frame_snapshot = actual_frame.copy()

    check_pair_and_copies(
        expected_frame,
        actual_frame,
        expected_frame_snapshot,
        actual_frame_snapshot,
    )
    expected_frame.replace(5, 0, inplace=True)
    actual_frame.replace(5, 0, inplace=True)
    check_pair_and_copies(
        expected_frame,
        actual_frame,
        expected_frame_snapshot,
        actual_frame_snapshot,
    )

    # Scenario 5: an empty integer array as ``to_replace``.
    expected_ints = pd.Series([1, 2, 3, 45])
    actual_ints = Series.from_pandas(expected_ints)
    nothing_to_replace = np.array([]).astype(int)

    assert_eq(
        expected_ints.replace(nothing_to_replace, -1),
        actual_ints.replace(nothing_to_replace, -1),
    )

    expected_ints.replace(nothing_to_replace, 77, inplace=True)
    actual_ints.replace(nothing_to_replace, 77, inplace=True)
    assert_eq(expected_ints, actual_ints)

    # Scenario 6: per-column dict arguments on a single-column frame.
    expected_frame = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]})
    actual_frame = DataFrame.from_pandas(expected_frame)

    assert_eq(
        expected_frame.replace({"a": 2}, {"a": -33}),
        actual_frame.replace({"a": 2}, {"a": -33}),
    )
    assert_eq(
        expected_frame.replace({"a": [2, 5]}, {"a": [9, 10]}),
        actual_frame.replace({"a": [2, 5]}, {"a": [9, 10]}),
    )
    assert_eq(expected_frame.replace([], []), actual_frame.replace([], []))

    # Scenario 7: scalar ``to_replace`` with an empty-list ``value``; both
    # sides are handed to ``assert_exceptions_equal`` without comparing the
    # error message text.
    def invalid_call():
        # Build fresh containers per side so the two calls share nothing.
        return ([], {"to_replace": -1, "value": []})

    assert_exceptions_equal(
        lfunc=expected_frame.replace,
        rfunc=actual_frame.replace,
        lfunc_args_and_kwargs=invalid_call(),
        rfunc_args_and_kwargs=invalid_call(),
        compare_error_message=False,
    )
예제 #2
0
def test_replace_inplace():
    """Compare ``replace`` on ``Series``/``DataFrame`` with pandas.

    Every scenario builds a pandas object together with its ``Series`` /
    ``DataFrame`` counterpart, issues the same ``replace`` call on both and
    compares the outcome through ``assert_eq``.  Where copies are taken
    before the call, those copies are compared again afterwards.  The last
    scenario expects a ``TypeError`` from both implementations.
    """

    def check_pair_and_copies(left, right, left_copy, right_copy):
        # Compare the live objects first, then the copies taken earlier.
        assert_eq(left, right)
        assert_eq(left_copy, right_copy)

    values = np.array([5, 1, 2, 3, 4])

    # Scenario 1: scalar -> scalar replacement with inplace=True.
    actual = Series(values)
    expected = pd.Series(values)
    actual_snapshot = actual.copy()
    expected_snapshot = expected.copy()

    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)
    actual.replace(5, 0, inplace=True)
    expected.replace(5, 0, inplace=True)
    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)

    # Scenario 2: dict mapping without inplace (return values are discarded),
    # followed by a ``replace()`` call that receives no arguments at all.
    actual = Series(values)
    expected = pd.Series(values)
    actual_snapshot = actual.copy()
    expected_snapshot = expected.copy()

    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)
    actual.replace({5: 0, 3: -5})
    expected.replace({5: 0, 3: -5})
    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)
    actual_noarg = actual.replace()
    expected_noarg = expected.replace()
    assert_eq(actual_noarg, expected_noarg)

    # Scenario 3: in-place replacement on a categorical series.
    expected = pd.Series(["one", "two", "three"], dtype="category")
    actual = Series.from_pandas(expected)
    actual_snapshot = actual.copy()
    expected_snapshot = expected.copy()

    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)
    actual.replace("one", "two", inplace=True)
    expected.replace("one", "two", inplace=True)
    check_pair_and_copies(actual, expected, actual_snapshot, expected_snapshot)

    # Scenario 4: in-place scalar replacement on a two-column frame.
    expected_frame = pd.DataFrame(
        {"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]}
    )
    actual_frame = DataFrame.from_pandas(expected_frame)
    expected_frame_snapshot = expected_frame.copy()
    actual_frame_snapshot = actual_frame.copy()

    check_pair_and_copies(
        expected_frame,
        actual_frame,
        expected_frame_snapshot,
        actual_frame_snapshot,
    )
    expected_frame.replace(5, 0, inplace=True)
    actual_frame.replace(5, 0, inplace=True)
    check_pair_and_copies(
        expected_frame,
        actual_frame,
        expected_frame_snapshot,
        actual_frame_snapshot,
    )

    # Scenario 5: an empty integer array as ``to_replace``.
    expected_ints = pd.Series([1, 2, 3, 45])
    actual_ints = Series.from_pandas(expected_ints)
    nothing_to_replace = np.array([]).astype(int)

    assert_eq(
        expected_ints.replace(nothing_to_replace, -1),
        actual_ints.replace(nothing_to_replace, -1),
    )

    expected_ints.replace(nothing_to_replace, 77, inplace=True)
    actual_ints.replace(nothing_to_replace, 77, inplace=True)
    assert_eq(expected_ints, actual_ints)

    # Scenario 6: per-column dict arguments on a single-column frame.
    expected_frame = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]})
    actual_frame = DataFrame.from_pandas(expected_frame)

    assert_eq(
        expected_frame.replace({"a": 2}, {"a": -33}),
        actual_frame.replace({"a": 2}, {"a": -33}),
    )
    assert_eq(
        expected_frame.replace({"a": [2, 5]}, {"a": [9, 10]}),
        actual_frame.replace({"a": [2, 5]}, {"a": [9, 10]}),
    )
    assert_eq(expected_frame.replace([], []), actual_frame.replace([], []))

    # Scenario 7: scalar ``to_replace`` with an empty-list ``value`` must
    # raise TypeError, first on the pandas frame and then on its counterpart.
    for frame in (expected_frame, actual_frame):
        with pytest.raises(TypeError):
            frame.replace(-1, [])
예제 #3
0
class GpuArrowNodeReader(object):
    """Expose a single column of a table through a ``Series``.

    The constructor keeps the table, the schema field found at ``index``
    and a ``Series`` built from ``table.column(index)``; the series is
    named after the field.  (Presumably ``table`` is a pyarrow table --
    confirm against the callers.)
    """

    def __init__(self, table, index):
        self._table = table
        self._field = table.schema[index]
        node_column = column.as_column(table.column(index))
        self._series = Series(node_column)
        # The field must already be stored: ``self.name`` reads it.
        self._series.name = self.name

    def __len__(self):
        """Length of the wrapped series."""
        wrapped = self._series
        return len(wrapped)

    @property
    def schema(self):
        """Schema of the table this node was created from."""
        table = self._table
        return table.schema

    @property
    def field_schema(self):
        """Schema field selected by the constructor's ``index``."""
        return self._field

    @property
    def is_dictionary(self):
        """Result of ``pa.types.is_dictionary`` for the field's type."""
        return pa.types.is_dictionary(self._field.type)

    @property
    def null_count(self):
        """``null_count`` reported by the wrapped series."""
        wrapped = self._series
        return wrapped.null_count

    @property
    def dtype(self):
        """Field type converted with ``arrow_to_pandas_dtype``."""
        return arrow_to_pandas_dtype(self._field.type)

    @property
    def index_dtype(self):
        """Pandas dtype of the field type's ``index_type``."""
        index_type = self._field.type.index_type
        return index_type.to_pandas_dtype()

    @property
    def name(self):
        """Name of the schema field."""
        field = self._field
        return field.name

    @property
    def data(self):
        """
        Return the data as the expected dtype
        and with the padding bytes truncated.

        Yields ``None`` when no raw data buffer is available.  The buffer
        is viewed as ``index_dtype`` for dictionary fields and as ``dtype``
        otherwise.
        """
        raw = self.data_raw
        if raw is None:
            return None
        if self.is_dictionary:
            view_dtype = self.index_dtype
        else:
            view_dtype = self.dtype
        return raw.view(view_dtype)

    @property
    def null(self):
        """
        Return the null mask with the padding bytes truncated.

        Yields ``None`` when no raw null buffer is available.
        """
        raw_mask = self.null_raw
        if raw_mask is None:
            return None
        # Round the row count up to whole mask words, then convert the word
        # count into a byte length used to slice the raw buffer.
        word_count = (len(self) + mask_bitsize - 1) // mask_bitsize
        byte_length = word_count * mask_dtype.itemsize
        return raw_mask[:byte_length].view(mask_dtype)

    @property
    def data_raw(self):
        "Accessor for the data buffer as a device array"
        backing_column = self._series._column
        return backing_column.data_array_view

    @property
    def null_raw(self):
        "Accessor for the null buffer as a device array"
        backing_column = self._series._column
        return backing_column.mask_array_view

    def make_series(self):
        """Return a copy of the wrapped series made with ``deep=False``.
        """
        shallow = self._series.copy(deep=False)
        return shallow

    def _make_dictionary_series(self):
        """Return a ``deep=False`` copy of the series for a dictionary node.

        Asserts that the field type is a dictionary type first.
        """
        assert self.is_dictionary
        shallow = self._series.copy(deep=False)
        return shallow