Esempio n. 1
0
    def __setitem__(self, key, value):
        """Assign string (or missing) values at ``key`` after validating them.

        Parameters
        ----------
        key : scalar, slice or array-like
            Position(s) to overwrite; normalised through
            ``check_array_indexer`` before use.
        value : str, missing value, or sequence of those
            ``pd.Index``, ``pd.Series`` and instances of this same class are
            converted with ``to_numpy()`` before validation.

        Raises
        ------
        ValueError
            If a sequence is assigned to a scalar ``key``, if a scalar value
            is neither missing (per ``pd.isna``) nor a ``str``, or if a
            non-empty sequence fails ``lib.is_string_array(skipna=True)``.
        """
        # Unwrap pandas containers and sibling arrays to a plain ndarray.
        if isinstance(value, (pd.Index, pd.Series, type(self))):
            value = value.to_numpy()

        key = check_array_indexer(self, key)
        value_is_scalar = is_scalar(value)
        if is_scalar(key) and not value_is_scalar:
            raise ValueError("setting an array element with a sequence.")

        # Validate the incoming items.
        if not value_is_scalar:
            if not is_array_like(value):
                value = np.asarray(value, dtype=object)
            if len(value) and not lib.is_string_array(value, skipna=True):
                raise ValueError("Must provide strings.")
        elif pd.isna(value):
            # Any missing scalar is stored as None.
            value = None
        elif not isinstance(value, str):
            raise ValueError(
                f"Cannot set non-string value '{value}' into a ArrowStringArray."
            )

        if not self._use_arrow:
            self._ndarray[key] = value
            return

        # Arrow backend: materialise as NumPy, assign there, then rebuild the
        # chunked array from the modified copy.
        buffer = np.asarray(self._arrow_array.to_pandas())
        buffer[key] = value
        self._arrow_array = pa.chunked_array([pa.array(buffer)])
Esempio n. 2
0
    def fillna(self, value=None, method=None, limit=None):
        """Return a new array with the missing entries filled.

        Parameters
        ----------
        value : scalar, Geometry or array-like, optional
            Replacement for missing entries. An array-like must have the same
            length as ``self``; only its entries at missing positions are used.
            A single ``Geometry`` is wrapped in a list before assignment.
        method : str, optional
            Fill method, passed through ``validate_fillna_kwargs``. After
            validation, ``"pad"`` selects ``pad_1d`` and any other value
            selects ``backfill_1d``.
        limit : int, optional
            Forwarded to the pad/backfill function. It is not used when
            filling with ``value``.

        Returns
        -------
        A new array; ``self`` is left unmodified by the code in this method.

        Raises
        ------
        ValueError
            If an array-like ``value`` has a different length than ``self``.
        """
        from pandas.api.types import is_array_like
        from pandas.util._validators import validate_fillna_kwargs
        from pandas.core.missing import pad_1d, backfill_1d

        value, method = validate_fillna_kwargs(value, method)

        mask = self.isna()

        if is_array_like(value):
            if len(value) != len(self):
                raise ValueError("Length of 'value' does not match. Got ({}) "
                                 " expected {}".format(len(value), len(self)))
            # Keep only the replacements that line up with missing slots.
            value = value[mask]

        if mask.any():
            if method is not None:
                func = pad_1d if method == "pad" else backfill_1d
                new_values = func(self.astype(object), limit=limit, mask=mask)
                # Pass dtype by keyword: it is keyword-only in the pandas
                # ExtensionArray._from_sequence signature, and this matches
                # the dtype=self.dtype used for the value path below.
                new_values = self._from_sequence(new_values, dtype=self.dtype)
            else:
                # fill with value
                new_values = np.asarray(self)
                if isinstance(value, Geometry):
                    # Wrap a single geometry in a list before the masked
                    # assignment (presumably so NumPy treats it as one
                    # element rather than iterating it -- TODO confirm).
                    value = [value]
                new_values[mask] = value
                new_values = self.__class__(new_values, dtype=self.dtype)
        else:
            new_values = self.copy()
        return new_values
Esempio n. 3
0
    def __getitem__(self, item):
        """Select a subset of self.

        Parameters
        ----------
        item : int, slice, or iterable
            * int: The position in 'self' to get. Negative positions are
              offset by ``len(self)``.
            * slice: A slice object, where 'start', 'stop', and 'step' are
              integers or None.
            * iterable of integers: positions, delegated to ``self.take``.
            * iterable of booleans: a mask; the positions where it is True
              are delegated to ``self.take``.

        Returns
        -------
        item : scalar or ExtensionArray
            For an integer ``item``, ``self.data[item]`` is returned as is,
            or ``None`` when the position is at or past ``len(self)``.
            For a slice, the selection is wrapped in ``type(self)``; an
            empty slice yields an empty instance of the same data type.
            For iterables, the result of ``self.take``.

        Raises
        ------
        IndexError
            If an iterable ``item`` is neither integer nor boolean typed.
        """
        if isinstance(item, slice):
            start = item.start or 0
            stop = item.stop if item.stop is not None else len(self.data)
            stop = min(stop, len(self.data))
            if stop - start == 0:
                # Build the empty result explicitly instead of slicing.
                return type(self)(xnd.xnd([], type=self.data.type))

        elif isinstance(item, Iterable):
            if not is_array_like(item):
                item = np.array(item)
            if is_integer_dtype(item):
                return self.take(item)
            elif is_bool_dtype(item):
                # Convert the mask into the positions of its True entries.
                indices = np.array(item)
                indices = np.argwhere(indices).flatten()
                return self.take(indices)
            else:
                # Adjacent literals rather than a backslash continuation
                # inside the string, which embedded a run of indentation
                # spaces in the message.
                raise IndexError(
                    "Only integers, slices and integer or boolean "
                    "arrays are valid indices.")

        elif is_integer(item):
            if item < 0:
                item += len(self)
            if item >= len(self):
                # NOTE(review): out-of-range positions return None rather
                # than raising IndexError; kept because callers may rely
                # on it -- confirm.
                return None
            return self.data[item]

        value = self.data[item]
        return type(self)(value)
Esempio n. 4
0
    def fillna(self, value=None, method=None, limit=None):
        """
        Return a copy of the array with missing entries filled.

        Parameters
        ----------
        value : scalar, array-like
            Replacement for the missing entries. A scalar is assigned to
            every missing position. An array-like must have the same length
            as 'self'; only its entries at missing positions are used.
        method : str, default None
            Fill method, passed through ``validate_fillna_kwargs``. After
            validation, ``"pad"`` propagates with ``pad_1d`` and any other
            value propagates with ``backfill_1d``.
        limit : int, default None
            Forwarded to the pad/backfill function. It is not used when
            filling with ``value``.

        Returns
        -------
        filled : ExtensionArray
            A new array; a plain copy when nothing is missing.

        Raises
        ------
        ValueError
            If an array-like ``value`` differs in length from 'self'.
        """
        from pandas.api.types import is_array_like
        from pandas.util._validators import validate_fillna_kwargs
        from pandas.core.missing import pad_1d, backfill_1d

        value, method = validate_fillna_kwargs(value, method)
        na_mask = self.isna()

        if is_array_like(value):
            if len(value) != len(self):
                raise ValueError(
                    "Length of 'value' does not match. Got ({}) "
                    " expected {}".format(len(value), len(self))
                )
            # Keep only the replacements aligned with missing positions.
            value = value[na_mask]

        if not na_mask.any():
            # Nothing to fill.
            return self.copy()

        if method is None:
            # Fill by value.
            filled = self.copy()
            filled[na_mask] = value
            return filled

        # Fill by propagation on an object-typed view, then rebuild.
        propagate = pad_1d if method == "pad" else backfill_1d
        propagated = propagate(self.astype(object), limit=limit, mask=na_mask)
        return self._from_sequence(propagated, dtype=self.dtype)
Esempio n. 5
0
    def fillna(self, value=None, method=None, limit=None):
        """
        Build a new array in which the missing entries have been filled.

        Parameters
        ----------
        value : scalar, array-like
            What to put in the missing positions. An array-like must be as
            long as 'self'; it is subset with the missing-value mask before
            assignment.
        method : str, default None
            Propagation method, passed through ``validate_fillna_kwargs``.
            After validation, ``'pad'`` uses ``pad_1d``; anything else uses
            ``backfill_1d``.
        limit : int, default None
            Passed to the propagation function only; it has no effect when
            filling with ``value``.

        Returns
        -------
        filled : ExtensionArray
            A new array (a copy of 'self' when there is nothing to fill).

        Raises
        ------
        ValueError
            When an array-like ``value`` has a different length than 'self'.
        """
        from pandas.api.types import is_array_like
        from pandas.util._validators import validate_fillna_kwargs
        from pandas.core.missing import pad_1d, backfill_1d

        value, method = validate_fillna_kwargs(value, method)
        missing = self.isna()

        if is_array_like(value):
            n_value, n_self = len(value), len(self)
            if n_value != n_self:
                raise ValueError("Length of 'value' does not match. Got ({}) "
                                 " expected {}".format(n_value, n_self))
            value = value[missing]

        if not missing.any():
            filled = self.copy()
        elif method is None:
            # Value fill: assign into a copy at the missing positions.
            filled = self.copy()
            filled[missing] = value
        else:
            # Method fill: propagate over object values and rebuild.
            propagate = pad_1d if method == 'pad' else backfill_1d
            as_objects = self.astype(object)
            filled = self._from_sequence(
                propagate(as_objects, limit=limit, mask=missing),
                dtype=self.dtype,
            )
        return filled
Esempio n. 6
0
    def __init__(self, array, dtype=None, copy=None):
        """Build the array from a sequence or an existing pyarrow array.

        Parameters
        ----------
        array : array-like, list, pa.Array or pa.ChunkedArray
            Source data. Array-likes and lists are unwrapped element by
            element with ``_unwrap_geometry`` and converted with
            ``pa.array``; a ``pa.ChunkedArray`` is concatenated into a
            single ``pa.Array``; a ``pa.Array`` is stored as is.
        dtype : GeometryDtype, pa.DataType, or element dtype, optional
            Used to pick the arrow type for list/array-like input. ``None``
            or the NumPy object dtype lets arrow infer the type. For empty
            input without a dtype, ``'float64'`` is assumed.
        copy : optional
            Not used by the code in this method.

        Raises
        ------
        ValueError
            If ``array`` is none of the supported types.
        """
        # Choose default dtype for empty arrays
        try:
            if len(array) == 0 and dtype is None:
                dtype = 'float64'
        except Exception:
            # len() failed; leave dtype untouched. Catching Exception
            # rather than using a bare except keeps this best-effort while
            # letting KeyboardInterrupt/SystemExit propagate.
            pass

        # See if we can determine arrow array type
        if isinstance(dtype, GeometryDtype):
            # Use arrow type as-is
            arrow_dtype = dtype.arrow_dtype
        elif isinstance(dtype, pa.DataType):
            arrow_dtype = dtype
        elif dtype is not None and dtype != np.dtype('object'):
            # Scalar element dtype
            arrow_dtype = self._arrow_type_from_numpy_element_dtype(dtype)
        else:
            # Let arrow infer type
            arrow_dtype = None

        # Unwrap GeometryList elements to numpy arrays
        if is_array_like(array) or isinstance(array, list):
            array = [_unwrap_geometry(el, self._element_type) for el in array]
            array = pa.array(array, type=arrow_dtype)
        elif isinstance(array, pa.Array):
            # Nothing to do
            pass
        elif isinstance(array, pa.ChunkedArray):
            array = pa.concat_arrays(array.chunks)
        else:
            raise ValueError(
                "Unsupported type passed for {}: {}".format(
                    self.__class__.__name__, type(array)
                )
            )

        # Save off pyarrow array
        self.data = array

        # Compute types
        np_type = self._numpy_element_dtype_from_arrow_type(self.data.type)
        self._numpy_element_type = np.dtype(np_type)
        self._dtype = self._dtype_class(np_type)

        # Initialize backing property for spatial index
        self._sindex = None
Esempio n. 7
0
 def __init__(self, array, dtype=None, copy=None):
     """Store ``array`` as a pyarrow ChunkedArray in ``self.data``.

     Array-likes and lists are converted with ``pa.array`` (using ``dtype``
     as the arrow type) and wrapped in a single chunk; a ``pa.Array`` is
     wrapped in a single chunk; a ``pa.ChunkedArray`` is stored as given.
     Any other input raises ``ValueError``.
     """
     # ``copy`` is not used at the moment. Its only effect will be when
     # array is allowed to be a FletcherArray.
     if is_array_like(array) or isinstance(array, list):
         chunked = pa.chunked_array([pa.array(array, type=dtype)])
     elif isinstance(array, pa.Array):
         # TODO: Assert dtype
         chunked = pa.chunked_array([array])
     elif isinstance(array, pa.ChunkedArray):
         # TODO: Assert dtype
         chunked = array
     else:
         owner = self.__class__.__name__
         raise ValueError(
             "Unsupported type passed for {}: {}".format(owner, type(array))
         )

     self.data = chunked
     self._dtype = FletcherDtype(chunked.type)
     self.offsets = self._calculate_chunk_offsets()
Esempio n. 8
0
    def fillna(self, value=None, method=None, limit=None):
        """Return a new array with the missing entries filled.

        Parameters
        ----------
        value : scalar or array-like, optional
            Replacement for missing entries. An array-like must have the
            same length as ``self``. Unless the value carries a ``GeoDtype``
            dtype, it is type-checked with ``infer_dtype``: strings are
            converted with ``arctern.ST_GeomFromText``, bytes are used as
            given, and anything else is rejected.
        method : str, optional
            Fill method, passed through ``validate_fillna_kwargs``. After
            validation, ``"pad"`` selects ``pad_1d`` and any other value
            selects ``backfill_1d``.
        limit : int, optional
            Forwarded to the pad/backfill function only.

        Raises
        ------
        ValueError
            If an array-like ``value`` has the wrong length, or if the
            value is neither string nor bytes typed.
        """
        from pandas.util._validators import validate_fillna_kwargs
        from pandas.api.types import is_array_like, infer_dtype

        value, method = validate_fillna_kwargs(value, method)
        missing = self.isna()

        if not is_array_like(value):
            # infer_dtype cannot inspect a bare scalar, so wrap it in a list.
            value = [value]
        elif len(value) == len(self):
            value = value[missing]
        else:
            raise ValueError(
                f"Length of 'value' does not match. Got ({len(value)}) "
                f"expected {len(self)}")

        if not missing.any():
            return self.copy()

        if method is not None:
            # Imported lazily: only needed on the propagation path.
            from pandas.core.missing import pad_1d
            from pandas.core.missing import backfill_1d
            propagate = pad_1d if method == "pad" else backfill_1d
            propagated = propagate(self.astype(object), limit=limit, mask=missing)
            return self._from_sequence(propagated, dtype=self.dtype)

        # Translate the fill value unless it already has a GeoDtype dtype.
        if not isinstance(getattr(value, "dtype", value),
                          (GeoDtype, type(None))):
            kind = infer_dtype(value, skipna=True)
            if kind == "string":
                value = arctern.ST_GeomFromText(value)
            elif kind != "bytes":
                raise ValueError(
                    "can only fillna with wkt formed string or wkb formed bytes"
                )

        filled = self.copy()
        filled[missing] = value
        return filled
Esempio n. 9
0
    def __init__(self, array, dtype=None, copy=None):
        """Store ``array`` as a pyarrow ChunkedArray in ``self.data``.

        Array-likes and lists are converted with ``pa.array`` (using
        ``dtype`` as the arrow type); a ``pa.Array`` is wrapped in a single
        chunk; a ``pa.ChunkedArray`` is stored as given. Any other input
        raises ``ValueError``. ``copy`` is not used by this method.
        """
        if is_array_like(array) or isinstance(array, list):
            chunked = pa.chunked_array([pa.array(array, type=dtype)])
        elif isinstance(array, pa.Array):
            # ARROW-7008: pyarrow.chunked_array([array]) fails on an array
            # whose buffers are all None, so rebuild an empty array of the
            # same type first.
            if len(array) == 0 and not any(
                buf is not None for buf in array.buffers()
            ):
                array = pa.array([], type=array.type)
            # TODO: Assert dtype
            chunked = pa.chunked_array([array])
        elif isinstance(array, pa.ChunkedArray):
            # TODO: Assert dtype
            chunked = array
        else:
            raise ValueError(
                f"Unsupported type passed for {self.__class__.__name__}: {type(array)}"
            )

        self.data = chunked
        self._dtype = FletcherDtype(chunked.type)
Esempio n. 10
0
def infer_dtype_bydata(data):
    """Infer the ``DataType`` that describes ``data``.

    Scalars are classified by ``infer_dtype_by_scaladata``. For list-like or
    array-like input whose pandas type string can be inferred, the result is
    ``DataType.FLOAT_VECTOR`` when the mapped type is numeric and
    ``DataType.UNKNOWN`` otherwise. In the remaining cases (inference raised
    ``TypeError``, or the input is neither scalar nor list/array-like) the
    first element is tried as a scalar, and finally ``data.dtype`` is mapped
    with ``map_numpy_dtype_to_datatype``.

    Returns
    -------
    DataType
        ``DataType.UNKNOWN`` when nothing could be inferred.
    """
    if is_scalar(data):
        return infer_dtype_by_scaladata(data)

    if is_list_like(data) or is_array_like(data):
        try:
            type_str = infer_dtype(data)
        except TypeError:
            # Inference not possible; fall through to the element/dtype
            # based fallbacks below.
            pass
        else:
            mapped = dtype_str_map.get(type_str, DataType.UNKNOWN)
            if is_numeric_datatype(mapped):
                return DataType.FLOAT_VECTOR
            return DataType.UNKNOWN

    # Nothing has been inferred at this point: try the first element.
    d_type = DataType.UNKNOWN
    try:
        elem = data[0]
    except Exception:
        # Best effort: empty, unindexable or otherwise failing containers
        # simply have no probe element. Exception (not a bare except) lets
        # KeyboardInterrupt/SystemExit propagate.
        elem = None

    if elem is not None and is_scalar(elem):
        d_type = infer_dtype_by_scaladata(elem)

    if d_type == DataType.UNKNOWN:
        # Last resort: map an attached dtype attribute, if there is one.
        _dtype = getattr(data, "dtype", None)

        if _dtype is not None:
            d_type = map_numpy_dtype_to_datatype(_dtype)

    return d_type
Esempio n. 11
0
def to_geometry_array(data, dtype=None):
    """Convert ``data`` to a geometry extension array.

    Parameters
    ----------
    data
        An existing geometry array (returned unchanged), or an array-like,
        list, tuple, or (when geopandas is available) GeoSeries /
        GeometryArray of geometry scalars or shapely objects.
    dtype : optional
        Target dtype; normalised through ``pd.array([], dtype=dtype).dtype``.
        When given, its array type is constructed directly from ``data``.

    Returns
    -------
    The converted geometry array.

    Raises
    ------
    ValueError
        If ``data`` is empty and no dtype is given, if its type is not
        supported, or if the first non-None element is neither a
        ``Geometry`` nor a known shapely type.
    """
    from . import (LineArray, MultiLineArray, MultiPointArray,
                   MultiPolygonArray, PointArray, PolygonArray, RingArray)

    # Shapely type -> array class; empty when shapely is unavailable.
    shapely_to_spatialpandas = {} if sg is None else {
        sg.Point: PointArray,
        sg.MultiPoint: MultiPointArray,
        sg.LineString: LineArray,
        sg.LinearRing: RingArray,
        sg.MultiLineString: MultiLineArray,
        sg.Polygon: PolygonArray,
        sg.MultiPolygon: MultiPolygonArray,
    }

    # Normalize dtype from string
    if dtype is not None:
        dtype = pd.array([], dtype=dtype).dtype

    err_msg = "Unable to convert data argument to a GeometryList array"

    if is_geometry_array(data):
        # Already a geometry array: keep it as is.
        return data

    convertible = (
        is_array_like(data) or isinstance(data, (list, tuple))
        or gp and isinstance(data, (gp.GeoSeries, gp.array.GeometryArray))
    )
    if not convertible:
        raise ValueError(err_msg)

    if dtype is not None:
        return dtype.construct_array_type()(data, dtype=dtype)

    if len(data) == 0:
        raise ValueError(
            "Cannot infer spatialpandas geometry type from empty collection "
            "without dtype.\n")

    # The first non-None element decides which array type to build.
    first_valid = next((val for val in data if val is not None), None)

    if isinstance(first_valid, Geometry):
        # Pass data to the constructor of the matching geometry array.
        return first_valid.construct_array_type()(data)

    if type(first_valid) not in shapely_to_spatialpandas:
        raise ValueError(err_msg)

    # A mix of single and Multi* shapes is handled by switching to the
    # first Multi* element found, if any.
    if isinstance(first_valid, sg.LineString):
        multi_type = sg.MultiLineString
    elif isinstance(first_valid, sg.Polygon):
        multi_type = sg.MultiPolygon
    else:
        multi_type = None

    if multi_type is not None:
        first_valid = next(
            (val for val in data if isinstance(val, multi_type)), first_valid
        )

    array_type = shapely_to_spatialpandas[type(first_valid)]
    return array_type.from_geopandas(data)
Esempio n. 12
0
    def take(self,
             indices: Sequence[int],
             allow_fill: bool = False,
             fill_value: Any = None):
        """
        Take elements from the array by position.

        Parameters
        ----------
        indices : sequence of int
            Positions to take.
        allow_fill : bool, default False
            How negative values in `indices` are interpreted.

            * False: negative values count from the right; ``len`` of the
              underlying data is added to them before taking.

            * True: negative values mark missing entries, which are set to
              `fill_value`. The indices are checked with
              ``validate_indices`` first.

        fill_value : any, optional
            Value written at the missing positions when `allow_fill` is
            True. When it is NA (per ``isna``), those positions are left as
            the nulls produced by the masked take.

        Returns
        -------
        ExtensionArray
            A new instance of ``type(self)``.

        Raises
        ------
        IndexError
            When taking a non-negative position from empty data, or when
            the largest index is not smaller than the data length.

        See Also
        --------
        numpy.take
        api.extensions.take
        """
        # TODO: Remove once we got rid of the (indices < 0) check
        if is_array_like(indices):
            # error: Incompatible types in assignment (expression has type
            # "Sequence[int]", variable has type "ndarray")
            indices_array = indices  # type: ignore[assignment]
        else:
            indices_array = np.asanyarray(indices)

        n_data = len(self._data)
        if n_data == 0 and (indices_array >= 0).any():
            raise IndexError("cannot do a non-empty take")
        if indices_array.size > 0 and indices_array.max() >= n_data:
            raise IndexError("out of bounds value in 'indices'.")

        if not allow_fill:
            # TODO(ARROW-9432): Treat negative indices as indices from the right.
            if (indices_array < 0).any():
                # Work on a copy so the caller's indices are not modified.
                indices_array = np.copy(indices_array)
                indices_array[indices_array < 0] += n_data
            return type(self)(self._data.take(indices_array))

        fill_mask = indices_array < 0
        if not fill_mask.any():
            # Nothing to fill
            return type(self)(self._data.take(indices))

        validate_indices(indices_array, n_data)
        # TODO(ARROW-9433): Treat negative indices as NULL
        masked_indices = pa.array(indices_array, mask=fill_mask)
        taken = type(self)(self._data.take(masked_indices))
        if not isna(fill_value):
            # TODO: ArrowNotImplementedError: Function fill_null has no
            # kernel matching input types (array[string], scalar[string]),
            # so the fill is done through item assignment instead.
            taken[fill_mask] = fill_value
        return taken
Esempio n. 13
0
 def __getitem__(self, item):
     # type (Any) -> Any
     """Select a subset of self.

     Parameters
     ----------
     item : int, slice, or iterable
         * int: The position in 'self' to get. Negative positions are
           offset by ``len(self)``.
         * slice: A slice object, where 'start', 'stop', and 'step' are
           integers or None.
         * iterable of integers: positions, delegated to ``self.take``.
         * iterable of booleans: a mask; positions where it is True are
           delegated to ``self.take``.

     Returns
     -------
     item : scalar or ExtensionArray
         A ``pa.ChunkedArray`` selection is wrapped in ``type(self)``; any
         other selection is converted with ``as_py()``. An integer at or
         past ``len(self)`` yields ``None``.

     Raises
     ------
     IndexError
         If an iterable ``item`` is neither integer nor boolean typed.
     """
     if isinstance(item, slice):
         n_rows = len(self.data)
         start = item.start or 0
         stop = n_rows if item.stop is None else item.stop
         stop = min(stop, n_rows)
         step = 1 if item.step is None else item.step
         if step != 1:
             # Arrow can't handle slices with steps other than 1
             # https://issues.apache.org/jira/browse/ARROW-2714
             # so the stepped slice is taken on the NumPy side.
             stepped = np.asarray(self)[item]
             arrow_type = self.dtype.arrow_dtype
             is_numeric = pa.types.is_integer(arrow_type) or pa.types.is_floating(
                 arrow_type
             )
             # ARROW-2806: Inconsistent handling of np.nan requires adding a mask
             null_mask = pd.isna(stepped) if is_numeric else None
             return type(self)(pa.array(stepped, type=arrow_type, mask=null_mask))
         if stop - start == 0:
             # Workaround for an Arrow bug that segfaults on empty slices
             # (fixed in Arrow master, to be released in 0.10).
             return type(self)(pa.array([], type=self.data.type))
     elif isinstance(item, Iterable):
         if not is_array_like(item):
             item = np.array(item)
         if is_integer_dtype(item):
             return self.take(item)
         if not is_bool_dtype(item):
             raise IndexError(
                 "Only integers, slices and integer or boolean arrays are valid indices."
             )
         # Boolean mask: take the positions of its True entries.
         true_positions = np.argwhere(np.array(item)).flatten()
         return self.take(true_positions)
     elif is_integer(item):
         if item < 0:
             item += len(self)
         if item >= len(self):
             return None

     selected = self.data[item]
     if not isinstance(selected, pa.ChunkedArray):
         return selected.as_py()
     return type(self)(selected)