def convert_data_to_format(data: np.ndarray, filename: str):
    """Scale ``data`` to the sample format selected by the suffix of ``filename``.

    The buffer of ``data`` is reinterpreted as float32 values before scaling.

    * ``.wav``: multiplied by 32767 and cast to int16.
    * ``.complex16u`` / ``.cu8``: ``127.5 * (x + 1.0)`` cast to uint8.
    * ``.complex16s`` / ``.cs8``: ``127.5 * (x - 0.5 / 127.5)`` cast to int8.
    * any other suffix: ``data`` is returned untouched.
    """
    if filename.endswith(".wav"):
        return (data.view(np.float32) * 32767).astype(np.int16)

    if filename.endswith((".complex16u", ".cu8")):
        shifted = data.view(np.float32) + 1.0
        return (127.5 * shifted).astype(np.uint8)

    if filename.endswith((".complex16s", ".cs8")):
        shifted = data.view(np.float32) - 0.5 / 127.5
        return (127.5 * shifted).astype(np.int8)

    return data
def calc_distance_matrix(x: np.ndarray):
    """Return the matrix of pairwise absolute differences between rows of ``x``.

    ``x`` must be 2-dimensional. Its buffer is reinterpreted as complex128 so
    that each pair of float64 values becomes one complex number; the result is
    ``abs`` of the difference between every pair of those numbers.
    """
    assert x.ndim == 2
    # Trick from http://stackoverflow.com/a/22721540
    as_complex = x.view(dtype=np.complex128)
    grid_a, grid_b = np.meshgrid(as_complex, as_complex)
    difference = grid_a - grid_b
    return abs(difference)
def __setitem__(self, key: str, val: numpy.ndarray):
    """Set an existing field of ``self.data`` or append a new one.

    :param str key: Field name.
    :param ndarray val: Field value. When ``key`` is new, ``val`` is viewed
        with dtype ``[(key, val.dtype)]`` and merged into ``self.data``.
    """
    current = self.data
    if key not in current.dtype.names:
        # New field: wrap the values in a one-field structured view and merge.
        extra = val.view(dtype=[(key, val.dtype)])
        merged = numpy.lib.recfunctions.merge_arrays((current, extra))
        combined_descr = current.dtype.descr + extra.dtype.descr
        self.data = merged.view(dtype=combined_descr)
        return
    self.data[key] = val
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: Optional[str] = None,
    mask: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]:
    """
    Prepare values, mask, dtypes and fill value for a nan-aware reduction.

    The values are copied (and the masked positions overwritten with the fill
    value) only when ``skipna`` is True and both the mask and the resolved
    fill value are not None.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with; must be a scalar
    fill_value_typ : str
        passed through to ``_get_fill_value``
    mask : Optional[np.ndarray]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask as returned by ``_maybe_get_mask``
    dtype : dtype
        dtype of the input values (taken before any int64 view)
    dtype_max : dtype
        int64 for integer/bool input, float64 for float input, else ``dtype``
    fill_value : Any
        fill value used
    """
    # Callers always pass a scalar fill_value; the maybe_upcast_putmask call
    # below depends on that.
    assert is_scalar(fill_value)

    mask = _maybe_get_mask(values, skipna, mask)

    if is_datetime64tz_dtype(values):
        # lib.values_from_object would hand back M8[ns] rather than the
        # tz-aware dtype, so the dtype is captured before unwrapping.
        dtype = values.dtype
        values = getattr(values, "_values", values)
    else:
        values = lib.values_from_object(values)
        dtype = values.dtype

    if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
        # The int64 view must come after the mask has been determined.
        values = getattr(values, "asi8", values)
        values = values.view(np.int64)

    dtype_ok = _na_ok_dtype(dtype)

    # Resolve the fill value (it may need an alternative dtype).
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    needs_fill = (mask is not None) and (fill_value is not None)
    if skipna and needs_fill:
        values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)
        else:
            # promote if needed
            values, _ = maybe_upcast_putmask(values, mask, fill_value)

    # Platform independent precision dtype.
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64
    else:
        dtype_max = dtype

    return values, mask, dtype, dtype_max, fill_value
def pack_complex(complex_samples: np.ndarray):
    """Pack complex64 samples into raw signed 8-bit bytes.

    The samples are viewed as float32 values, mapped with
    ``127.5 * (x - 0.5 / 127.5)``, cast to int8 and returned as ``bytes``.

    :param complex_samples: array with dtype complex64 (asserted).
    :return: the int8 buffer as a ``bytes`` object.
    """
    assert complex_samples.dtype == np.complex64
    scaled = 127.5 * (complex_samples.view(np.float32) - 0.5 / 127.5)
    # tobytes() replaces the deprecated tostring() alias, which is no longer
    # available in current NumPy releases.
    return scaled.astype(np.int8).tobytes()
def __init__(self, qsim_data: np.ndarray, qubit_map: Dict[ops.Qid, int]):
    """Initialise the parent with ``qsim_data`` reinterpreted as complex64.

    :param qsim_data: buffer that is viewed (not copied) as complex64 and
        passed to the parent as ``state_vector``.
    :param qubit_map: forwarded unchanged to the parent initialiser.
    """
    super().__init__(
        state_vector=qsim_data.view(np.complex64),
        qubit_map=qubit_map,
    )
def add_unaligned_array_of_standard_bit_length_primitives(
        self, x: numpy.ndarray) -> None:
    """Serialize ``x`` by handing its bytes to ``add_unaligned_bytes``.

    ``x`` is reinterpreted as an array of ``_Byte`` and forwarded. Boolean and
    object arrays are rejected by assertion.
    """
    # This is much slower than the aligned version because we have to manually
    # copy and shift each byte, but still better than manual elementwise
    # serialization.
    #
    # The builtins are used instead of numpy.bool / numpy.object: those names
    # were plain aliases of the builtins and were removed in NumPy 1.24, so
    # referencing them raised AttributeError before the check could run.
    assert x.dtype not in (bool, numpy.bool_, object)
    self.add_unaligned_bytes(x.view(_Byte))
def intersect(A: np.ndarray, B: np.ndarray, axis=0, assume_unique=False,
              return_indices=False) -> Any:
    """
    Extend numpy's intersect1d to the row- or column-wise intersection of two
    2d arrays. 1d inputs are passed straight to ``np.intersect1d``.

    Args:
        A, B (np.ndarray): arrays of matching widths and datatypes. 2d inputs
            need not be C-contiguous; they are made contiguous before being
            viewed as one structured element per row.
        axis (int): 0 to intersect rows, 1 to intersect columns.
        assume_unique (bool): forwarded to ``np.intersect1d``.
        return_indices (bool): forwarded to ``np.intersect1d``.

    Returns:
        ndarray: sorted array of common rows/cols between the input arrays
        ndarray: the indices of the first occurrences of the common values in
            A. Only provided if return_indices is True.
        ndarray: the indices of the first occurrences of the common values in
            B. Only provided if return_indices is True.

    Raises:
        ValueError: if the ndims or the widths of A and B differ.
        NotImplementedError: for ndim > 2 or an axis other than 0 or 1.
    """
    # see https://stackoverflow.com/questions/8317022/get-intersecting-rows-across-two-2d-numpy-arrays
    if A.ndim != B.ndim:
        raise ValueError("array ndims must match to intersect")

    if A.ndim == 1:
        return np.intersect1d(
            A, B, assume_unique=assume_unique, return_indices=return_indices)

    if A.ndim != 2:
        raise NotImplementedError(
            "intersect is only implemented for 1d or 2d arrays")

    if axis == 1:
        # Columns are handled by intersecting the rows of the transposes.
        out = intersect(
            A.T.copy(), B.T.copy(), axis=0,
            assume_unique=assume_unique, return_indices=return_indices)
        if return_indices:
            return out[0].T, out[1], out[2]
        return out.T

    if axis != 0:
        raise NotImplementedError(
            "intersection can only be performed on first or second axis")

    ncols = A.shape[1]
    if ncols != B.shape[1]:
        raise ValueError("array widths must match to intersect")

    dtype = {
        'names': ['f{}'.format(i) for i in range(ncols)],
        'formats': ncols * [A.dtype]
    }
    # Viewing a row as a single structured element needs a contiguous buffer;
    # ascontiguousarray only copies when the input is not already contiguous.
    A_rows = np.ascontiguousarray(A).view(dtype)
    B_rows = np.ascontiguousarray(B).view(dtype)

    if return_indices:
        C, A_locs, B_locs = np.intersect1d(
            A_rows, B_rows, assume_unique=assume_unique,
            return_indices=return_indices)
        return C.view(A.dtype).reshape(-1, ncols), A_locs, B_locs

    C = np.intersect1d(A_rows, B_rows, assume_unique=assume_unique)
    return C.view(A.dtype).reshape(-1, ncols)
def pack_complex(complex_samples: np.ndarray):
    """Copy the samples into a lock-free float ``Array`` and return it.

    The original note on this function says the complex samples can be passed
    directly to the USRP Send API, so no scaling is applied: the samples are
    viewed as float32 values and written into an ``Array("f", ...)`` holding
    ``2 * len(complex_samples)`` entries.
    """
    n_floats = 2 * len(complex_samples)
    shared = Array("f", n_floats, lock=False)
    # frombuffer gives a writable float32 window onto the Array's memory.
    np.frombuffer(shared, dtype=np.float32)[:] = complex_samples.view(np.float32)
    return shared
def _from_join_target(self, result: np.ndarray):
    """Rebuild an object from a join result.

    ``result`` is viewed with the dtype of ``self._data._ndarray`` (e.g. i8
    back to M8[ns]) and passed to ``self._data._from_backing_data``.
    """
    backing_dtype = self._data._ndarray.dtype
    restored = result.view(backing_dtype)
    return self._data._from_backing_data(restored)
def to_forecasting(timeseries: np.ndarray,
                   forecast: int = 1,
                   axis: int = 0,
                   test_size: Union[int, float, None] = None):
    """Split a timeseries for forecasting tasks.

    Transform a timeseries X into a series of input values X[t] and a series
    of output values X[t + forecast]. Optionally also split both series into a
    training part and a testing part.

    Parameters
    ----------
    timeseries : np.ndarray
        Timeseries to split.
    forecast : int, optional
        Number of time lag steps between the inputs and the outputs, by
        default 1. With ``forecast=0`` both returned series are the whole
        timeseries.
    axis : int, optional
        Time axis of the timeseries, by default 0.
    test_size : int or float, optional
        If set, the last ``test_size`` timesteps are returned separately as a
        testing phase. A float in [0, 1[ is read as a ratio of the timeseries
        length, by default None.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)``, or ``(X, X_test, y, y_test)`` when the resulting test
        length is greater than zero.

    Raises
    ------
    ValueError
        If ``test_size`` is neither an int nor a float in [0, 1[.
    """
    series_ = np.moveaxis(timeseries.view(), axis, 0)
    time_len = series_.shape[0]

    if test_size is None:
        test_len = 0
    elif isinstance(test_size, float) and 0 <= test_size < 1:
        test_len = round(time_len * test_size)
    elif isinstance(test_size, int):
        test_len = test_size
    else:
        raise ValueError("invalid test_size argument: "
                         "test_size can be an integer or a float "
                         f"in [0, 1[, but is {test_size}.")

    # ``series_[:-0]`` is empty, so a zero lag has to keep the whole series.
    X = series_[:-forecast] if forecast else series_
    y = series_[forecast:]

    if test_len > 0:
        X_t = X[-test_len:]
        y_t = y[-test_len:]
        X = X[:-test_len]
        y = y[:-test_len]

        # Put the time axis back where the caller had it.
        X = np.moveaxis(X, 0, axis)
        X_t = np.moveaxis(X_t, 0, axis)
        y = np.moveaxis(y, 0, axis)
        y_t = np.moveaxis(y_t, 0, axis)

        return X, X_t, y, y_t

    return np.moveaxis(X, 0, axis), np.moveaxis(y, 0, axis)
def __init__(self, n: int, array: np.ndarray = None):
    """Initialise the parent with ``(1, n, view of array)``.

    :param n: forwarded to the parent as its second positional argument.
    :param array: optional array; a view of it (not the array itself) is
        forwarded. When omitted, ``None`` is forwarded instead.
        NOTE(review): assumes the parent initialiser accepts ``None`` for the
        array argument - confirm against the base class.
    """
    # The declared default is None, which has no .view(); only take a view
    # when an array was actually supplied.
    backing = None if array is None else array.view()
    super().__init__(1, n, backing)
def astype_nansafe(arr: np.ndarray,
                   dtype: DtypeObj,
                   copy: bool = True,
                   skipna: bool = False) -> ArrayLike:
    """
    Cast the elements of an array to a given dtype in a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype or ExtensionDtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna : bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit;
        also raised when NaT values would have to be converted to int64.
    TypeError
        A datetime64/timedelta64 array is cast to an unsupported dtype.
    """
    # 0-dim input reaches us from sparse.
    arr = np.atleast_1d(arr)

    # Extension dtypes build their own array type.
    if isinstance(dtype, ExtensionDtype):
        array_cls = dtype.construct_array_type()
        return array_cls._from_sequence(arr, dtype=dtype, copy=copy)

    if not isinstance(dtype, np.dtype):  # pragma: no cover
        raise ValueError("dtype must be np.dtype or ExtensionDtype")

    to_str = issubclass(dtype.type, str)

    if arr.dtype.kind in ["m", "M"] and (to_str or dtype == _dtype_obj):
        from pandas.core.construction import ensure_wrapped_if_datetimelike

        wrapped = ensure_wrapped_if_datetimelike(arr)
        return wrapped.astype(dtype, copy=copy)

    if to_str:
        original_shape = arr.shape
        if arr.ndim > 1:
            arr = arr.ravel()
        as_strings = lib.ensure_string_array(
            arr, skipna=skipna, convert_na_value=False)
        return as_strings.reshape(original_shape)

    if is_datetime64_dtype(arr.dtype):
        if dtype == np.int64:
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == "M":
            return arr.astype(dtype)

        raise TypeError(
            f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")

    if is_timedelta64_dtype(arr.dtype):
        if dtype == np.int64:
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        if dtype.kind == "m":
            return astype_td64_unit_conversion(arr, dtype, copy=copy)

        raise TypeError(
            f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")

    if np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype):
        return _astype_float_to_int_nansafe(arr, dtype, copy)

    if is_object_dtype(arr.dtype):
        # A datetime/timedelta array of objects is first coerced to a proper
        # dtype, then astype_nansafe is called again on the result.
        if is_datetime64_dtype(dtype):
            from pandas import to_datetime

            coerced = to_datetime(arr.ravel()).values.reshape(arr.shape)
            return astype_nansafe(coerced, dtype, copy=copy)

        if is_timedelta64_dtype(dtype):
            # bc we know arr.dtype == object, this is equivalent to
            # `np.asarray(to_timedelta(arr))`, but using a lower-level API
            # that does not require a circular import.
            as_td64 = array_to_timedelta64(arr).view("m8[ns]")
            return as_td64.astype(dtype, copy=False)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = (f"The '{dtype.name}' dtype has no unit. Please pass in "
               f"'{dtype.name}[ns]' instead.")
        raise ValueError(msg)

    if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    return arr.astype(dtype, copy=copy)
def forward(self, x: numpy.ndarray):
    """Flatten every dimension after the first.

    ``x`` must provide ``size(0)`` and ``view(rows, -1)``.
    NOTE(review): that is the torch tensor API rather than the ndarray API
    named in the annotation - confirm the intended input type.
    """
    leading = x.size(0)
    return x.view(leading, -1)
def forward(self, state: np.ndarray) -> np.ndarray:
    """Run ``self.conv``, flatten all but the first dimension, run ``self.fc``.

    The flatten step uses ``size(0)`` and ``view(rows, -1)`` on the output of
    ``self.conv``.
    NOTE(review): that is the torch tensor API rather than the ndarray API
    named in the annotations - confirm the intended types.
    """
    features = self.conv(state)
    flat = features.view(features.size(0), -1)
    return self.fc(flat)
def pack_complex(complex_samples: np.ndarray):
    """Convert complex64 samples to an int8 array.

    The samples are viewed as float32 values, mapped with
    ``127.5 * (x - 0.5 / 127.5)`` and cast to int8.

    :param complex_samples: array with dtype complex64 (asserted).
    """
    assert complex_samples.dtype == np.complex64
    as_floats = complex_samples.view(np.float32)
    scaled = 127.5 * (as_floats - 0.5 / 127.5)
    return scaled.astype(np.int8)
def pack_complex(complex_samples: np.ndarray):
    """Return the samples reinterpreted as float32 values.

    The original note on this function says the complex samples can be passed
    directly to the LimeSDR Send API, so no scaling or copying is done here;
    the result is a view onto the same buffer.
    """
    float_view = complex_samples.view(np.float32)
    return float_view
def nanpercentile(
    values: np.ndarray,
    q,
    *,
    axis: int,
    na_value,
    mask: np.ndarray,
    ndim: int,
    interpolation,
):
    """
    Wrapper for np.percentile that skips missing values.

    Parameters
    ----------
    values : array over which to find quantiles
    q : scalar or array of quantile indices to find
    axis : {0, 1}
    na_value : scalar
        value to return for empty or all-null values
    mask : ndarray[bool]
        locations in values that should be considered missing
    ndim : {1, 2}
    interpolation : str

    Returns
    -------
    quantiles : scalar or array
    """
    if values.dtype.kind in ["m", "M"]:
        # need to cast to integer to avoid rounding errors in numpy
        result = nanpercentile(
            values.view("i8"),
            q=q,
            axis=axis,
            na_value=na_value.view("i8"),
            mask=mask,
            ndim=ndim,
            interpolation=interpolation,
        )

        # Note: we have to do `astype` and not view because in general we
        #  have float result at this point, not i8
        return result.astype(values.dtype)

    if not lib.is_scalar(mask) and mask.any():
        if ndim == 1:
            return _nanpercentile_1d(
                values, mask, q, na_value, interpolation=interpolation
            )

        # for nonconsolidatable blocks mask is 1D, but values 2D
        if mask.ndim < values.ndim:
            mask = mask.reshape(values.shape)
        if axis == 0:
            values = values.T
            mask = mask.T

        per_row = [
            _nanpercentile_1d(val, m, q, na_value, interpolation=interpolation)
            for (val, m) in zip(list(values), list(mask))
        ]
        # np.asarray instead of np.array(..., copy=False): under NumPy >= 2.0
        # copy=False raises when a copy is unavoidable, which is always the
        # case when converting a Python list.
        return np.asarray(per_row, dtype=values.dtype).T

    return np.percentile(values, q, axis=axis, interpolation=interpolation)
def pack_complex(complex_samples: np.ndarray):
    """Copy the samples, viewed as float32, into a lock-free float ``Array``.

    The returned ``Array("f", ...)`` holds ``2 * len(complex_samples)``
    entries filled from ``complex_samples.view(np.float32)``.
    """
    buffer = Array("f", 2 * len(complex_samples), lock=False)
    target = np.frombuffer(buffer, dtype=np.float32)
    # Slice assignment writes through to the Array's memory.
    target[:] = complex_samples.view(np.float32)
    return buffer
def pack_complex(complex_samples: np.ndarray):
    """Return the raw bytes of complex64 samples viewed as float32 values.

    :param complex_samples: array with dtype complex64 (asserted).
    :return: ``bytes`` holding the float32 view of the samples.
    """
    assert complex_samples.dtype == np.complex64
    # tobytes() replaces the deprecated tostring() alias, which is no longer
    # available in current NumPy releases.
    return complex_samples.view(np.float32).tobytes()
def update_remaining_params(self, user_data: np.ndarray, params: np.ndarray) -> None:
    """Fill ``user_data`` in place with ``params``.

    ``user_data`` is viewed with
    ``self.params_subset.remainder.subset_view_dtype`` and that view is filled
    with ``params``.

    :raises ValueError: if ``self.user_data_dtype`` and ``self.params_dtype``
        differ; such a problem has to override this method.
    """
    dtypes_match = self.user_data_dtype == self.params_dtype
    if not dtypes_match:
        # The message names this method; it previously pointed at
        # `update_subset_params`.
        raise ValueError('Problem needs to overwrite `update_remaining_params`.')
    view_dtype = self.params_subset.remainder.subset_view_dtype
    user_data.view(view_dtype).fill(params)
def pack_complex(complex_samples: np.ndarray):
    """Pack samples into raw unsigned 8-bit bytes.

    The samples are viewed as float32 values, mapped with
    ``127.5 * (x + 1.0)``, cast to uint8 and returned as ``bytes``.
    """
    scaled = 127.5 * (complex_samples.view(np.float32) + 1.0)
    # tobytes() replaces the deprecated tostring() alias, which is no longer
    # available in current NumPy releases.
    return scaled.astype(np.uint8).tobytes()
def extract_subset_params(self, user_data: np.ndarray, out: Optional[np.ndarray] = None) -> None:
    """Fill ``out`` with ``user_data`` viewed as the subset dtype.

    When ``out`` is None, a single element of an empty array with dtype
    ``self.params_subset.subset_dtype`` is used as the target instead.
    Nothing is returned.

    :raises ValueError: if ``self.user_data_dtype`` and ``self.params_dtype``
        differ; such a problem has to override this method.
    """
    dtypes_match = self.user_data_dtype == self.params_dtype
    if not dtypes_match:
        raise ValueError('Problem needs to overwrite `extract_subset_params`.')

    subset_dtype = self.params_subset.subset_dtype
    if out is None:
        out = np.empty((1,), dtype=subset_dtype)[0]
    out.fill(user_data.view(subset_dtype))
def _view_row_as_element(array: np.ndarray) -> np.ndarray:
    """View a 2d array so that each row becomes one structured element.

    The structured dtype has one field per column (``f0``, ``f1``, ...), each
    with the dtype of ``array``. Unpacking the shape into two values means a
    non-2d input raises.
    """
    _, width = array.shape
    field_names = ["f{}".format(i) for i in range(width)]
    row_dtype = {"names": field_names, "formats": [array.dtype] * width}
    return array.view(row_dtype)
def _encode_image(self, np_image: np.ndarray) -> bytes:
    """Validate a float image, bitcast it to uint8 and encode it via the parent.

    The image is checked against ``self._float_shape`` and ``tf.float32`` by
    ``_validate_np_array``, then reinterpreted as uint8 (1 channel float32 ->
    4 channels uint8) and passed to the parent's ``_encode_image``.
    """
    _validate_np_array(np_image, shape=self._float_shape, dtype=tf.float32)
    as_uint8 = np_image.view(np.uint8)
    return super()._encode_image(as_uint8)
def estimate_channel_freq_domain(
        self,
        received_signal: np.ndarray,
        num_taps_to_keep: int,
        extra_dimension: bool = True) -> np.ndarray:
    """
    Estimate the channel from a received reference signal with cover codes.

    The cover code (``self.cover_code``) is applied and averaged out, and the
    result is handed to the base class ``estimate_channel_freq_domain``.

    Parameters
    ----------
    received_signal : np.ndarray
        The received reference signal (in the frequency domain). With `Nr`
        receive antennas, `Ne` reference signal elements (size without cover
        code) and cover code size `Nc`, the expected dimension is:

        =================  =====================  ======================
        /                  extra_dimension: True  extra_dimension: False
        =================  =====================  ======================
        Single Antenna     Nc x Ne (2D)           Ne * Nc (1D)
        Multiple Antennas  Nr x Nc x Ne (3D)      Nr x (Ne * Nc) (2D)
        =================  =====================  ======================
    num_taps_to_keep : int
        Number of taps (in delay domain) to keep; forwarded to the base
        class implementation.
    extra_dimension : bool
        If True, `received_signal` already has a dimension for the cover
        code. If False, that dimension is folded into the reference signal
        elements and is split out here.

    Returns
    -------
    freq_response : np.ndarray
        The channel frequency response returned by the base class.

    Raises
    ------
    RuntimeError
        If the dimension of `received_signal` matches none of the cases.
    """
    # Work on a view so that reshaping never touches the caller's array
    # object.
    r = received_signal.view()

    if extra_dimension is False:
        # Split the cover code dimension out of the flattened elements.
        code_size = self.cover_code.size
        if received_signal.ndim == 1:
            # Single antenna: cover code becomes the first dimension.
            r.shape = (code_size, -1)
        elif received_signal.ndim == 2:
            # Multiple antennas: cover code becomes the second dimension.
            r.shape = (r.shape[0], code_size, -1)
        else:
            raise RuntimeError(
                'Invalid dimension for received_signal: {0}'.format(
                    r.ndim))

    # Apply the cover code and average over its dimension.
    if r.ndim == 2:
        weighted = r * self.cover_code[:, np.newaxis]
        r_mean = np.mean(weighted, axis=0)
    elif r.ndim == 3:
        weighted = r * self.cover_code[np.newaxis, :, np.newaxis]
        r_mean = np.mean(weighted, axis=1)
    else:
        raise RuntimeError(
            'Invalid dimension for received_signal: {0}'.format(r.ndim))

    # The actual estimation is done by the base class.
    return super().estimate_channel_freq_domain(r_mean, num_taps_to_keep)
def _call_cython_op(
    self,
    values: np.ndarray,  # np.ndarray[ndim=2]
    *,
    min_count: int,
    ngroups: int,
    comp_ids: np.ndarray,
    mask: np.ndarray | None,
    **kwargs,
) -> np.ndarray:  # np.ndarray[ndim=2]
    """Prepare ``values``, run the cython function and post-process the result.

    The input is converted to a dtype the cython function accepts, transposed,
    passed to the function selected by ``get_cython_func_and_vals``, and the
    transposed result is downcast back unless ``self.how`` is in
    ``self.cast_blocklist``.
    """
    orig_values = values
    dtype = values.dtype

    is_numeric = is_numeric_dtype(dtype)
    is_datetimelike = needs_i8_conversion(dtype)

    if is_datetimelike:
        values = values.view("int64")
        is_numeric = True
    elif is_bool_dtype(dtype):
        values = values.astype("int64")
    elif is_integer_dtype(dtype):
        # e.g. uint8 -> uint64, int16 -> int64
        values = values.astype(dtype.kind + "8", copy=False)
    elif is_numeric and not is_complex_dtype(dtype):
        values = ensure_float64(values)

    values = values.T
    if mask is not None:
        mask = mask.T

    out_shape = self._get_output_shape(ngroups, values)
    func, values = self.get_cython_func_and_vals(values, is_numeric)
    out_dtype = self.get_out_dtype(values.dtype)

    result = maybe_fill(np.empty(out_shape, dtype=out_dtype))

    aggregating = self.kind == "aggregate"
    if aggregating:
        counts = np.zeros(ngroups, dtype=np.int64)
        if self.how in ["min", "max"]:
            func(
                result,
                counts,
                values,
                comp_ids,
                min_count,
                is_datetimelike=is_datetimelike,
            )
        else:
            func(result, counts, values, comp_ids, min_count)
    elif self.uses_mask():
        # TODO: min_count
        func(
            result,
            values,
            comp_ids,
            ngroups,
            is_datetimelike,
            mask=mask,
            **kwargs,
        )
    else:
        # TODO: min_count
        func(result, values, comp_ids, ngroups, is_datetimelike, **kwargs)

    if aggregating and is_integer_dtype(result.dtype) and not is_datetimelike:
        # counts is defined here. Locations where count<min_count need to
        # have the result set to np.nan, which may require casting,
        # see GH#40767
        empty_groups = counts < max(1, min_count)
        if empty_groups.any():
            # Note: this conversion could be lossy, see GH#40767
            result = result.astype("float64")
            result[empty_groups] = np.nan

    result = result.T

    if self.how in self.cast_blocklist:
        # "rank" is the only member of cast_blocklist we get here
        op_result = result
    else:
        # e.g. if we are int64 and need to restore to datetime64/timedelta64
        res_dtype = self._get_result_dtype(orig_values.dtype)
        op_result = maybe_downcast_to_dtype(result, res_dtype)

    # error: Incompatible return value type (got "Union[ExtensionArray, ndarray]",
    # expected "ndarray")
    return op_result  # type: ignore[return-value]
def pack_complex(complex_samples: np.ndarray):
    """Write complex64 samples as signed 8-bit values into a byte ``Array``.

    The samples are viewed as float32 values, mapped with
    ``127.5 * (x - 0.5 / 127.5)`` and cast to int8. The result is stored
    through an int8 window onto a lock-free ``Array("B", ...)`` holding
    ``2 * len(complex_samples)`` entries, and that Array is returned.

    :param complex_samples: array with dtype complex64 (asserted).
    """
    assert complex_samples.dtype == np.complex64
    shared = Array("B", 2 * len(complex_samples), lock=False)
    scaled = 127.5 * (complex_samples.view(np.float32) - 0.5 / 127.5)
    np.frombuffer(shared, dtype=np.int8)[:] = scaled.astype(np.int8)
    return shared
def pack_complex(complex_samples: np.ndarray):
    """Pack samples into raw unsigned 8-bit bytes.

    The samples are viewed as float32 values, mapped with
    ``127.5 * (x + 1.0)``, cast to uint8 and returned as ``bytes``.
    """
    as_floats = complex_samples.view(np.float32)
    as_uint8 = (127.5 * (as_floats + 1.0)).astype(np.uint8)
    # tobytes() replaces the deprecated tostring() alias, which is no longer
    # available in current NumPy releases.
    return as_uint8.tobytes()
def pack_complex(complex_samples: np.ndarray):
    """Write the samples as left-shifted int16 values into a short ``Array``.

    The samples are viewed as float32 values, multiplied by 2048, cast to
    int16 and shifted left by 4 bits before being stored in a lock-free
    ``Array("h", ...)`` holding ``2 * len(complex_samples)`` entries.

    Format reference:
    https://wiki.analog.com/resources/eval/user-guides/ad-fmcomms2-ebz/software/basic_iq_datafiles#binary_format
    """
    shared = Array("h", 2 * len(complex_samples), lock=False)
    quantised = (2048 * complex_samples.view(np.float32)).astype(np.int16)
    target = np.frombuffer(shared, dtype=np.int16)
    target[:] = np.left_shift(quantised, 4)
    return shared
def from_array(cls, array: np.ndarray) -> 'Coordinate':
    """Return ``array`` viewed as ``cls``; the array must have shape ``(3,)``.

    :raises ValueError: if ``array.shape`` is not ``(3,)``.
    """
    if array.shape == (3,):
        return array.view(cls)
    raise ValueError('Invalid shape for a coordinate object')
def pack_complex(complex_samples: np.ndarray):
    """Convert complex64 samples to an int8 array.

    Each float32 component ``x`` of the samples is mapped to
    ``127.5 * (x - 0.5 / 127.5)`` and cast to int8.

    :param complex_samples: array with dtype complex64 (asserted).
    """
    assert complex_samples.dtype == np.complex64
    offset = 0.5 / 127.5
    components = complex_samples.view(np.float32)
    return (127.5 * (components - offset)).astype(np.int8)
def as_vector(a: np.ndarray):
    """Return a view of a complex line array that behaves as an Nx2 real array.

    The buffer of ``a`` is reinterpreted with dtype ``float`` and reshaped to
    ``(len(a), 2)``.
    """
    n_rows = len(a)
    reals = a.view(dtype=float)
    return reals.reshape(n_rows, 2)
def _byte_buffer(cls, data: np.ndarray) -> np.ndarray:
    """Reinterpret a contiguous array as a flat array of bytes."""
    flat = data.view()
    # Assigning the shape (instead of calling reshape) disallows a copy.
    flat.shape = (data.size,)
    return flat.view(np.uint8)
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: Optional[str] = None,
    mask: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]:
    """
    Prepare values, mask, dtypes and fill value for a nan-aware reduction.

    Masked positions are replaced by the fill value only when ``skipna`` is
    True, the mask and the resolved fill value are both not None, and the
    mask has at least one True entry. The input array itself is not modified.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with; must be a scalar
    fill_value_typ : str
        passed through to ``_get_fill_value``
    mask : Optional[np.ndarray]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask as returned by ``_maybe_get_mask``
    dtype : np.dtype
        dtype of the extracted values (taken before any i8 view)
    dtype_max : np.dtype
        int64 for integer/bool input, float64 for float input, else ``dtype``
    fill_value : Any
        fill value used
    """
    # Callers always pass a scalar fill_value; the np.where call below
    # depends on that.
    assert is_scalar(fill_value)
    values = extract_array(values, extract_numpy=True)

    mask = _maybe_get_mask(values, skipna, mask)

    dtype = values.dtype

    datetimelike = False
    if needs_i8_conversion(values.dtype):
        # The i8 view must come after the mask has been determined.
        values = np.asarray(values.view("i8"))
        datetimelike = True

    dtype_ok = _na_ok_dtype(dtype)

    # Resolve the fill value (it may need an alternative dtype).
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    if (
        skipna
        and (mask is not None)
        and (fill_value is not None)
        and mask.any()
    ):
        if dtype_ok or datetimelike:
            values = values.copy()
            np.putmask(values, mask, fill_value)
        else:
            # np.where will promote if needed
            values = np.where(~mask, values, fill_value)

    # Platform independent precision dtype.
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.dtype(np.int64)
    elif is_float_dtype(dtype):
        dtype_max = np.dtype(np.float64)
    else:
        dtype_max = dtype

    return values, mask, dtype, dtype_max, fill_value
def _transform_input(self, input_data: np.ndarray):
    """Flatten every dimension of ``input_data`` after the first.

    Calls ``input_data.view(input_data.shape[0], -1)``.
    NOTE(review): passing sizes to ``view`` is the torch tensor API rather
    than the ndarray API named in the annotation - confirm the input type.
    """
    n_rows = input_data.shape[0]
    return input_data.view(n_rows, -1)
def pack_complex(complex_samples: np.ndarray):
    """Return a lock-free float ``Array`` holding the samples as float32 values.

    The Array has ``2 * len(complex_samples)`` entries and is filled from
    ``complex_samples.view(np.float32)`` through a NumPy window onto its
    memory.
    """
    sample_count = len(complex_samples)
    storage = Array("f", 2 * sample_count, lock=False)
    window = np.frombuffer(storage, dtype=np.float32)
    window[:] = complex_samples.view(np.float32)
    return storage
def _call_cython_op(
    self,
    values: np.ndarray,  # np.ndarray[ndim=2]
    *,
    min_count: int,
    ngroups: int,
    comp_ids: np.ndarray,
    mask: npt.NDArray[np.bool_] | None,
    result_mask: npt.NDArray[np.bool_] | None,
    **kwargs,
) -> np.ndarray:  # np.ndarray[ndim=2]
    """Prepare ``values``, run the cython function and post-process the result.

    The input is converted to a dtype the cython function accepts, transposed
    (together with ``mask`` and ``result_mask`` when given), passed to the
    function selected by ``_get_cython_function``, and the transposed result
    is downcast back unless ``self.how`` is in ``self.cast_blocklist``.
    """
    orig_values = values
    dtype = values.dtype

    is_numeric = is_numeric_dtype(dtype)
    is_datetimelike = needs_i8_conversion(dtype)

    if is_datetimelike:
        values = values.view("int64")
        is_numeric = True
    elif is_bool_dtype(dtype):
        values = values.astype("int64")
    elif is_integer_dtype(dtype):
        # GH#43329 If the dtype is explicitly of type uint64 the type is not
        # changed to prevent overflow.
        if dtype != np.uint64:
            values = values.astype(np.int64, copy=False)
    elif is_numeric and not is_complex_dtype(dtype):
        values = ensure_float64(values)

    values = values.T
    if mask is not None:
        mask = mask.T
        if result_mask is not None:
            result_mask = result_mask.T

    out_shape = self._get_output_shape(ngroups, values)
    func = self._get_cython_function(self.kind, self.how, values.dtype, is_numeric)
    values = self._get_cython_vals(values)
    out_dtype = self._get_out_dtype(values.dtype)

    result = maybe_fill(np.empty(out_shape, dtype=out_dtype))

    aggregating = self.kind == "aggregate"
    if aggregating:
        counts = np.zeros(ngroups, dtype=np.int64)
        if self.how in ["min", "max", "mean"]:
            func(
                out=result,
                counts=counts,
                values=values,
                labels=comp_ids,
                min_count=min_count,
                mask=mask,
                result_mask=result_mask,
                is_datetimelike=is_datetimelike,
            )
        elif self.how in ["first", "last"]:
            func(
                out=result,
                counts=counts,
                values=values,
                labels=comp_ids,
                min_count=min_count,
                mask=mask,
                result_mask=result_mask,
            )
        elif self.how in ["add"]:
            # We support datetimelike
            func(
                out=result,
                counts=counts,
                values=values,
                labels=comp_ids,
                min_count=min_count,
                is_datetimelike=is_datetimelike,
            )
        else:
            func(result, counts, values, comp_ids, min_count)
    elif self.uses_mask():
        # TODO: min_count
        func(
            out=result,
            values=values,
            labels=comp_ids,
            ngroups=ngroups,
            is_datetimelike=is_datetimelike,
            mask=mask,
            result_mask=result_mask,
            **kwargs,
        )
    else:
        # TODO: min_count
        func(
            out=result,
            values=values,
            labels=comp_ids,
            ngroups=ngroups,
            is_datetimelike=is_datetimelike,
            **kwargs,
        )

    if aggregating and is_integer_dtype(result.dtype) and not is_datetimelike:
        # counts is defined here. Locations where count<min_count need to
        # have the result set to np.nan, which may require casting,
        # see GH#40767
        empty_groups = counts < max(1, min_count)
        if empty_groups.any():
            if result_mask is not None and self.uses_mask():
                assert result_mask[empty_groups].all()
            else:
                # Note: this conversion could be lossy, see GH#40767
                result = result.astype("float64")
                result[empty_groups] = np.nan

    result = result.T

    if self.how in self.cast_blocklist:
        # "rank" is the only member of cast_blocklist we get here
        op_result = result
    else:
        # e.g. if we are int64 and need to restore to datetime64/timedelta64
        res_dtype = self._get_result_dtype(orig_values.dtype)
        op_result = maybe_downcast_to_dtype(result, res_dtype)

    # error: Incompatible return value type (got "Union[ExtensionArray, ndarray]",
    # expected "ndarray")
    return op_result  # type: ignore[return-value]
def _nanpercentile(
    values: np.ndarray,
    qs: npt.NDArray[np.float64],
    *,
    na_value,
    mask: npt.NDArray[np.bool_],
    interpolation: str,
):
    """
    Wrapper for np.percentile that skips missing values.

    Parameters
    ----------
    values : np.ndarray[ndim=2]  over which to find quantiles
    qs : np.ndarray[float64] of quantile indices to find
    na_value : scalar
        value to return for empty or all-null values
    mask : np.ndarray[bool]
        locations in values that should be considered missing; when any entry
        is True its shape must equal ``values.shape`` (asserted)
    interpolation : str

    Returns
    -------
    quantiles : scalar or array
    """
    if values.dtype.kind in ["m", "M"]:
        # need to cast to integer to avoid rounding errors in numpy
        result = _nanpercentile(
            values.view("i8"),
            qs=qs,
            na_value=na_value.view("i8"),
            mask=mask,
            interpolation=interpolation,
        )

        # Note: we have to do `astype` and not view because in general we
        #  have float result at this point, not i8
        return result.astype(values.dtype)

    if not mask.any():
        return np.percentile(
            values,
            qs,
            axis=1,
            # error: No overload variant of "percentile" matches argument types
            # "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]",
            # "int", "Dict[str, str]"  [call-overload]
            **{np_percentile_argname: interpolation},  # type: ignore[call-overload]
        )

    # Caller is responsible for ensuring mask shape match
    assert mask.shape == values.shape
    per_row = [
        _nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
        for (val, m) in zip(list(values), list(mask))
    ]

    # np.asarray instead of np.array(..., copy=False): under NumPy >= 2.0
    # copy=False raises when a copy is unavoidable, which is always the case
    # when converting a Python list.
    if values.dtype.kind == "f":
        # preserve itemsize
        result = np.asarray(per_row, dtype=values.dtype).T
    else:
        result = np.asarray(per_row).T
        if (result.dtype != values.dtype
                and (result == result.astype(values.dtype, copy=False)).all()):
            # e.g. values is integer dtype and result is floating dtype,
            #  only cast back to integer dtype if result values are all-integer.
            result = result.astype(values.dtype, copy=False)
    return result