def view_element_first_byte(x): """Construct an array viewing the first byte of each element of `x`""" from numpy1.lib.stride_tricks import DummyArray interface = dict(x.__array_interface__) interface['typestr'] = '|b1' interface['descr'] = [('', '|b1')] return np.asarray(DummyArray(interface, x))
def test_array_richcompare_legacy_weirdness(self): # It doesn't really work to use assert_deprecated here, b/c part of # the point of assert_deprecated is to check that when warnings are # set to "error" mode then the error is propagated -- which is good! # But here we are testing a bunch of code that is deprecated *because* # it has the habit of swallowing up errors and converting them into # different warnings. So assert_warns will have to be sufficient. assert_warns(FutureWarning, lambda: np.arange(2) == "a") assert_warns(FutureWarning, lambda: np.arange(2) != "a") # No warning for scalar comparisons with warnings.catch_warnings(): warnings.filterwarnings("error") assert_(not (np.array(0) == "a")) assert_(np.array(0) != "a") assert_(not (np.int16(0) == "a")) assert_(np.int16(0) != "a") for arg1 in [np.asarray(0), np.int16(0)]: struct = np.zeros(2, dtype="i4,i4") for arg2 in [struct, "a"]: for f in [operator.lt, operator.le, operator.gt, operator.ge]: if sys.version_info[0] >= 3: # py3 with warnings.catch_warnings() as l: warnings.filterwarnings("always") assert_raises(TypeError, f, arg1, arg2) assert_(not l) else: # py2 assert_warns(DeprecationWarning, f, arg1, arg2)
def _guessvartypes(arr): """ Tries to guess the dtypes of the str_ ndarray `arr`. Guesses by testing element-wise conversion. Returns a list of dtypes. The array is first converted to ndarray. If the array is 2D, the test is performed on the first line. An exception is raised if the file is 3D or more. """ vartypes = [] arr = np.asarray(arr) if arr.ndim == 2: arr = arr[0] elif arr.ndim > 2: raise ValueError("The array should be 2D at most!") # Start the conversion loop. for f in arr: try: int(f) except (ValueError, TypeError): try: float(f) except (ValueError, TypeError): try: complex(f) except (ValueError, TypeError): vartypes.append(arr.dtype) else: vartypes.append(np.dtype(complex)) else: vartypes.append(np.dtype(float)) else: vartypes.append(np.dtype(int)) return vartypes
def test_view(self): fp = memmap(self.tmpfp, dtype=self.dtype, shape=self.shape) new1 = fp.view() new2 = new1.view() assert_(new1.base is fp) assert_(new2.base is fp) new_array = asarray(fp) assert_(new_array.base is fp)
def _normalize_shape(ndarray, shape, cast_to_int=True): """ Private function which does some checks and normalizes the possibly much simpler representations of 'pad_width', 'stat_length', 'constant_values', 'end_values'. Parameters ---------- narray : ndarray Input ndarray shape : {sequence, array_like, float, int}, optional The width of padding (pad_width), the number of elements on the edge of the narray used for statistics (stat_length), the constant value(s) to use when filling padded regions (constant_values), or the endpoint target(s) for linear ramps (end_values). ((before_1, after_1), ... (before_N, after_N)) unique number of elements for each axis where `N` is rank of `narray`. ((before, after),) yields same before and after constants for each axis. (constant,) or val is a shortcut for before = after = constant for all axes. cast_to_int : bool, optional Controls if values in ``shape`` will be rounded and cast to int before being returned. Returns ------- normalized_shape : tuple of tuples val => ((val, val), (val, val), ...) [[val1, val2], [val3, val4], ...] => ((val1, val2), (val3, val4), ...) ((val1, val2), (val3, val4), ...) => no change [[val1, val2], ] => ((val1, val2), (val1, val2), ...) ((val1, val2), ) => ((val1, val2), (val1, val2), ...) [[val , ], ] => ((val, val), (val, val), ...) ((val , ), ) => ((val, val), (val, val), ...) """ ndims = ndarray.ndim # Shortcut shape=None if shape is None: return ((None, None), ) * ndims # Convert any input `info` to a NumPy array shape_arr = np.asarray(shape) try: shape_arr = np.broadcast_to(shape_arr, (ndims, 2)) except ValueError: fmt = "Unable to create correctly shaped tuple from %s" raise ValueError(fmt % (shape, )) # Cast if necessary if cast_to_int is True: shape_arr = np.round(shape_arr).astype(int) # Convert list of lists to tuple of tuples return tuple(tuple(axis) for axis in shape_arr.tolist())
def _ravel_and_check_weights(a, weights): """ Check a and weights have matching shapes, and ravel both """ a = np.asarray(a) # Ensure that the array is a "subtractable" dtype if a.dtype == np.bool_: warnings.warn( "Converting input from {} to {} for compatibility.".format( a.dtype, np.uint8), RuntimeWarning, stacklevel=2) a = a.astype(np.uint8) if weights is not None: weights = np.asarray(weights) if weights.shape != a.shape: raise ValueError('weights should have the same shape as a.') weights = weights.ravel() a = a.ravel() return a, weights
def _getformats(data): """ Returns the formats of arrays in arraylist as a comma-separated string. """ if hasattr(data, 'dtype'): return ",".join([desc[1] for desc in data.dtype.descr]) formats = '' for obj in data: obj = np.asarray(obj) formats += _typestr[obj.dtype.type] if issubclass(obj.dtype.type, ntypes.flexible): formats += repr(obj.itemsize) formats += ',' return formats[:-1]
def npv(rate, values): """ Returns the NPV (Net Present Value) of a cash flow series. Parameters ---------- rate : scalar The discount rate. values : array_like, shape(M, ) The values of the time series of cash flows. The (fixed) time interval between cash flow "events" must be the same as that for which `rate` is given (i.e., if `rate` is per year, then precisely a year is understood to elapse between each cash flow event). By convention, investments or "deposits" are negative, income or "withdrawals" are positive; `values` must begin with the initial investment, thus `values[0]` will typically be negative. Returns ------- out : float The NPV of the input cash flow series `values` at the discount `rate`. Notes ----- Returns the result of: [G]_ .. math :: \\sum_{t=0}^{M-1}{\\frac{values_t}{(1+rate)^{t}}} References ---------- .. [G] L. J. Gitman, "Principles of Managerial Finance, Brief," 3rd ed., Addison-Wesley, 2003, pg. 346. Examples -------- >>> np.npv(0.281,[-100, 39, 59, 55, 20]) -0.0084785916384548798 (Compare with the Example given for numpy.lib.financial.irr) """ values = np.asarray(values) return (values / (1 + rate)**np.arange(0, len(values))).sum(axis=0)
def setdiff1d(ar1, ar2, assume_unique=False): """ Find the set difference of two arrays. Return the sorted, unique values in `ar1` that are not in `ar2`. Parameters ---------- ar1 : array_like Input array. ar2 : array_like Input comparison array. assume_unique : bool If True, the input arrays are both assumed to be unique, which can speed up the calculation. Default is False. Returns ------- setdiff1d : ndarray Sorted 1D array of values in `ar1` that are not in `ar2`. See Also -------- numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. Examples -------- >>> a = np.array([1, 2, 3, 2, 4, 1]) >>> b = np.array([3, 4, 5, 6]) >>> np.setdiff1d(a, b) array([1, 2]) """ if assume_unique: ar1 = np.asarray(ar1).ravel() else: ar1 = unique(ar1) ar2 = unique(ar2) return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)]
def mirr(values, finance_rate, reinvest_rate): """ Modified internal rate of return. Parameters ---------- values : array_like Cash flows (must contain at least one positive and one negative value) or nan is returned. The first value is considered a sunk cost at time zero. finance_rate : scalar Interest rate paid on the cash flows reinvest_rate : scalar Interest rate received on the cash flows upon reinvestment Returns ------- out : float Modified internal rate of return """ values = np.asarray(values) n = values.size # Without this explicit cast the 1/(n - 1) computation below # becomes a float, which causes TypeError when using Decimal # values. if isinstance(finance_rate, Decimal): n = Decimal(n) pos = values > 0 neg = values < 0 if not (pos.any() and neg.any()): return np.nan numer = np.abs(npv(reinvest_rate, values * pos)) denom = np.abs(npv(finance_rate, values * neg)) return (numer / denom)**(1 / (n - 1)) * (1 + reinvest_rate) - 1
def as_strided(x, shape=None, strides=None, subok=False, writeable=True): """ Create a view into the array with the given shape and strides. .. warning:: This function has to be used with extreme care, see notes. Parameters ---------- x : ndarray Array to create a new. shape : sequence of int, optional The shape of the new array. Defaults to ``x.shape``. strides : sequence of int, optional The strides of the new array. Defaults to ``x.strides``. subok : bool, optional .. versionadded:: 1.10 If True, subclasses are preserved. writeable : bool, optional .. versionadded:: 1.12 If set to False, the returned array will always be readonly. Otherwise it will be writable if the original array was. It is advisable to set this to False if possible (see Notes). Returns ------- view : ndarray See also -------- broadcast_to: broadcast an array to a given shape. reshape : reshape an array. Notes ----- ``as_strided`` creates a view into the array given the exact strides and shape. This means it manipulates the internal data structure of ndarray and, if done incorrectly, the array elements can point to invalid memory and can corrupt results or crash your program. It is advisable to always use the original ``x.strides`` when calculating new strides to avoid reliance on a contiguous memory layout. Furthermore, arrays created with this function often contain self overlapping memory, so that two elements are identical. Vectorized write operations on such arrays will typically be unpredictable. They may even give different results for small, large, or transposed arrays. Since writing to these arrays has to be tested and done with great care, you may want to use ``writeable=False`` to avoid accidental write operations. For these reasons it is advisable to avoid ``as_strided`` when possible. """ # first convert input to array, possibly keeping subclass x = np.array(x, copy=False, subok=subok) interface = dict(x.__array_interface__) if shape is not None: interface['shape'] = tuple(shape) if strides is not None: interface['strides'] = tuple(strides) array = np.asarray(DummyArray(interface, base=x)) # The route via `__interface__` does not preserve structured # dtypes. Since dtype should remain unchanged, we set it explicitly. array.dtype = x.dtype view = _maybe_view_as_subclass(x, array) if view.flags.writeable and not writeable: view.flags.writeable = False return view
def __new__(cls, inp): obj = np.asarray(inp).view(cls) return obj
def pad(array, pad_width, mode, **kwargs): """ Pads an array. Parameters ---------- array : array_like of rank N Input array pad_width : {sequence, array_like, int} Number of values padded to the edges of each axis. ((before_1, after_1), ... (before_N, after_N)) unique pad widths for each axis. ((before, after),) yields same before and after pad for each axis. (pad,) or int is a shortcut for before = after = pad width for all axes. mode : str or function One of the following string values or a user supplied function. 'constant' Pads with a constant value. 'edge' Pads with the edge values of array. 'linear_ramp' Pads with the linear ramp between end_value and the array edge value. 'maximum' Pads with the maximum value of all or part of the vector along each axis. 'mean' Pads with the mean value of all or part of the vector along each axis. 'median' Pads with the median value of all or part of the vector along each axis. 'minimum' Pads with the minimum value of all or part of the vector along each axis. 'reflect' Pads with the reflection of the vector mirrored on the first and last values of the vector along each axis. 'symmetric' Pads with the reflection of the vector mirrored along the edge of the array. 'wrap' Pads with the wrap of the vector along the axis. The first values are used to pad the end and the end values are used to pad the beginning. <function> Padding function, see Notes. stat_length : sequence or int, optional Used in 'maximum', 'mean', 'median', and 'minimum'. Number of values at edge of each axis used to calculate the statistic value. ((before_1, after_1), ... (before_N, after_N)) unique statistic lengths for each axis. ((before, after),) yields same before and after statistic lengths for each axis. (stat_length,) or int is a shortcut for before = after = statistic length for all axes. Default is ``None``, to use the entire axis. constant_values : sequence or int, optional Used in 'constant'. The values to set the padded values for each axis. ((before_1, after_1), ... (before_N, after_N)) unique pad constants for each axis. ((before, after),) yields same before and after constants for each axis. (constant,) or int is a shortcut for before = after = constant for all axes. Default is 0. end_values : sequence or int, optional Used in 'linear_ramp'. The values used for the ending value of the linear_ramp and that will form the edge of the padded array. ((before_1, after_1), ... (before_N, after_N)) unique end values for each axis. ((before, after),) yields same before and after end values for each axis. (constant,) or int is a shortcut for before = after = end value for all axes. Default is 0. reflect_type : {'even', 'odd'}, optional Used in 'reflect', and 'symmetric'. The 'even' style is the default with an unaltered reflection around the edge value. For the 'odd' style, the extended part of the array is created by subtracting the reflected values from two times the edge value. Returns ------- pad : ndarray Padded array of rank equal to `array` with shape increased according to `pad_width`. Notes ----- .. versionadded:: 1.7.0 For an array with rank greater than 1, some of the padding of later axes is calculated from padding of previous axes. This is easiest to think about with a rank 2 array where the corners of the padded array are calculated by using padded values from the first axis. The padding function, if used, should return a rank 1 array equal in length to the vector argument with padded values replaced. It has the following signature:: padding_func(vector, iaxis_pad_width, iaxis, kwargs) where vector : ndarray A rank 1 array already padded with zeros. Padded values are vector[:pad_tuple[0]] and vector[-pad_tuple[1]:]. iaxis_pad_width : tuple A 2-tuple of ints, iaxis_pad_width[0] represents the number of values padded at the beginning of vector where iaxis_pad_width[1] represents the number of values padded at the end of vector. iaxis : int The axis currently being calculated. kwargs : dict Any keyword arguments the function requires. Examples -------- >>> a = [1, 2, 3, 4, 5] >>> np.pad(a, (2,3), 'constant', constant_values=(4, 6)) array([4, 4, 1, 2, 3, 4, 5, 6, 6, 6]) >>> np.pad(a, (2, 3), 'edge') array([1, 1, 1, 2, 3, 4, 5, 5, 5, 5]) >>> np.pad(a, (2, 3), 'linear_ramp', end_values=(5, -4)) array([ 5, 3, 1, 2, 3, 4, 5, 2, -1, -4]) >>> np.pad(a, (2,), 'maximum') array([5, 5, 1, 2, 3, 4, 5, 5, 5]) >>> np.pad(a, (2,), 'mean') array([3, 3, 1, 2, 3, 4, 5, 3, 3]) >>> np.pad(a, (2,), 'median') array([3, 3, 1, 2, 3, 4, 5, 3, 3]) >>> a = [[1, 2], [3, 4]] >>> np.pad(a, ((3, 2), (2, 3)), 'minimum') array([[1, 1, 1, 2, 1, 1, 1], [1, 1, 1, 2, 1, 1, 1], [1, 1, 1, 2, 1, 1, 1], [1, 1, 1, 2, 1, 1, 1], [3, 3, 3, 4, 3, 3, 3], [1, 1, 1, 2, 1, 1, 1], [1, 1, 1, 2, 1, 1, 1]]) >>> a = [1, 2, 3, 4, 5] >>> np.pad(a, (2, 3), 'reflect') array([3, 2, 1, 2, 3, 4, 5, 4, 3, 2]) >>> np.pad(a, (2, 3), 'reflect', reflect_type='odd') array([-1, 0, 1, 2, 3, 4, 5, 6, 7, 8]) >>> np.pad(a, (2, 3), 'symmetric') array([2, 1, 1, 2, 3, 4, 5, 5, 4, 3]) >>> np.pad(a, (2, 3), 'symmetric', reflect_type='odd') array([0, 1, 1, 2, 3, 4, 5, 5, 6, 7]) >>> np.pad(a, (2, 3), 'wrap') array([4, 5, 1, 2, 3, 4, 5, 1, 2, 3]) >>> def pad_with(vector, pad_width, iaxis, kwargs): ... pad_value = kwargs.get('padder', 10) ... vector[:pad_width[0]] = pad_value ... vector[-pad_width[1]:] = pad_value ... return vector >>> a = np.arange(6) >>> a = a.reshape((2, 3)) >>> np.pad(a, 2, pad_with) array([[10, 10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10, 10], [10, 10, 0, 1, 2, 10, 10], [10, 10, 3, 4, 5, 10, 10], [10, 10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10, 10]]) >>> np.pad(a, 2, pad_with, padder=100) array([[100, 100, 100, 100, 100, 100, 100], [100, 100, 100, 100, 100, 100, 100], [100, 100, 0, 1, 2, 100, 100], [100, 100, 3, 4, 5, 100, 100], [100, 100, 100, 100, 100, 100, 100], [100, 100, 100, 100, 100, 100, 100]]) """ if not np.asarray(pad_width).dtype.kind == 'i': raise TypeError('`pad_width` must be of integral type.') narray = np.array(array) pad_width = _validate_lengths(narray, pad_width) allowedkwargs = { 'constant': ['constant_values'], 'edge': [], 'linear_ramp': ['end_values'], 'maximum': ['stat_length'], 'mean': ['stat_length'], 'median': ['stat_length'], 'minimum': ['stat_length'], 'reflect': ['reflect_type'], 'symmetric': ['reflect_type'], 'wrap': [], } kwdefaults = { 'stat_length': None, 'constant_values': 0, 'end_values': 0, 'reflect_type': 'even', } if isinstance(mode, np.compat.basestring): # Make sure have allowed kwargs appropriate for mode for key in kwargs: if key not in allowedkwargs[mode]: raise ValueError('%s keyword not in allowed keywords %s' % (key, allowedkwargs[mode])) # Set kwarg defaults for kw in allowedkwargs[mode]: kwargs.setdefault(kw, kwdefaults[kw]) # Need to only normalize particular keywords. for i in kwargs: if i == 'stat_length': kwargs[i] = _validate_lengths(narray, kwargs[i]) if i in ['end_values', 'constant_values']: kwargs[i] = _normalize_shape(narray, kwargs[i], cast_to_int=False) else: # Drop back to old, slower np.apply_along_axis mode for user-supplied # vector function function = mode # Create a new padded array rank = list(range(narray.ndim)) total_dim_increase = [np.sum(pad_width[i]) for i in rank] offset_slices = tuple( slice(pad_width[i][0], pad_width[i][0] + narray.shape[i]) for i in rank) new_shape = np.array(narray.shape) + total_dim_increase newmat = np.zeros(new_shape, narray.dtype) # Insert the original array into the padded array newmat[offset_slices] = narray # This is the core of pad ... for iaxis in rank: np.apply_along_axis(function, iaxis, newmat, pad_width[iaxis], iaxis, kwargs) return newmat # If we get here, use new padding method newmat = narray.copy() # API preserved, but completely new algorithm which pads by building the # entire block to pad before/after `arr` with in one step, for each axis. if mode == 'constant': for axis, ((pad_before, pad_after), (before_val, after_val)) \ in enumerate(zip(pad_width, kwargs['constant_values'])): newmat = _prepend_const(newmat, pad_before, before_val, axis) newmat = _append_const(newmat, pad_after, after_val, axis) elif mode == 'edge': for axis, (pad_before, pad_after) in enumerate(pad_width): newmat = _prepend_edge(newmat, pad_before, axis) newmat = _append_edge(newmat, pad_after, axis) elif mode == 'linear_ramp': for axis, ((pad_before, pad_after), (before_val, after_val)) \ in enumerate(zip(pad_width, kwargs['end_values'])): newmat = _prepend_ramp(newmat, pad_before, before_val, axis) newmat = _append_ramp(newmat, pad_after, after_val, axis) elif mode == 'maximum': for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \ in enumerate(zip(pad_width, kwargs['stat_length'])): newmat = _prepend_max(newmat, pad_before, chunk_before, axis) newmat = _append_max(newmat, pad_after, chunk_after, axis) elif mode == 'mean': for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \ in enumerate(zip(pad_width, kwargs['stat_length'])): newmat = _prepend_mean(newmat, pad_before, chunk_before, axis) newmat = _append_mean(newmat, pad_after, chunk_after, axis) elif mode == 'median': for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \ in enumerate(zip(pad_width, kwargs['stat_length'])): newmat = _prepend_med(newmat, pad_before, chunk_before, axis) newmat = _append_med(newmat, pad_after, chunk_after, axis) elif mode == 'minimum': for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \ in enumerate(zip(pad_width, kwargs['stat_length'])): newmat = _prepend_min(newmat, pad_before, chunk_before, axis) newmat = _append_min(newmat, pad_after, chunk_after, axis) elif mode == 'reflect': for axis, (pad_before, pad_after) in enumerate(pad_width): if narray.shape[axis] == 0: # Axes with non-zero padding cannot be empty. if pad_before > 0 or pad_after > 0: raise ValueError("There aren't any elements to reflect" " in axis {} of `array`".format(axis)) # Skip zero padding on empty axes. continue # Recursive padding along any axis where `pad_amt` is too large # for indexing tricks. We can only safely pad the original axis # length, to keep the period of the reflections consistent. if ((pad_before > 0) or (pad_after > 0)) and newmat.shape[axis] == 1: # Extending singleton dimension for 'reflect' is legacy # behavior; it really should raise an error. newmat = _prepend_edge(newmat, pad_before, axis) newmat = _append_edge(newmat, pad_after, axis) continue method = kwargs['reflect_type'] safe_pad = newmat.shape[axis] - 1 while ((pad_before > safe_pad) or (pad_after > safe_pad)): pad_iter_b = min(safe_pad, safe_pad * (pad_before // safe_pad)) pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad)) newmat = _pad_ref(newmat, (pad_iter_b, pad_iter_a), method, axis) pad_before -= pad_iter_b pad_after -= pad_iter_a safe_pad += pad_iter_b + pad_iter_a newmat = _pad_ref(newmat, (pad_before, pad_after), method, axis) elif mode == 'symmetric': for axis, (pad_before, pad_after) in enumerate(pad_width): # Recursive padding along any axis where `pad_amt` is too large # for indexing tricks. We can only safely pad the original axis # length, to keep the period of the reflections consistent. method = kwargs['reflect_type'] safe_pad = newmat.shape[axis] while ((pad_before > safe_pad) or (pad_after > safe_pad)): pad_iter_b = min(safe_pad, safe_pad * (pad_before // safe_pad)) pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad)) newmat = _pad_sym(newmat, (pad_iter_b, pad_iter_a), method, axis) pad_before -= pad_iter_b pad_after -= pad_iter_a safe_pad += pad_iter_b + pad_iter_a newmat = _pad_sym(newmat, (pad_before, pad_after), method, axis) elif mode == 'wrap': for axis, (pad_before, pad_after) in enumerate(pad_width): # Recursive padding along any axis where `pad_amt` is too large # for indexing tricks. We can only safely pad the original axis # length, to keep the period of the reflections consistent. safe_pad = newmat.shape[axis] while ((pad_before > safe_pad) or (pad_after > safe_pad)): pad_iter_b = min(safe_pad, safe_pad * (pad_before // safe_pad)) pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad)) newmat = _pad_wrap(newmat, (pad_iter_b, pad_iter_a), axis) pad_before -= pad_iter_b pad_after -= pad_iter_a safe_pad += pad_iter_b + pad_iter_a newmat = _pad_wrap(newmat, (pad_before, pad_after), axis) return newmat
def isin(element, test_elements, assume_unique=False, invert=False): """ Calculates `element in test_elements`, broadcasting over `element` only. Returns a boolean array of the same shape as `element` that is True where an element of `element` is in `test_elements` and False otherwise. Parameters ---------- element : array_like Input array. test_elements : array_like The values against which to test each value of `element`. This argument is flattened if it is an array or array_like. See notes for behavior with non-array-like parameters. assume_unique : bool, optional If True, the input arrays are both assumed to be unique, which can speed up the calculation. Default is False. invert : bool, optional If True, the values in the returned array are inverted, as if calculating `element not in test_elements`. Default is False. ``np.isin(a, b, invert=True)`` is equivalent to (but faster than) ``np.invert(np.isin(a, b))``. Returns ------- isin : ndarray, bool Has the same shape as `element`. The values `element[isin]` are in `test_elements`. See Also -------- in1d : Flattened version of this function. numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. Notes ----- `isin` is an element-wise function version of the python keyword `in`. ``isin(a, b)`` is roughly equivalent to ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences. `element` and `test_elements` are converted to arrays if they are not already. If `test_elements` is a set (or other non-sequence collection) it will be converted to an object array with one element, rather than an array of the values contained in `test_elements`. This is a consequence of the `array` constructor's way of handling non-sequence collections. Converting the set to a list usually gives the desired behavior. .. versionadded:: 1.13.0 Examples -------- >>> element = 2*np.arange(4).reshape((2, 2)) >>> element array([[0, 2], [4, 6]]) >>> test_elements = [1, 2, 4, 8] >>> mask = np.isin(element, test_elements) >>> mask array([[ False, True], [ True, False]]) >>> element[mask] array([2, 4]) >>> mask = np.isin(element, test_elements, invert=True) >>> mask array([[ True, False], [ False, True]]) >>> element[mask] array([0, 6]) Because of how `array` handles sets, the following does not work as expected: >>> test_set = {1, 2, 4, 8} >>> np.isin(element, test_set) array([[ False, False], [ False, False]]) Casting the set to a list gives the expected result: >>> np.isin(element, list(test_set)) array([[ False, True], [ True, False]]) """ element = np.asarray(element) return in1d(element, test_elements, assume_unique=assume_unique, invert=invert).reshape(element.shape)
def __init__(self, value): self.value = np.asarray(value)
def test_unique_axis_list(self): msg = "Unique failed on list of lists" inp = [[0, 1, 0], [0, 1, 0]] inp_arr = np.asarray(inp) assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg) assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg)
def _isin_slow(a, b): b = np.asarray(b).flatten().tolist() return a in b
def _get_bin_edges(a, bins, range, weights): """ Computes the bins used internally by `histogram`. Parameters ========== a : ndarray Ravelled data array bins, range Forwarded arguments from `histogram`. weights : ndarray, optional Ravelled weights array, or None Returns ======= bin_edges : ndarray Array of bin edges uniform_bins : (Number, Number, int): The upper bound, lowerbound, and number of bins, used in the optimized implementation of `histogram` that works on uniform bins. """ # parse the overloaded bins argument n_equal_bins = None bin_edges = None if isinstance(bins, basestring): bin_name = bins # if `bins` is a string for an automatic method, # this will replace it with the number of bins calculated if bin_name not in _hist_bin_selectors: raise ValueError( "{!r} is not a valid estimator for `bins`".format(bin_name)) if weights is not None: raise TypeError("Automated estimation of the number of " "bins is not supported for weighted data") first_edge, last_edge = _get_outer_edges(a, range) # truncate the range if needed if range is not None: keep = (a >= first_edge) keep &= (a <= last_edge) if not np.logical_and.reduce(keep): a = a[keep] if a.size == 0: n_equal_bins = 1 else: # Do not call selectors on empty arrays width = _hist_bin_selectors[bin_name](a) if width: n_equal_bins = int( np.ceil(_unsigned_subtract(last_edge, first_edge) / width)) else: # Width can be zero for some estimators, e.g. FD when # the IQR of the data is zero. n_equal_bins = 1 elif np.ndim(bins) == 0: try: n_equal_bins = operator.index(bins) except TypeError: raise TypeError('`bins` must be an integer, a string, or an array') if n_equal_bins < 1: raise ValueError('`bins` must be positive, when an integer') first_edge, last_edge = _get_outer_edges(a, range) elif np.ndim(bins) == 1: bin_edges = np.asarray(bins) if np.any(bin_edges[:-1] > bin_edges[1:]): raise ValueError( '`bins` must increase monotonically, when an array') else: raise ValueError('`bins` must be 1d, when an array') if n_equal_bins is not None: # gh-10322 means that type resolution rules are dependent on array # shapes. To avoid this causing problems, we pick a type now and stick # with it throughout. bin_type = np.result_type(first_edge, last_edge, a) if np.issubdtype(bin_type, np.integer): bin_type = np.result_type(bin_type, float) # bin edges must be computed bin_edges = np.linspace(first_edge, last_edge, n_equal_bins + 1, endpoint=True, dtype=bin_type) return bin_edges, (first_edge, last_edge, n_equal_bins) else: return bin_edges, None
def histogramdd(sample, bins=10, range=None, normed=None, weights=None, density=None): """ Compute the multidimensional histogram of some data. Parameters ---------- sample : (N, D) array, or (D, N) array_like The data to be histogrammed. Note the unusual interpretation of sample when an array_like: * When an array, each row is a coordinate in a D-dimensional space - such as ``histogramgramdd(np.array([p1, p2, p3]))``. * When an array_like, each element is the list of values for single coordinate - such as ``histogramgramdd((X, Y, Z))``. The first form should be preferred. bins : sequence or int, optional The bin specification: * A sequence of arrays describing the bin edges along each dimension. * The number of bins for each dimension (nx, ny, ... =bins) * The number of bins for all dimensions (nx=ny=...=bins). range : sequence, optional A sequence of length D, each an optional (lower, upper) tuple giving the outer bin edges to be used if the edges are not given explicitly in `bins`. An entry of None in the sequence results in the minimum and maximum values being used for the corresponding dimension. The default, None, is equivalent to passing a tuple of D None values. density : bool, optional If False, the default, returns the number of samples in each bin. If True, returns the probability *density* function at the bin, ``bin_count / sample_count / bin_volume``. normed : bool, optional An alias for the density argument that behaves identically. To avoid confusion with the broken normed argument to `histogram`, `density` should be preferred. weights : (N,) array_like, optional An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`. Weights are normalized to 1 if normed is True. If normed is False, the values of the returned histogram are equal to the sum of the weights belonging to the samples falling into each bin. Returns ------- H : ndarray The multidimensional histogram of sample x. See normed and weights for the different possible semantics. edges : list A list of D arrays describing the bin edges for each dimension. See Also -------- histogram: 1-D histogram histogram2d: 2-D histogram Examples -------- >>> r = np.random.randn(100,3) >>> H, edges = np.histogramdd(r, bins = (5, 8, 4)) >>> H.shape, edges[0].size, edges[1].size, edges[2].size ((5, 8, 4), 6, 9, 5) """ try: # Sample is an ND-array. N, D = sample.shape except (AttributeError, ValueError): # Sample is a sequence of 1D arrays. sample = np.atleast_2d(sample).T N, D = sample.shape nbin = np.empty(D, int) edges = D * [None] dedges = D * [None] if weights is not None: weights = np.asarray(weights) try: M = len(bins) if M != D: raise ValueError( 'The dimension of bins must be equal to the dimension of the ' ' sample x.') except TypeError: # bins is an integer bins = D * [bins] # normalize the range argument if range is None: range = (None, ) * D elif len(range) != D: raise ValueError('range argument must have one entry per dimension') # Create edge arrays for i in _range(D): if np.ndim(bins[i]) == 0: if bins[i] < 1: raise ValueError( '`bins[{}]` must be positive, when an integer'.format(i)) smin, smax = _get_outer_edges(sample[:, i], range[i]) edges[i] = np.linspace(smin, smax, bins[i] + 1) elif np.ndim(bins[i]) == 1: edges[i] = np.asarray(bins[i]) if np.any(edges[i][:-1] > edges[i][1:]): raise ValueError( '`bins[{}]` must be monotonically increasing, when an array' .format(i)) else: raise ValueError( '`bins[{}]` must be a scalar or 1d array'.format(i)) nbin[i] = len(edges[i]) + 1 # includes an outlier on each end dedges[i] = np.diff(edges[i]) # Compute the bin number each sample falls into. Ncount = tuple( # avoid np.digitize to work around gh-11022 np.searchsorted(edges[i], sample[:, i], side='right') for i in _range(D)) # Using digitize, values that fall on an edge are put in the right bin. # For the rightmost bin, we want values equal to the right edge to be # counted in the last bin, and not as an outlier. for i in _range(D): # Find which points are on the rightmost edge. on_edge = (sample[:, i] == edges[i][-1]) # Shift these points one bin to the left. Ncount[i][on_edge] -= 1 # Compute the sample indices in the flattened histogram matrix. # This raises an error if the array is too large. xy = np.ravel_multi_index(Ncount, nbin) # Compute the number of repetitions in xy and assign it to the # flattened histmat. hist = np.bincount(xy, weights, minlength=nbin.prod()) # Shape into a proper matrix hist = hist.reshape(nbin) # This preserves the (bad) behavior observed in gh-7845, for now. hist = hist.astype(float, casting='safe') # Remove outliers (indices 0 and -1 for each dimension). core = D * (slice(1, -1), ) hist = hist[core] # handle the aliasing normed argument if normed is None: if density is None: density = False elif density is None: # an explicit normed argument was passed, alias it to the new name density = normed else: raise TypeError("Cannot specify both 'normed' and 'density'") if density: # calculate the probability density function s = hist.sum() for i in _range(D): shape = np.ones(D, int) shape[i] = nbin[i] - 2 hist = hist / dedges[i].reshape(shape) hist /= s if (hist.shape != nbin - 2).any(): raise RuntimeError("Internal Shape Error") return hist, edges
def in1d(ar1, ar2, assume_unique=False, invert=False): """ Test whether each element of a 1-D array is also present in a second array. Returns a boolean array the same length as `ar1` that is True where an element of `ar1` is in `ar2` and False otherwise. We recommend using :func:`isin` instead of `in1d` for new code. Parameters ---------- ar1 : (M,) array_like Input array. ar2 : array_like The values against which to test each value of `ar1`. assume_unique : bool, optional If True, the input arrays are both assumed to be unique, which can speed up the calculation. Default is False. invert : bool, optional If True, the values in the returned array are inverted (that is, False where an element of `ar1` is in `ar2` and True otherwise). Default is False. ``np.in1d(a, b, invert=True)`` is equivalent to (but is faster than) ``np.invert(in1d(a, b))``. .. versionadded:: 1.8.0 Returns ------- in1d : (M,) ndarray, bool The values `ar1[in1d]` are in `ar2`. See Also -------- isin : Version of this function that preserves the shape of ar1. numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. Notes ----- `in1d` can be considered as an element-wise function version of the python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly equivalent to ``np.array([item in b for item in a])``. However, this idea fails if `ar2` is a set, or similar (non-sequence) container: As ``ar2`` is converted to an array, in those cases ``asarray(ar2)`` is an object array rather than the expected array of contained values. .. versionadded:: 1.4.0 Examples -------- >>> test = np.array([0, 1, 2, 5, 0]) >>> states = [0, 2] >>> mask = np.in1d(test, states) >>> mask array([ True, False, True, False, True]) >>> test[mask] array([0, 2, 0]) >>> mask = np.in1d(test, states, invert=True) >>> mask array([False, True, False, True, False]) >>> test[mask] array([1, 5]) """ # Ravel both arrays, behavior for the first array could be different ar1 = np.asarray(ar1).ravel() ar2 = np.asarray(ar2).ravel() # Check if one of the arrays may contain arbitrary objects contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject # This code is run when # a) the first condition is true, making the code significantly faster # b) the second condition is true (i.e. `ar1` or `ar2` may contain # arbitrary objects), since then sorting is not guaranteed to work if len(ar2) < 10 * len(ar1)**0.145 or contains_object: if invert: mask = np.ones(len(ar1), dtype=bool) for a in ar2: mask &= (ar1 != a) else: mask = np.zeros(len(ar1), dtype=bool) for a in ar2: mask |= (ar1 == a) return mask # Otherwise use sorting if not assume_unique: ar1, rev_idx = np.unique(ar1, return_inverse=True) ar2 = np.unique(ar2) ar = np.concatenate((ar1, ar2)) # We need this to be a stable sort, so always use 'mergesort' # here. The values from the first array should always come before # the values from the second array. order = ar.argsort(kind='mergesort') sar = ar[order] if invert: bool_ar = (sar[1:] != sar[:-1]) else: bool_ar = (sar[1:] == sar[:-1]) flag = np.concatenate((bool_ar, [invert])) ret = np.empty(ar.shape, dtype=bool) ret[order] = flag if assume_unique: return ret[:len(ar1)] else: return ret[rev_idx]