def test_testUfuncs1(self):
    # Test various functions such as sin, cos.
    (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d
    assert_(eq(np.cos(x), cos(xm)))
    assert_(eq(np.cosh(x), cosh(xm)))
    assert_(eq(np.sin(x), sin(xm)))
    assert_(eq(np.sinh(x), sinh(xm)))
    assert_(eq(np.tan(x), tan(xm)))
    assert_(eq(np.tanh(x), tanh(xm)))
    with np.errstate(divide='ignore', invalid='ignore'):
        assert_(eq(np.sqrt(abs(x)), sqrt(xm)))
        assert_(eq(np.log(abs(x)), log(xm)))
        assert_(eq(np.log10(abs(x)), log10(xm)))
    assert_(eq(np.exp(x), exp(xm)))
    assert_(eq(np.arcsin(z), arcsin(zm)))
    assert_(eq(np.arccos(z), arccos(zm)))
    assert_(eq(np.arctan(z), arctan(zm)))
    assert_(eq(np.arctan2(x, y), arctan2(xm, ym)))
    assert_(eq(np.absolute(x), absolute(xm)))
    assert_(eq(np.equal(x, y), equal(xm, ym)))
    assert_(eq(np.not_equal(x, y), not_equal(xm, ym)))
    assert_(eq(np.less(x, y), less(xm, ym)))
    assert_(eq(np.greater(x, y), greater(xm, ym)))
    assert_(eq(np.less_equal(x, y), less_equal(xm, ym)))
    assert_(eq(np.greater_equal(x, y), greater_equal(xm, ym)))
    assert_(eq(np.conjugate(x), conjugate(xm)))
    assert_(eq(np.concatenate((x, y)), concatenate((xm, ym))))
    assert_(eq(np.concatenate((x, y)), concatenate((x, y))))
    assert_(eq(np.concatenate((x, y)), concatenate((xm, y))))
    assert_(eq(np.concatenate((x, y, x)), concatenate((x, ym, x))))
def test_exceptions(self):
    # test axis must be in bounds
    for ndim in [1, 2, 3]:
        a = np.ones((1,)*ndim)
        np.concatenate((a, a), axis=0)  # OK
        assert_raises(np.AxisError, np.concatenate, (a, a), axis=ndim)
        assert_raises(np.AxisError, np.concatenate, (a, a), axis=-(ndim + 1))

    # Scalars cannot be concatenated
    assert_raises(ValueError, concatenate, (0,))
    assert_raises(ValueError, concatenate, (np.array(0),))

    # test shapes must match except for concatenation axis
    a = np.ones((1, 2, 3))
    b = np.ones((2, 2, 3))
    axis = list(range(3))
    for i in range(3):
        np.concatenate((a, b), axis=axis[0])  # OK
        assert_raises(ValueError, np.concatenate, (a, b), axis=axis[1])
        assert_raises(ValueError, np.concatenate, (a, b), axis=axis[2])
        a = np.moveaxis(a, -1, 0)
        b = np.moveaxis(b, -1, 0)
        axis.append(axis.pop(0))

    # No arrays to concatenate raises ValueError
    assert_raises(ValueError, concatenate, ())
def test_hfft(self):
    x = random(14) + 1j*random(14)
    x_herm = np.concatenate((random(1), x, random(1)))
    x = np.concatenate((x_herm, x[::-1].conj()))
    assert_array_almost_equal(np.fft.fft(x), np.fft.hfft(x_herm))
    assert_array_almost_equal(np.fft.hfft(x_herm) / np.sqrt(30),
                              np.fft.hfft(x_herm, norm="ortho"))
def test_large_concatenate_axis_None(self):
    # When no axis is given, concatenate uses flattened versions.
    # This also had a bug with many arrays (see gh-5979).
    x = np.arange(1, 100)
    r = np.concatenate(x, None)
    assert_array_equal(x, r)

    # This should probably be deprecated:
    r = np.concatenate(x, 100)  # axis is >= MAXDIMS
    assert_array_equal(x, r)
def test_weights(self):
    v = np.random.rand(100)
    w = np.ones(100) * 5
    a, b = histogram(v)
    na, nb = histogram(v, density=True)
    wa, wb = histogram(v, weights=w)
    nwa, nwb = histogram(v, weights=w, density=True)
    assert_array_almost_equal(a * 5, wa)
    assert_array_almost_equal(na, nwa)

    # Check weights are properly applied.
    v = np.linspace(0, 10, 10)
    w = np.concatenate((np.zeros(5), np.ones(5)))
    wa, wb = histogram(v, bins=np.arange(11), weights=w)
    assert_array_almost_equal(wa, w)

    # Check with integer weights
    wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1])
    assert_array_equal(wa, [4, 5, 0, 1])
    wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1],
                       density=True)
    assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4)

    # Check weights with non-uniform bin widths
    a, b = histogram(np.arange(9), [0, 1, 3, 6, 10],
                     weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True)
    assert_almost_equal(a, [.2, .1, .1, .075])
def union1d(ar1, ar2):
    """
    Find the union of two arrays.

    Return the unique, sorted array of values that are in either of the two
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. They are flattened if they are not already 1D.

    Returns
    -------
    union1d : ndarray
        Unique, sorted union of the input arrays.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.union1d([-1, 0, 1], [-2, 0, 2])
    array([-2, -1,  0,  1,  2])

    To find the union of more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([1, 2, 3, 4, 6])
    """
    return unique(np.concatenate((ar1, ar2), axis=None))
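# A quick illustration (not from the NumPy sources): because the body
# concatenates with axis=None, the inputs are flattened first, which is
# exactly the documented "flattened if they are not already 1D" behaviour.
import numpy as np

assert np.array_equal(np.union1d(np.array([[1, 2], [3, 4]]), [2, 5]),
                      [1, 2, 3, 4, 5])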
def _unique1d(ar, return_index=False, return_inverse=False,
              return_counts=False):
    """
    Find the unique elements of an array, ignoring shape.
    """
    ar = np.asanyarray(ar).flatten()

    optional_indices = return_index or return_inverse

    if optional_indices:
        perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
        aux = ar[perm]
    else:
        ar.sort()
        aux = ar
    mask = np.empty(aux.shape, dtype=np.bool_)
    mask[:1] = True
    mask[1:] = aux[1:] != aux[:-1]

    ret = (aux[mask],)
    if return_index:
        ret += (perm[mask],)
    if return_inverse:
        imask = np.cumsum(mask) - 1
        inv_idx = np.empty(mask.shape, dtype=np.intp)
        inv_idx[perm] = imask
        ret += (inv_idx,)
    if return_counts:
        idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
        ret += (np.diff(idx),)
    return ret
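# A sanity sketch (not part of NumPy) of how the pieces returned above fit
# together, exercised through the public np.unique, which delegates to
# _unique1d: `values[inverse]` reconstructs the flattened input, `ar[index]`
# gives the first occurrences (the mergesort is stable), and `counts` sums
# to the input size.
import numpy as np

ar = np.array([3, 1, 3, 2, 1, 1])
values, index, inverse, counts = np.unique(
    ar, return_index=True, return_inverse=True, return_counts=True)
assert np.array_equal(values, [1, 2, 3])
assert np.array_equal(ar[index], values)     # first occurrences
assert np.array_equal(values[inverse], ar)   # round-trips the input
assert counts.sum() == ar.size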
def _search_sorted_inclusive(a, v):
    """
    Like `searchsorted`, but where the last item in `v` is placed on the right.

    In the context of a histogram, this makes the last bin edge inclusive
    """
    return np.concatenate((
        a.searchsorted(v[:-1], 'left'),
        a.searchsorted(v[-1:], 'right')
    ))
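# A brief illustration (not from the sources): the final edge is searched
# with side='right', so a data value equal to the last bin edge is counted
# inside the last bin instead of falling off the end.
import numpy as np

a = np.array([0., 1., 2., 3., 4.])   # sorted data
v = np.array([0., 2., 4.])           # bin edges
left_only = a.searchsorted(v, 'left')
inclusive = np.concatenate((a.searchsorted(v[:-1], 'left'),
                            a.searchsorted(v[-1:], 'right')))
assert np.array_equal(left_only, [0, 2, 4])  # 4.0 lands outside the bins
assert np.array_equal(inclusive, [0, 2, 5])  # 4.0 counted in the last bin
assert np.array_equal(np.diff(inclusive), [2, 3])  # per-bin counts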
def test_polyfit(self):
    c = np.array([3., 2., 1.])
    x = np.linspace(0, 2, 7)
    y = np.polyval(c, x)
    err = [1, -1, 1, -1, 1, -1, 1]
    weights = np.arange(8, 1, -1)**2/7.0

    # Check exception when too few points for variance estimate. Note that
    # the Bayesian estimate requires the number of data points to exceed
    # degree + 3.
    assert_raises(ValueError, np.polyfit,
                  [0, 1, 3], [0, 1, 3], deg=0, cov=True)

    # check 1D case
    m, cov = np.polyfit(x, y+err, 2, cov=True)
    est = [3.8571, 0.2857, 1.619]
    assert_almost_equal(est, m, decimal=4)
    val0 = [[2.9388, -5.8776, 1.6327],
            [-5.8776, 12.7347, -4.2449],
            [1.6327, -4.2449, 2.3220]]
    assert_almost_equal(val0, cov, decimal=4)

    m2, cov2 = np.polyfit(x, y+err, 2, w=weights, cov=True)
    assert_almost_equal([4.8927, -1.0177, 1.7768], m2, decimal=4)
    val = [[8.7929, -10.0103, 0.9756],
           [-10.0103, 13.6134, -1.8178],
           [0.9756, -1.8178, 0.6674]]
    assert_almost_equal(val, cov2, decimal=4)

    # check 2D (n,1) case
    y = y[:, np.newaxis]
    c = c[:, np.newaxis]
    assert_almost_equal(c, np.polyfit(x, y, 2))
    # check 2D (n,2) case
    yy = np.concatenate((y, y), axis=1)
    cc = np.concatenate((c, c), axis=1)
    assert_almost_equal(cc, np.polyfit(x, yy, 2))

    m, cov = np.polyfit(x, yy + np.array(err)[:, np.newaxis], 2, cov=True)
    assert_almost_equal(est, m[:, 0], decimal=4)
    assert_almost_equal(est, m[:, 1], decimal=4)
    assert_almost_equal(val0, cov[:, :, 0], decimal=4)
    assert_almost_equal(val0, cov[:, :, 1], decimal=4)
def test_concatenate_axis_None(self):
    a = np.arange(4, dtype=np.float64).reshape((2, 2))
    b = list(range(3))
    c = ['x']
    r = np.concatenate((a, a), axis=None)
    assert_equal(r.dtype, a.dtype)
    assert_equal(r.ndim, 1)
    r = np.concatenate((a, b), axis=None)
    assert_equal(r.size, a.size + len(b))
    assert_equal(r.dtype, a.dtype)
    r = np.concatenate((a, b, c), axis=None)
    d = array(['0.0', '1.0', '2.0', '3.0',
               '0', '1', '2', 'x'])
    assert_array_equal(r, d)

    out = np.zeros(a.size + len(b))
    r = np.concatenate((a, b), axis=None)
    rout = np.concatenate((a, b), axis=None, out=out)
    assert_(out is rout)
    assert_equal(r, rout)
def setxor1d(ar1, ar2, assume_unique=False):
    """
    Find the set exclusive-or of two arrays.

    Return the sorted, unique values that are in only one (not both) of the
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation.  Default is False.

    Returns
    -------
    setxor1d : ndarray
        Sorted 1D array of unique values that are in only one of the input
        arrays.

    Examples
    --------
    >>> a = np.array([1, 2, 3, 2, 4])
    >>> b = np.array([2, 3, 5, 7, 5])
    >>> np.setxor1d(a, b)
    array([1, 4, 5, 7])

    """
    if not assume_unique:
        ar1 = unique(ar1)
        ar2 = unique(ar2)

    aux = np.concatenate((ar1, ar2))
    if aux.size == 0:
        return aux

    aux.sort()
    flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
    return aux[flag[1:] & flag[:-1]]
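# A sketch of the boundary-flag trick used above (illustrative values, not
# from the sources): after sorting the concatenation of the two unique
# inputs, an element belongs to exactly one input iff both of its run
# boundaries are "new value" positions, i.e. its run has length 1.
import numpy as np

aux = np.sort(np.concatenate((np.unique([1, 2, 3, 2, 4]),
                              np.unique([2, 3, 5, 7, 5]))))
# aux == [1, 2, 2, 3, 3, 4, 5, 7]; the common values 2 and 3 form runs of
# length 2, so a False sits between their two boundary flags.
flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
assert np.array_equal(aux[flag[1:] & flag[:-1]], [1, 4, 5, 7])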
def find_duplicates(a, key=None, ignoremask=True, return_index=False):
    """
    Find the duplicates in a structured array along a given key

    Parameters
    ----------
    a : array-like
        Input array
    key : {string, None}, optional
        Name of the fields along which to check the duplicates.
        If None, the search is performed by records
    ignoremask : {True, False}, optional
        Whether masked data should be discarded or considered as duplicates.
    return_index : {False, True}, optional
        Whether to return the indices of the duplicated values.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> ndtype = [('a', int)]
    >>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3],
    ...                 mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
    >>> rfn.find_duplicates(a, ignoremask=True, return_index=True)
    ... # XXX: judging by the output, the ignoremask flag has no effect
    """
    a = np.asanyarray(a).ravel()
    # Get a dictionary of fields
    fields = get_fieldstructure(a.dtype)
    # Get the sorting data (by selecting the corresponding field)
    base = a
    if key:
        for f in fields[key]:
            base = base[f]
        base = base[key]
    # Get the sorting indices and the sorted data
    sortidx = base.argsort()
    sortedbase = base[sortidx]
    sorteddata = sortedbase.filled()
    # Compare the sorting data
    flag = (sorteddata[:-1] == sorteddata[1:])
    # If masked data must be ignored, set the flag to false where needed
    if ignoremask:
        sortedmask = sortedbase.recordmask
        flag[sortedmask[1:]] = False
    flag = np.concatenate(([False], flag))
    # We need to take the point on the left as well (else we're missing it)
    flag[:-1] = flag[:-1] + flag[1:]
    duplicates = a[sortidx][flag]
    if return_index:
        return (duplicates, sortidx[flag])
    else:
        return duplicates
def test_simple(self):
    """
    Straightforward testing with a mixture of linspace data (for
    consistency). All test values have been precomputed and the values
    shouldn't change
    """
    # Some basic sanity checking, with some fixed data.
    # Checking for the correct number of bins
    basic_test = {
        50:   {'fd': 4,  'scott': 4,  'rice': 8,  'sturges': 7,
               'doane': 8,  'sqrt': 8,  'auto': 7},
        500:  {'fd': 8,  'scott': 8,  'rice': 16, 'sturges': 10,
               'doane': 12, 'sqrt': 23, 'auto': 10},
        5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14,
               'doane': 17, 'sqrt': 71, 'auto': 17}
    }

    for testlen, expectedResults in basic_test.items():
        # Create some sort of non uniform data to test with
        # (2 peak uniform mixture)
        x1 = np.linspace(-10, -1, testlen // 5 * 2)
        x2 = np.linspace(1, 10, testlen // 5 * 3)
        x = np.concatenate((x1, x2))
        for estimator, numbins in expectedResults.items():
            a, b = np.histogram(x, estimator)
            assert_equal(len(a), numbins, err_msg="For the {0} estimator "
                         "with datasize of {1}".format(estimator, testlen))
def _pad_wrap(arr, pad_amt, axis=-1):
    """
    Pad `axis` of `arr` via wrapping.

    Parameters
    ----------
    arr : ndarray
        Input array of arbitrary shape.
    pad_amt : tuple of ints, length 2
        Padding to (prepend, append) along `axis`.
    axis : int
        Axis along which to pad `arr`.

    Returns
    -------
    padarr : ndarray
        Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
        values appended along `axis`. Both regions are padded wrapped values
        from the opposite end of `axis`.

    Notes
    -----
    This method of padding is also known as 'tile' or 'tiling'.

    The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
    single function, lest the indexing tricks in non-integer multiples of
    the original shape would violate repetition in the final iteration.

    """
    # Implicit booleanness to test for zero (or None) in any scalar type
    if pad_amt[0] == 0 and pad_amt[1] == 0:
        return arr

    ##########################################################################
    # Prepended region

    # Slice off a reverse indexed chunk from near edge to pad `arr` before
    wrap_slice = _slice_last(arr.shape, pad_amt[0], axis=axis)
    wrap_chunk1 = arr[wrap_slice]

    ##########################################################################
    # Appended region

    # Slice off a reverse indexed chunk from far edge to pad `arr` after
    wrap_slice = _slice_first(arr.shape, pad_amt[1], axis=axis)
    wrap_chunk2 = arr[wrap_slice]

    # Concatenate `arr` with both chunks, extending along `axis`
    return np.concatenate((wrap_chunk1, arr, wrap_chunk2), axis=axis)
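# A quick illustration via the public entry point (np.pad with mode='wrap'
# dispatches to this helper): each side is filled with values taken from
# the opposite end of the axis.
import numpy as np

arr = np.array([1, 2, 3, 4, 5])
assert np.array_equal(np.pad(arr, (2, 1), mode='wrap'),
                      [4, 5, 1, 2, 3, 4, 5, 1])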
def test_testAddSumProd(self):
    # Test add, sum, product.
    (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d
    assert_(eq(np.add.reduce(x), add.reduce(x)))
    assert_(eq(np.add.accumulate(x), add.accumulate(x)))
    assert_(eq(4, sum(array(4), axis=0)))
    assert_(eq(np.sum(x, axis=0), sum(x, axis=0)))
    assert_(eq(np.sum(filled(xm, 0), axis=0), sum(xm, axis=0)))
    assert_(eq(np.sum(x, 0), sum(x, 0)))
    assert_(eq(np.product(x, axis=0), product(x, axis=0)))
    assert_(eq(np.product(x, 0), product(x, 0)))
    assert_(eq(np.product(filled(xm, 1), axis=0), product(xm, axis=0)))
    if len(s) > 1:
        assert_(eq(np.concatenate((x, y), 1), concatenate((xm, ym), 1)))
        assert_(eq(np.add.reduce(x, 1), add.reduce(x, 1)))
        assert_(eq(np.sum(x, 1), sum(x, 1)))
        assert_(eq(np.product(x, 1), product(x, 1)))
def setup(self):
    # An array of all possible float16 values
    self.all_f16 = np.arange(0x10000, dtype=uint16)
    self.all_f16.dtype = float16
    self.all_f32 = np.array(self.all_f16, dtype=float32)
    self.all_f64 = np.array(self.all_f16, dtype=float64)

    # An array of all non-NaN float16 values, in sorted order
    self.nonan_f16 = np.concatenate(
        (np.arange(0xfc00, 0x7fff, -1, dtype=uint16),
         np.arange(0x0000, 0x7c01, 1, dtype=uint16)))
    self.nonan_f16.dtype = float16
    self.nonan_f32 = np.array(self.nonan_f16, dtype=float32)
    self.nonan_f64 = np.array(self.nonan_f16, dtype=float64)

    # An array of all finite float16 values, in sorted order
    self.finite_f16 = self.nonan_f16[1:-1]
    self.finite_f32 = self.nonan_f32[1:-1]
    self.finite_f64 = self.nonan_f64[1:-1]
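# A side sketch (assumes IEEE-754 binary16 ordering, not stated in the
# fixture): bit patterns 0x0000..0x7c00 encode +0.0 through +inf in
# increasing order, while 0x8000..0xfc00 encode -0.0 through -inf, so
# walking the negative patterns backwards and the positive ones forwards,
# as above, yields every non-NaN float16 in ascending order.
import numpy as np

neg = np.arange(0xfc00, 0x7fff, -1, dtype=np.uint16).view(np.float16)
pos = np.arange(0x0000, 0x7c01, 1, dtype=np.uint16).view(np.float16)
nonan = np.concatenate((neg, pos)).astype(np.float32)
assert np.all(np.diff(nonan) >= 0)  # sorted; the -0.0/+0.0 tie gives the 0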
def test_poly(self):
    assert_array_almost_equal(np.poly([3, -np.sqrt(2), np.sqrt(2)]),
                              [1, -3, -2, 6])

    # From matlab docs
    A = [[1, 2, 3], [4, 5, 6], [7, 8, 0]]
    assert_array_almost_equal(np.poly(A), [1, -6, -72, -27])

    # Should produce real output for perfect conjugates
    assert_(np.isrealobj(np.poly([+1.082j, +2.613j, -2.613j, -1.082j])))
    assert_(np.isrealobj(np.poly([0+1j, -0+-1j, 1+2j,
                                  1-2j, 1.+3.5j, 1-3.5j])))
    assert_(np.isrealobj(np.poly([1j, -1j, 1+2j, 1-2j, 1+3j, 1-3.j])))
    assert_(np.isrealobj(np.poly([1j, -1j, 1+2j, 1-2j])))
    assert_(np.isrealobj(np.poly([1j, -1j, 2j, -2j])))
    assert_(np.isrealobj(np.poly([1j, -1j])))
    assert_(np.isrealobj(np.poly([1, -1])))

    assert_(np.iscomplexobj(np.poly([1j, -1.0000001j])))

    np.random.seed(42)
    a = np.random.randn(100) + 1j*np.random.randn(100)
    assert_(np.isrealobj(np.poly(np.concatenate((a, np.conjugate(a))))))
def histogram(a, bins=10, range=None, normed=None, weights=None,
              density=None):
    r"""
    Compute the histogram of a set of data.

    Parameters
    ----------
    a : array_like
        Input data. The histogram is computed over the flattened array.
    bins : int or sequence of scalars or str, optional
        If `bins` is an int, it defines the number of equal-width
        bins in the given range (10, by default). If `bins` is a
        sequence, it defines the bin edges, including the rightmost
        edge, allowing for non-uniform bin widths.

        .. versionadded:: 1.11.0

        If `bins` is a string, it defines the method used to calculate the
        optimal bin width, as defined by `histogram_bin_edges`.
    range : (float, float), optional
        The lower and upper range of the bins.  If not provided, range
        is simply ``(a.min(), a.max())``.  Values outside the range are
        ignored. The first element of the range must be less than or
        equal to the second. `range` affects the automatic bin
        computation as well. While bin width is computed to be optimal
        based on the actual data within `range`, the bin count will fill
        the entire range including portions containing no data.
    normed : bool, optional

        .. deprecated:: 1.6.0

        This is equivalent to the `density` argument, but produces incorrect
        results for unequal bin widths. It should not be used.

        .. versionchanged:: 1.15.0
            DeprecationWarnings are actually emitted.

    weights : array_like, optional
        An array of weights, of the same shape as `a`.  Each value in
        `a` only contributes its associated weight towards the bin count
        (instead of 1). If `density` is True, the weights are
        normalized, so that the integral of the density over the range
        remains 1.
    density : bool, optional
        If ``False``, the result will contain the number of samples in
        each bin. If ``True``, the result is the value of the
        probability *density* function at the bin, normalized such that
        the *integral* over the range is 1. Note that the sum of the
        histogram values will not be equal to 1 unless bins of unity
        width are chosen; it is not a probability *mass* function.

        Overrides the ``normed`` keyword if given.

    Returns
    -------
    hist : array
        The values of the histogram. See `density` and `weights` for a
        description of the possible semantics.
    bin_edges : array of dtype float
        Return the bin edges ``(length(hist)+1)``.

    See Also
    --------
    histogramdd, bincount, searchsorted, digitize, histogram_bin_edges

    Notes
    -----
    All but the last (righthand-most) bin is half-open.  In other words,
    if `bins` is::

      [1, 2, 3, 4]

    then the first bin is ``[1, 2)`` (including 1, but excluding 2) and
    the second ``[2, 3)``.  The last bin, however, is ``[3, 4]``, which
    *includes* 4.

    Examples
    --------
    >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3])
    (array([0, 2, 1]), array([0, 1, 2, 3]))
    >>> np.histogram(np.arange(4), bins=np.arange(5), density=True)
    (array([ 0.25,  0.25,  0.25,  0.25]), array([0, 1, 2, 3, 4]))
    >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3])
    (array([1, 4, 1]), array([0, 1, 2, 3]))

    >>> a = np.arange(5)
    >>> hist, bin_edges = np.histogram(a, density=True)
    >>> hist
    array([ 0.5,  0. ,  0.5,  0. ,  0. ,  0.5,  0. ,  0.5,  0. ,  0.5])
    >>> hist.sum()
    2.4999999999999996
    >>> np.sum(hist * np.diff(bin_edges))
    1.0

    .. versionadded:: 1.11.0

    Automated Bin Selection Methods example, using 2 peak random data
    with 2000 points:

    >>> import matplotlib.pyplot as plt
    >>> rng = np.random.RandomState(10)  # deterministic random data
    >>> a = np.hstack((rng.normal(size=1000),
    ...                rng.normal(loc=5, scale=2, size=1000)))
    >>> plt.hist(a, bins='auto')  # arguments are passed to np.histogram
    >>> plt.title("Histogram with 'auto' bins")
    >>> plt.show()

    """
    a, weights = _ravel_and_check_weights(a, weights)

    bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)

    # Histogram is an integer or a float array depending on the weights.
    if weights is None:
        ntype = np.dtype(np.intp)
    else:
        ntype = weights.dtype

    # We set a block size, as this allows us to iterate over chunks when
    # computing histograms, to minimize memory usage.
    BLOCK = 65536

    # The fast path uses bincount, but that only works for certain types
    # of weight
    simple_weights = (
        weights is None or
        np.can_cast(weights.dtype, np.double) or
        np.can_cast(weights.dtype, complex))

    if uniform_bins is not None and simple_weights:
        # Fast algorithm for equal bins
        # We now convert values of a to bin indices, under the assumption of
        # equal bin widths (which is valid here).
        first_edge, last_edge, n_equal_bins = uniform_bins

        # Initialize empty histogram
        n = np.zeros(n_equal_bins, ntype)

        # Pre-compute histogram scaling factor
        norm = n_equal_bins / _unsigned_subtract(last_edge, first_edge)

        # We iterate over blocks here for two reasons: the first is that for
        # large arrays, it is actually faster (for example for a 10^8 array it
        # is 2x as fast) and it results in a memory footprint 3x lower in the
        # limit of large arrays.
        for i in _range(0, len(a), BLOCK):
            tmp_a = a[i:i+BLOCK]
            if weights is None:
                tmp_w = None
            else:
                tmp_w = weights[i:i + BLOCK]

            # Only include values in the right range
            keep = (tmp_a >= first_edge)
            keep &= (tmp_a <= last_edge)
            if not np.logical_and.reduce(keep):
                tmp_a = tmp_a[keep]
                if tmp_w is not None:
                    tmp_w = tmp_w[keep]

            # This cast ensures no type promotions occur below, which gh-10322
            # makes unpredictable. Getting it wrong leads to precision errors
            # like gh-8123.
            tmp_a = tmp_a.astype(bin_edges.dtype, copy=False)

            # Compute the bin indices, and for values that lie exactly on
            # last_edge we need to subtract one
            f_indices = _unsigned_subtract(tmp_a, first_edge) * norm
            indices = f_indices.astype(np.intp)
            indices[indices == n_equal_bins] -= 1

            # The index computation is not guaranteed to give exactly
            # consistent results within ~1 ULP of the bin edges.
            decrement = tmp_a < bin_edges[indices]
            indices[decrement] -= 1
            # The last bin includes the right edge. The other bins do not.
            increment = ((tmp_a >= bin_edges[indices + 1])
                         & (indices != n_equal_bins - 1))
            indices[increment] += 1

            # We now compute the histogram using bincount
            if ntype.kind == 'c':
                n.real += np.bincount(indices, weights=tmp_w.real,
                                      minlength=n_equal_bins)
                n.imag += np.bincount(indices, weights=tmp_w.imag,
                                      minlength=n_equal_bins)
            else:
                n += np.bincount(indices, weights=tmp_w,
                                 minlength=n_equal_bins).astype(ntype)
    else:
        # Compute via cumulative histogram
        cum_n = np.zeros(bin_edges.shape, ntype)
        if weights is None:
            for i in _range(0, len(a), BLOCK):
                sa = np.sort(a[i:i+BLOCK])
                cum_n += _search_sorted_inclusive(sa, bin_edges)
        else:
            zero = np.zeros(1, dtype=ntype)
            for i in _range(0, len(a), BLOCK):
                tmp_a = a[i:i+BLOCK]
                tmp_w = weights[i:i+BLOCK]
                sorting_index = np.argsort(tmp_a)
                sa = tmp_a[sorting_index]
                sw = tmp_w[sorting_index]
                cw = np.concatenate((zero, sw.cumsum()))
                bin_index = _search_sorted_inclusive(sa, bin_edges)
                cum_n += cw[bin_index]

        n = np.diff(cum_n)

    # density overrides the normed keyword
    if density is not None:
        if normed is not None:
            # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
            warnings.warn(
                    "The normed argument is ignored when density is provided. "
                    "In future passing both will result in an error.",
                    DeprecationWarning, stacklevel=2)
        normed = None

    if density:
        db = np.array(np.diff(bin_edges), float)
        return n/db/n.sum(), bin_edges
    elif normed:
        # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
        warnings.warn(
                "Passing `normed=True` on non-uniform bins has always been "
                "broken, and computes neither the probability density "
                "function nor the probability mass function. "
                "The result is only correct if the bins are uniform, when "
                "density=True will produce the same result anyway. "
                "The argument will be removed in a future version of "
                "numpy.",
                np.VisibleDeprecationWarning, stacklevel=2)

        # this normalization is incorrect, but
        db = np.array(np.diff(bin_edges), float)
        return n/(n*db).sum(), bin_edges
    else:
        if normed is not None:
            # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
            warnings.warn(
                    "Passing normed=False is deprecated, and has no effect. "
                    "Consider passing the density argument instead.",
                    DeprecationWarning, stacklevel=2)
        return n, bin_edges
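# A consistency sketch (not part of the library): the bincount fast path
# (integer `bins` plus a range) and the cumulative searchsorted path
# (explicit edge array) should agree, since the ULP decrement/increment
# fix-ups above exist precisely to match searchsorted semantics.
import numpy as np

data = np.random.RandomState(0).normal(size=1000)
hist_fast, edges = np.histogram(data, bins=10, range=(-3, 3))
hist_slow, _ = np.histogram(data, bins=edges)  # edge array: slow path
assert np.array_equal(hist_fast, hist_slow)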
def _do_append(arr, pad_chunk, axis):
    return np.concatenate(
        (arr, pad_chunk.astype(arr.dtype, copy=False)), axis=axis)
def _pad_sym(arr, pad_amt, method, axis=-1):
    """
    Pad `axis` of `arr` by symmetry.

    Parameters
    ----------
    arr : ndarray
        Input array of arbitrary shape.
    pad_amt : tuple of ints, length 2
        Padding to (prepend, append) along `axis`.
    method : str
        Controls method of symmetry; options are 'even' or 'odd'.
    axis : int
        Axis along which to pad `arr`.

    Returns
    -------
    padarr : ndarray
        Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
        values appended along `axis`. Both regions are padded with symmetric
        values from the original array.

    Notes
    -----
    This algorithm DOES pad with repetition, i.e. the edges are repeated.
    For padding without repeated edges, use `mode='reflect'`.

    The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
    single function, lest the indexing tricks in non-integer multiples of
    the original shape would violate repetition in the final iteration.

    """
    # Implicit booleanness to test for zero (or None) in any scalar type
    if pad_amt[0] == 0 and pad_amt[1] == 0:
        return arr

    ##########################################################################
    # Prepended region

    # Slice off a reverse indexed chunk from near edge to pad `arr` before
    sym_slice = _slice_first(arr.shape, pad_amt[0], axis=axis)
    rev_idx = _slice_at_axis(arr.shape, slice(None, None, -1), axis=axis)
    sym_chunk1 = arr[sym_slice][rev_idx]

    # Memory/computationally more expensive, only do this if `method='odd'`
    if 'odd' in method and pad_amt[0] > 0:
        edge_slice1 = _slice_first(arr.shape, 1, axis=axis)
        edge_chunk = arr[edge_slice1]
        sym_chunk1 = 2 * edge_chunk - sym_chunk1
        del edge_chunk

    ##########################################################################
    # Appended region

    # Slice off a reverse indexed chunk from far edge to pad `arr` after
    sym_slice = _slice_last(arr.shape, pad_amt[1], axis=axis)
    sym_chunk2 = arr[sym_slice][rev_idx]

    if 'odd' in method:
        edge_slice2 = _slice_last(arr.shape, 1, axis=axis)
        edge_chunk = arr[edge_slice2]
        sym_chunk2 = 2 * edge_chunk - sym_chunk2
        del edge_chunk

    # Concatenate `arr` with both chunks, extending along `axis`
    return np.concatenate((sym_chunk1, arr, sym_chunk2), axis=axis)
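# A quick illustration via np.pad (mode='symmetric' dispatches to this
# helper; reflect_type selects the 'even'/'odd' method): 'even' mirrors
# including the edge value, 'odd' reflects through it as 2*edge - mirrored.
import numpy as np

arr = np.array([1, 2, 3])
assert np.array_equal(np.pad(arr, (2, 0), mode='symmetric'),
                      [2, 1, 1, 2, 3])
assert np.array_equal(np.pad(arr, (2, 0), mode='symmetric',
                             reflect_type='odd'),
                      [0, 1, 1, 2, 3])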
def in1d(ar1, ar2, assume_unique=False, invert=False):
    """
    Test whether each element of a 1-D array is also present in a second array.

    Returns a boolean array the same length as `ar1` that is True
    where an element of `ar1` is in `ar2` and False otherwise.

    We recommend using :func:`isin` instead of `in1d` for new code.

    Parameters
    ----------
    ar1 : (M,) array_like
        Input array.
    ar2 : array_like
        The values against which to test each value of `ar1`.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation.  Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted (that is,
        False where an element of `ar1` is in `ar2` and True otherwise).
        Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
        to (but is faster than) ``np.invert(in1d(a, b))``.

        .. versionadded:: 1.8.0

    Returns
    -------
    in1d : (M,) ndarray, bool
        The values `ar1[in1d]` are in `ar2`.

    See Also
    --------
    isin                  : Version of this function that preserves the
                            shape of ar1.
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Notes
    -----
    `in1d` can be considered as an element-wise function version of the
    python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
    equivalent to ``np.array([item in b for item in a])``.
    However, this idea fails if `ar2` is a set, or similar (non-sequence)
    container:  As ``ar2`` is converted to an array, in those cases
    ``asarray(ar2)`` is an object array rather than the expected array of
    contained values.

    .. versionadded:: 1.4.0

    Examples
    --------
    >>> test = np.array([0, 1, 2, 5, 0])
    >>> states = [0, 2]
    >>> mask = np.in1d(test, states)
    >>> mask
    array([ True, False,  True, False,  True])
    >>> test[mask]
    array([0, 2, 0])
    >>> mask = np.in1d(test, states, invert=True)
    >>> mask
    array([False,  True, False,  True, False])
    >>> test[mask]
    array([1, 5])
    """
    # Ravel both arrays, behavior for the first array could be different
    ar1 = np.asarray(ar1).ravel()
    ar2 = np.asarray(ar2).ravel()

    # Check if one of the arrays may contain arbitrary objects
    contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject

    # This code is run when
    # a) the first condition is true, making the code significantly faster
    # b) the second condition is true (i.e. `ar1` or `ar2` may contain
    #    arbitrary objects), since then sorting is not guaranteed to work
    if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
        if invert:
            mask = np.ones(len(ar1), dtype=bool)
            for a in ar2:
                mask &= (ar1 != a)
        else:
            mask = np.zeros(len(ar1), dtype=bool)
            for a in ar2:
                mask |= (ar1 == a)
        return mask

    # Otherwise use sorting
    if not assume_unique:
        ar1, rev_idx = np.unique(ar1, return_inverse=True)
        ar2 = np.unique(ar2)

    ar = np.concatenate((ar1, ar2))
    # We need this to be a stable sort, so always use 'mergesort'
    # here. The values from the first array should always come before
    # the values from the second array.
    order = ar.argsort(kind='mergesort')
    sar = ar[order]
    if invert:
        bool_ar = (sar[1:] != sar[:-1])
    else:
        bool_ar = (sar[1:] == sar[:-1])
    flag = np.concatenate((bool_ar, [invert]))
    ret = np.empty(ar.shape, dtype=bool)
    ret[order] = flag

    if assume_unique:
        return ret[:len(ar1)]
    else:
        return ret[rev_idx]
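# A sketch of the sort-based core above (illustrative values, not from the
# sources): after a stable mergesort of concatenate((ar1, ar2)), an ar1
# element is present in ar2 exactly when it equals its right-hand
# neighbour, because equal values from ar1 always sort ahead of those
# from ar2.
import numpy as np

ar1 = np.array([0, 1, 2, 5])           # already unique
ar2 = np.array([0, 2])
ar = np.concatenate((ar1, ar2))
order = ar.argsort(kind='mergesort')
sar = ar[order]                        # [0, 0, 1, 2, 2, 5]
flag = np.concatenate((sar[1:] == sar[:-1], [False]))
ret = np.empty(ar.shape, dtype=bool)
ret[order] = flag
assert np.array_equal(ret[:len(ar1)], [True, False, True, False])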
def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
            defaults=None, usemask=True, asrecarray=False):
    """
    Join arrays `r1` and `r2` on key `key`.

    The key should be either a string or a sequence of strings corresponding
    to the fields used to join the array.  An exception is raised if the
    `key` field cannot be found in the two input arrays.  Neither `r1` nor
    `r2` should have any duplicates along `key`: the presence of duplicates
    will make the output quite unreliable. Note that duplicates are not
    looked for by the algorithm.

    Parameters
    ----------
    key : {string, sequence}
        A string or a sequence of strings corresponding to the fields used
        for comparison.
    r1, r2 : arrays
        Structured arrays.
    jointype : {'inner', 'outer', 'leftouter'}, optional
        If 'inner', returns the elements common to both r1 and r2.
        If 'outer', returns the common elements as well as the elements of
        r1 not in r2 and the elements of r2 not in r1.
        If 'leftouter', returns the common elements and the elements of r1
        not in r2.
    r1postfix : string, optional
        String appended to the names of the fields of r1 that are present
        in r2 but absent from the key.
    r2postfix : string, optional
        String appended to the names of the fields of r2 that are present
        in r1 but absent from the key.
    defaults : {dictionary}, optional
        Dictionary mapping field names to the corresponding default values.
    usemask : {True, False}, optional
        Whether to return a MaskedArray (or MaskedRecords if
        `asrecarray==True`) or a ndarray.
    asrecarray : {False, True}, optional
        Whether to return a recarray (or MaskedRecords if `usemask==True`)
        or just a flexible-type ndarray.

    Notes
    -----
    * The output is sorted along the key.
    * A temporary array is formed by dropping the fields not in the key for
      the two arrays and concatenating the result. This array is then
      sorted, and the common entries selected. The output is constructed by
      filling the fields with the selected entries. Matching is not
      preserved if there are some duplicates...

    """
    # Check jointype
    if jointype not in ('inner', 'outer', 'leftouter'):
        raise ValueError(
                "The 'jointype' argument should be in 'inner', "
                "'outer' or 'leftouter' (got '%s' instead)" % jointype
                )
    # If we have a single key, put it in a tuple
    if isinstance(key, basestring):
        key = (key,)

    # Check the keys
    if len(set(key)) != len(key):
        dup = next(x for n, x in enumerate(key) if x in key[n + 1:])
        raise ValueError("duplicate join key %r" % dup)
    for name in key:
        if name not in r1.dtype.names:
            raise ValueError('r1 does not have key field %r' % name)
        if name not in r2.dtype.names:
            raise ValueError('r2 does not have key field %r' % name)

    # Make sure we work with ravelled arrays
    r1 = r1.ravel()
    r2 = r2.ravel()
    # Fixme: nb2 below is never used. Commenting out for pyflakes.
    # (nb1, nb2) = (len(r1), len(r2))
    nb1 = len(r1)
    (r1names, r2names) = (r1.dtype.names, r2.dtype.names)

    # Check the names for collision
    collisions = (set(r1names) & set(r2names)) - set(key)
    if collisions and not (r1postfix or r2postfix):
        msg = "r1 and r2 contain common names, r1postfix and r2postfix "
        msg += "can't both be empty"
        raise ValueError(msg)

    # Make temporary arrays of just the keys
    #  (use order of keys in `r1` for back-compatibility)
    key1 = [n for n in r1names if n in key]
    r1k = _keep_fields(r1, key1)
    r2k = _keep_fields(r2, key1)

    # Concatenate the two arrays for comparison
    aux = ma.concatenate((r1k, r2k))
    idx_sort = aux.argsort(order=key)
    aux = aux[idx_sort]
    #
    # Get the common keys
    flag_in = ma.concatenate(([False], aux[1:] == aux[:-1]))
    flag_in[:-1] = flag_in[1:] + flag_in[:-1]
    idx_in = idx_sort[flag_in]
    idx_1 = idx_in[(idx_in < nb1)]
    idx_2 = idx_in[(idx_in >= nb1)] - nb1
    (r1cmn, r2cmn) = (len(idx_1), len(idx_2))
    if jointype == 'inner':
        (r1spc, r2spc) = (0, 0)
    elif jointype == 'outer':
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        idx_2 = np.concatenate((idx_2, idx_out[(idx_out >= nb1)] - nb1))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, len(idx_2) - r2cmn)
    elif jointype == 'leftouter':
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, 0)
    # Select the entries from each input
    (s1, s2) = (r1[idx_1], r2[idx_2])
    #
    # Build the new description of the output array .......
    # Start with the key fields
    ndtype = get_fieldspec(r1k.dtype)

    # Add the fields from r1
    for fname, fdtype in get_fieldspec(r1.dtype):
        if fname not in key:
            ndtype.append((fname, fdtype))

    # Add the fields from r2
    for fname, fdtype in get_fieldspec(r2.dtype):
        # Have we seen the current name already ?
        # we need to rebuild this list every time
        names = list(name for name, dtype in ndtype)
        try:
            nameidx = names.index(fname)
        except ValueError:
            #... we haven't: just add the description to the current list
            ndtype.append((fname, fdtype))
        else:
            # collision
            _, cdtype = ndtype[nameidx]
            if fname in key:
                # The current field is part of the key: take the largest dtype
                ndtype[nameidx] = (fname, max(fdtype, cdtype))
            else:
                # The current field is not part of the key: add the suffixes,
                # and place the new field adjacent to the old one
                ndtype[nameidx:nameidx + 1] = [
                    (fname + r1postfix, cdtype),
                    (fname + r2postfix, fdtype)
                ]
    # Rebuild a dtype from the new fields
    ndtype = np.dtype(ndtype)
    # Find the largest nb of common fields :
    # r1cmn and r2cmn should be equal, but...
    cmn = max(r1cmn, r2cmn)
    # Construct an empty array
    output = ma.masked_all((cmn + r1spc + r2spc,), dtype=ndtype)
    names = output.dtype.names
    for f in r1names:
        selected = s1[f]
        if f not in names or (f in r2names and not r2postfix and f not in key):
            f += r1postfix
        current = output[f]
        current[:r1cmn] = selected[:r1cmn]
        if jointype in ('outer', 'leftouter'):
            current[cmn:cmn + r1spc] = selected[r1cmn:]
    for f in r2names:
        selected = s2[f]
        if f not in names or (f in r1names and not r1postfix and f not in key):
            f += r2postfix
        current = output[f]
        current[:r2cmn] = selected[:r2cmn]
        if (jointype == 'outer') and r2spc:
            current[-r2spc:] = selected[r2cmn:]
    # Sort and finalize the output
    output.sort(order=key)
    kwargs = dict(usemask=usemask, asrecarray=asrecarray)
    return _fix_output(_fix_defaults(output, defaults), **kwargs)
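# A hedged usage sketch (field names invented for illustration): an inner
# join keeps only the keys common to both structured arrays and merges
# their non-key fields.
import numpy as np
from numpy.lib import recfunctions as rfn

r1 = np.array([(1, 10.), (2, 20.), (3, 30.)],
              dtype=[('key', int), ('a', float)])
r2 = np.array([(2, 200.), (3, 300.), (4, 400.)],
              dtype=[('key', int), ('b', float)])
joined = rfn.join_by('key', r1, r2, jointype='inner', usemask=False)
assert list(joined['key']) == [2, 3]          # only the shared keys survive
assert joined.dtype.names == ('key', 'a', 'b')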
def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
    """
    Find the intersection of two arrays.

    Return the sorted, unique values that are in both of the input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. Will be flattened if not already 1D.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation.  Default is False.
    return_indices : bool
        If True, the indices which correspond to the intersection of the
        two arrays are returned. The first instance of a value is used
        if there are multiple. Default is False.

        .. versionadded:: 1.15.0

    Returns
    -------
    intersect1d : ndarray
        Sorted 1D array of common and unique elements.
    comm1 : ndarray
        The indices of the first occurrences of the common values in `ar1`.
        Only provided if `return_indices` is True.
    comm2 : ndarray
        The indices of the first occurrences of the common values in `ar2`.
        Only provided if `return_indices` is True.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
    array([1, 3])

    To intersect more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([3])

    To return the indices of the values common to the input arrays
    along with the intersected values:

    >>> x = np.array([1, 1, 2, 3, 4])
    >>> y = np.array([2, 1, 4, 6])
    >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
    >>> x_ind, y_ind
    (array([0, 2, 4]), array([1, 0, 2]))
    >>> xy, x[x_ind], y[y_ind]
    (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))

    """
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)

    if not assume_unique:
        if return_indices:
            ar1, ind1 = unique(ar1, return_index=True)
            ar2, ind2 = unique(ar2, return_index=True)
        else:
            ar1 = unique(ar1)
            ar2 = unique(ar2)
    else:
        ar1 = ar1.ravel()
        ar2 = ar2.ravel()

    aux = np.concatenate((ar1, ar2))
    if return_indices:
        aux_sort_indices = np.argsort(aux, kind='mergesort')
        aux = aux[aux_sort_indices]
    else:
        aux.sort()

    mask = aux[1:] == aux[:-1]
    int1d = aux[:-1][mask]

    if return_indices:
        ar1_indices = aux_sort_indices[:-1][mask]
        ar2_indices = aux_sort_indices[1:][mask] - ar1.size
        if not assume_unique:
            ar1_indices = ind1[ar1_indices]
            ar2_indices = ind2[ar2_indices]
        return int1d, ar1_indices, ar2_indices
    else:
        return int1d