Example 1
    def test_testUfuncs1(self):
        # Test various functions such as sin, cos.
        (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d
        assert_(eq(np.cos(x), cos(xm)))
        assert_(eq(np.cosh(x), cosh(xm)))
        assert_(eq(np.sin(x), sin(xm)))
        assert_(eq(np.sinh(x), sinh(xm)))
        assert_(eq(np.tan(x), tan(xm)))
        assert_(eq(np.tanh(x), tanh(xm)))
        with np.errstate(divide='ignore', invalid='ignore'):
            assert_(eq(np.sqrt(abs(x)), sqrt(xm)))
            assert_(eq(np.log(abs(x)), log(xm)))
            assert_(eq(np.log10(abs(x)), log10(xm)))
        assert_(eq(np.exp(x), exp(xm)))
        assert_(eq(np.arcsin(z), arcsin(zm)))
        assert_(eq(np.arccos(z), arccos(zm)))
        assert_(eq(np.arctan(z), arctan(zm)))
        assert_(eq(np.arctan2(x, y), arctan2(xm, ym)))
        assert_(eq(np.absolute(x), absolute(xm)))
        assert_(eq(np.equal(x, y), equal(xm, ym)))
        assert_(eq(np.not_equal(x, y), not_equal(xm, ym)))
        assert_(eq(np.less(x, y), less(xm, ym)))
        assert_(eq(np.greater(x, y), greater(xm, ym)))
        assert_(eq(np.less_equal(x, y), less_equal(xm, ym)))
        assert_(eq(np.greater_equal(x, y), greater_equal(xm, ym)))
        assert_(eq(np.conjugate(x), conjugate(xm)))
        assert_(eq(np.concatenate((x, y)), concatenate((xm, ym))))
        assert_(eq(np.concatenate((x, y)), concatenate((x, y))))
        assert_(eq(np.concatenate((x, y)), concatenate((xm, y))))
        assert_(eq(np.concatenate((x, y, x)), concatenate((x, ym, x))))
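
What the `eq` comparisons above rely on: the `numpy.ma` ufunc wrappers mask out-of-domain inputs instead of producing NaNs. A minimal sketch of that behavior, with illustrative values:

import numpy as np

x = np.ma.array([-1.0, 1.0, 4.0])
with np.errstate(invalid='ignore'):
    plain = np.sqrt(x.data)    # NaN for the negative entry
masked = np.ma.sqrt(x)         # the negative entry is masked instead
assert np.isnan(plain[0]) and masked.mask[0]
assert np.array_equal(plain[1:], masked.compressed())
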
Example 2
    def test_exceptions(self):
        # test axis must be in bounds
        for ndim in [1, 2, 3]:
            a = np.ones((1,)*ndim)
            np.concatenate((a, a), axis=0)  # OK
            assert_raises(np.AxisError, np.concatenate, (a, a), axis=ndim)
            assert_raises(np.AxisError, np.concatenate, (a, a), axis=-(ndim + 1))

        # Scalars cannot be concatenated
        assert_raises(ValueError, concatenate, (0,))
        assert_raises(ValueError, concatenate, (np.array(0),))

        # test shapes must match except for concatenation axis
        a = np.ones((1, 2, 3))
        b = np.ones((2, 2, 3))
        axis = list(range(3))
        for i in range(3):
            np.concatenate((a, b), axis=axis[0])  # OK
            assert_raises(ValueError, np.concatenate, (a, b), axis=axis[1])
            assert_raises(ValueError, np.concatenate, (a, b), axis=axis[2])
            a = np.moveaxis(a, -1, 0)
            b = np.moveaxis(b, -1, 0)
            axis.append(axis.pop(0))

        # No arrays to concatenate raises ValueError
        assert_raises(ValueError, concatenate, ())
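
For reference, the rule this test exercises: every dimension except the concatenation axis must match between inputs. A standalone illustration:

import numpy as np

a = np.ones((1, 2, 3))
b = np.ones((2, 2, 3))
# The shapes differ only in axis 0, so that is the only legal axis.
assert np.concatenate((a, b), axis=0).shape == (3, 2, 3)
try:
    np.concatenate((a, b), axis=1)
except ValueError:
    pass  # the mismatch in dimension 0 (1 vs 2) is rejected
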
Example 3
    def test_hfft(self):
        x = random(14) + 1j*random(14)
        x_herm = np.concatenate((random(1), x, random(1)))
        x = np.concatenate((x_herm, x[::-1].conj()))
        assert_array_almost_equal(np.fft.fft(x), np.fft.hfft(x_herm))
        assert_array_almost_equal(np.fft.hfft(x_herm) / np.sqrt(30),
                                  np.fft.hfft(x_herm, norm="ortho"))
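
The test encodes the defining property of `hfft`: applied to the Hermitian half-spectrum, it matches the ordinary FFT of the full, conjugate-symmetric signal. A self-contained sketch of the same identity (the bare `random` above is assumed to be `numpy.random.random`):

import numpy as np

rng = np.random.RandomState(0)
# Hermitian half-spectrum: real endpoints, complex interior.
x_herm = np.concatenate((rng.rand(1),
                         rng.rand(14) + 1j * rng.rand(14),
                         rng.rand(1)))
# Full spectrum: append the interior, reversed and conjugated.
x_full = np.concatenate((x_herm, x_herm[1:-1][::-1].conj()))
assert np.allclose(np.fft.fft(x_full), np.fft.hfft(x_herm))
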
Example 4
    def test_large_concatenate_axis_None(self):
        # When no axis is given, concatenate uses flattened versions.
        # This also had a bug with many arrays (see gh-5979).
        x = np.arange(1, 100)
        r = np.concatenate(x, None)
        assert_array_equal(x, r)

        # This should probably be deprecated:
        r = np.concatenate(x, 100)  # axis is >= MAXDIMS
        assert_array_equal(x, r)
Example 5
    def test_weights(self):
        v = np.random.rand(100)
        w = np.ones(100) * 5
        a, b = histogram(v)
        na, nb = histogram(v, density=True)
        wa, wb = histogram(v, weights=w)
        nwa, nwb = histogram(v, weights=w, density=True)
        assert_array_almost_equal(a * 5, wa)
        assert_array_almost_equal(na, nwa)

        # Check weights are properly applied.
        v = np.linspace(0, 10, 10)
        w = np.concatenate((np.zeros(5), np.ones(5)))
        wa, wb = histogram(v, bins=np.arange(11), weights=w)
        assert_array_almost_equal(wa, w)

        # Check with integer weights
        wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1])
        assert_array_equal(wa, [4, 5, 0, 1])
        wa, wb = histogram([1, 2, 2, 4],
                           bins=4,
                           weights=[4, 3, 2, 1],
                           density=True)
        assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4)

        # Check weights with non-uniform bin widths
        a, b = histogram(np.arange(9), [0, 1, 3, 6, 10],
                         weights=[2, 1, 1, 1, 1, 1, 1, 1, 1],
                         density=True)
        assert_almost_equal(a, [.2, .1, .1, .075])
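
The expected densities follow from dividing each bin's weight by the total weight and the bin width; a worked check of the `[4, 5, 0, 1]` case:

import numpy as np

counts, edges = np.histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1],
                             density=True)
# Total weight is 4+3+2+1 = 10, and the range [1, 4] is split into
# 4 bins of width 3/4.  density = weight / (total_weight * bin_width).
width = (4 - 1) / 4
assert np.allclose(counts, np.array([4, 5, 0, 1]) / (10 * width))
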
Example 6
def union1d(ar1, ar2):
    """
    Find the union of two arrays.

    Return the unique, sorted array of values that are in either of the two
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. They are flattened if they are not already 1D.

    Returns
    -------
    union1d : ndarray
        Unique, sorted union of the input arrays.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.union1d([-1, 0, 1], [-2, 0, 2])
    array([-2, -1,  0,  1,  2])

    To find the union of more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([1, 2, 3, 4, 6])
    """
    return unique(np.concatenate((ar1, ar2), axis=None))
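
Because the concatenation uses ``axis=None``, both inputs are flattened first, so `union1d` also accepts multidimensional input; a quick illustration:

import numpy as np

a = np.array([[1, 2], [3, 4]])
b = np.array([3, 5])
# axis=None flattens both arrays before the set union is taken.
assert np.array_equal(np.union1d(a, b), [1, 2, 3, 4, 5])
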
Example 7
def _unique1d(ar,
              return_index=False,
              return_inverse=False,
              return_counts=False):
    """
    Find the unique elements of an array, ignoring shape.
    """
    ar = np.asanyarray(ar).flatten()

    optional_indices = return_index or return_inverse

    if optional_indices:
        perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
        aux = ar[perm]
    else:
        ar.sort()
        aux = ar
    mask = np.empty(aux.shape, dtype=np.bool_)
    mask[:1] = True
    mask[1:] = aux[1:] != aux[:-1]

    ret = (aux[mask], )
    if return_index:
        ret += (perm[mask], )
    if return_inverse:
        imask = np.cumsum(mask) - 1
        inv_idx = np.empty(mask.shape, dtype=np.intp)
        inv_idx[perm] = imask
        ret += (inv_idx, )
    if return_counts:
        idx = np.concatenate(np.nonzero(mask) + ([mask.size], ))
        ret += (np.diff(idx), )
    return ret
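
The core of `_unique1d` is the boolean `mask`: after sorting, an element starts a new run exactly when it differs from its predecessor. The same idea traced outside the function:

import numpy as np

ar = np.array([2, 1, 2, 3, 1])
aux = np.sort(ar)                      # [1, 1, 2, 2, 3]
mask = np.empty(aux.shape, dtype=bool)
mask[:1] = True                        # the first element always starts a run
mask[1:] = aux[1:] != aux[:-1]         # True wherever a new value begins
assert np.array_equal(aux[mask], [1, 2, 3])
# Run lengths (the return_counts branch): gaps between run starts.
idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
assert np.array_equal(np.diff(idx), [2, 2, 1])
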
Example 8
def _search_sorted_inclusive(a, v):
    """
    Like `searchsorted`, but where the last item in `v` is placed on the right.

    In the context of a histogram, this makes the last bin edge inclusive
    """
    return np.concatenate(
        (a.searchsorted(v[:-1], 'left'), a.searchsorted(v[-1:], 'right')))
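
A short illustration of why splitting the search makes the last edge inclusive (illustrative data):

import numpy as np

a = np.array([0.0, 0.5, 1.0, 1.0])      # sorted samples
edges = np.array([0.0, 0.5, 1.0])
# A plain 'left' search would leave the two 1.0 samples outside the
# last bin; searching the final edge with 'right' pulls them in.
inclusive = np.concatenate((a.searchsorted(edges[:-1], 'left'),
                            a.searchsorted(edges[-1:], 'right')))
assert np.array_equal(inclusive, [0, 1, 4])
assert np.array_equal(np.diff(inclusive), [1, 3])   # counts per bin
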
Example 9
    def test_polyfit(self):
        c = np.array([3., 2., 1.])
        x = np.linspace(0, 2, 7)
        y = np.polyval(c, x)
        err = [1, -1, 1, -1, 1, -1, 1]
        weights = np.arange(8, 1, -1)**2/7.0

        # Check exception when too few points for variance estimate. Note that
        # the Bayesian estimate requires the number of data points to exceed
        # degree + 3.
        assert_raises(ValueError, np.polyfit,
                      [0, 1, 3], [0, 1, 3], deg=0, cov=True)

        # check 1D case
        m, cov = np.polyfit(x, y+err, 2, cov=True)
        est = [3.8571, 0.2857, 1.619]
        assert_almost_equal(est, m, decimal=4)
        val0 = [[2.9388, -5.8776, 1.6327],
                [-5.8776, 12.7347, -4.2449],
                [1.6327, -4.2449, 2.3220]]
        assert_almost_equal(val0, cov, decimal=4)

        m2, cov2 = np.polyfit(x, y+err, 2, w=weights, cov=True)
        assert_almost_equal([4.8927, -1.0177, 1.7768], m2, decimal=4)
        val = [[8.7929, -10.0103, 0.9756],
               [-10.0103, 13.6134, -1.8178],
               [0.9756, -1.8178, 0.6674]]
        assert_almost_equal(val, cov2, decimal=4)

        # check 2D (n,1) case
        y = y[:, np.newaxis]
        c = c[:, np.newaxis]
        assert_almost_equal(c, np.polyfit(x, y, 2))
        # check 2D (n,2) case
        yy = np.concatenate((y, y), axis=1)
        cc = np.concatenate((c, c), axis=1)
        assert_almost_equal(cc, np.polyfit(x, yy, 2))

        m, cov = np.polyfit(x, yy + np.array(err)[:, np.newaxis], 2, cov=True)
        assert_almost_equal(est, m[:, 0], decimal=4)
        assert_almost_equal(est, m[:, 1], decimal=4)
        assert_almost_equal(val0, cov[:, :, 0], decimal=4)
        assert_almost_equal(val0, cov[:, :, 1], decimal=4)
Example 10
    def test_concatenate_axis_None(self):
        a = np.arange(4, dtype=np.float64).reshape((2, 2))
        b = list(range(3))
        c = ['x']
        r = np.concatenate((a, a), axis=None)
        assert_equal(r.dtype, a.dtype)
        assert_equal(r.ndim, 1)
        r = np.concatenate((a, b), axis=None)
        assert_equal(r.size, a.size + len(b))
        assert_equal(r.dtype, a.dtype)
        r = np.concatenate((a, b, c), axis=None)
        d = array(['0.0', '1.0', '2.0', '3.0',
                   '0', '1', '2', 'x'])
        assert_array_equal(r, d)

        out = np.zeros(a.size + len(b))
        r = np.concatenate((a, b), axis=None)
        rout = np.concatenate((a, b), axis=None, out=out)
        assert_(out is rout)
        assert_equal(r, rout)
Example 11
def setxor1d(ar1, ar2, assume_unique=False):
    """
    Find the set exclusive-or of two arrays.

    Return the sorted, unique values that are in only one (not both) of the
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation.  Default is False.

    Returns
    -------
    setxor1d : ndarray
        Sorted 1D array of unique values that are in only one of the input
        arrays.

    Examples
    --------
    >>> a = np.array([1, 2, 3, 2, 4])
    >>> b = np.array([2, 3, 5, 7, 5])
    >>> np.setxor1d(a,b)
    array([1, 4, 5, 7])

    """
    if not assume_unique:
        ar1 = unique(ar1)
        ar2 = unique(ar2)

    aux = np.concatenate((ar1, ar2))
    if aux.size == 0:
        return aux

    aux.sort()
    flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
    return aux[flag[1:] & flag[:-1]]
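
The `flag` trick: after sorting the combined (unique) inputs, a value belongs to exactly one array iff it differs from both of its neighbors. A worked trace of the docstring example:

import numpy as np

aux = np.sort(np.concatenate((np.unique([1, 2, 3, 2, 4]),
                              np.unique([2, 3, 5, 7, 5]))))
# aux == [1, 2, 2, 3, 3, 4, 5, 7]
flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
# Keeping elements with a boundary on both sides drops every value
# that occurs twice, i.e. every value present in both inputs.
assert np.array_equal(aux[flag[1:] & flag[:-1]], [1, 4, 5, 7])
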
Example 12
def find_duplicates(a, key=None, ignoremask=True, return_index=False):
    """
    Find the duplicates in a structured array along a given key

    Parameters
    ----------
    a : array-like
        Input array
    key : {string, None}, optional
        Name of the fields along which to check the duplicates.
        If None, the search is performed by records
    ignoremask : {True, False}, optional
        Whether masked data should be discarded or considered as duplicates.
    return_index : {False, True}, optional
        Whether to return the indices of the duplicated values.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> ndtype = [('a', int)]
    >>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3],
    ...         mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
    >>> rfn.find_duplicates(a, ignoremask=True, return_index=True)
    ... # XXX: judging by the output, the ignoremask flag has no effect
    """
    a = np.asanyarray(a).ravel()
    # Get a dictionary of fields
    fields = get_fieldstructure(a.dtype)
    # Get the sorting data (by selecting the corresponding field)
    base = a
    if key:
        for f in fields[key]:
            base = base[f]
        base = base[key]
    # Get the sorting indices and the sorted data
    sortidx = base.argsort()
    sortedbase = base[sortidx]
    sorteddata = sortedbase.filled()
    # Compare the sorting data
    flag = (sorteddata[:-1] == sorteddata[1:])
    # If masked data must be ignored, set the flag to false where needed
    if ignoremask:
        sortedmask = sortedbase.recordmask
        flag[sortedmask[1:]] = False
    flag = np.concatenate(([False], flag))
    # We need to take the point on the left as well (else we're missing it)
    flag[:-1] = flag[:-1] + flag[1:]
    duplicates = a[sortidx][flag]
    if return_index:
        return (duplicates, sortidx[flag])
    else:
        return duplicates
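
The two `flag` statements carry the real logic: comparing sorted neighbors marks every element equal to its predecessor, and the in-place OR then extends the mark to the left member of each run. A minimal sketch of just that step:

import numpy as np

sorteddata = np.array([1, 1, 2, 3, 3, 3])
flag = np.concatenate(([False], sorteddata[:-1] == sorteddata[1:]))
# flag marks elements equal to their predecessor: [F, T, F, F, T, T]
flag[:-1] = flag[:-1] + flag[1:]
# ...and now also the first element of each duplicate run:
assert np.array_equal(flag, [True, True, False, True, True, True])
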
Example 13
    def test_simple(self):
        """
        Straightforward testing with a mixture of linspace data (for
        consistency). All test values have been precomputed and should not
        change.
        """
        # Some basic sanity checking, with some fixed data.
        # Checking for the correct number of bins
        basic_test = {
            50: {
                'fd': 4,
                'scott': 4,
                'rice': 8,
                'sturges': 7,
                'doane': 8,
                'sqrt': 8,
                'auto': 7
            },
            500: {
                'fd': 8,
                'scott': 8,
                'rice': 16,
                'sturges': 10,
                'doane': 12,
                'sqrt': 23,
                'auto': 10
            },
            5000: {
                'fd': 17,
                'scott': 17,
                'rice': 35,
                'sturges': 14,
                'doane': 17,
                'sqrt': 71,
                'auto': 17
            }
        }

        for testlen, expectedResults in basic_test.items():
            # Create some non-uniform data to test with
            # (a two-peak uniform mixture)
            x1 = np.linspace(-10, -1, testlen // 5 * 2)
            x2 = np.linspace(1, 10, testlen // 5 * 3)
            x = np.concatenate((x1, x2))
            for estimator, numbins in expectedResults.items():
                a, b = np.histogram(x, estimator)
                assert_equal(len(a),
                             numbins,
                             err_msg="For the {0} estimator "
                             "with datasize of {1}".format(estimator, testlen))
Example 14
def _pad_wrap(arr, pad_amt, axis=-1):
    """
    Pad `axis` of `arr` via wrapping.

    Parameters
    ----------
    arr : ndarray
        Input array of arbitrary shape.
    pad_amt : tuple of ints, length 2
        Padding to (prepend, append) along `axis`.
    axis : int
        Axis along which to pad `arr`.

    Returns
    -------
    padarr : ndarray
        Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
        values appended along `axis`. Both regions are padded wrapped values
        from the opposite end of `axis`.

    Notes
    -----
    This method of padding is also known as 'tile' or 'tiling'.

    The modes 'reflect', 'symmetric', and 'wrap' must be handled by a
    single function, because the indexing tricks used for non-integer
    multiples of the original shape would otherwise break the repetition
    in the final iteration.

    """
    # Implicit booleanness to test for zero (or None) in any scalar type
    if pad_amt[0] == 0 and pad_amt[1] == 0:
        return arr

    ##########################################################################
    # Prepended region

    # Slice off a reverse indexed chunk from near edge to pad `arr` before
    wrap_slice = _slice_last(arr.shape, pad_amt[0], axis=axis)
    wrap_chunk1 = arr[wrap_slice]

    ##########################################################################
    # Appended region

    # Slice off a reverse indexed chunk from far edge to pad `arr` after
    wrap_slice = _slice_first(arr.shape, pad_amt[1], axis=axis)
    wrap_chunk2 = arr[wrap_slice]

    # Concatenate `arr` with both chunks, extending along `axis`
    return np.concatenate((wrap_chunk1, arr, wrap_chunk2), axis=axis)
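
This helper backs `np.pad` with ``mode='wrap'``; a short check that the prepended values really come from the far end of the axis:

import numpy as np

arr = np.array([1, 2, 3, 4, 5])
# Two values wrapped from the end, one wrapped from the start.
assert np.array_equal(np.pad(arr, (2, 1), mode='wrap'),
                      [4, 5, 1, 2, 3, 4, 5, 1])
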
Example 15
    def test_testAddSumProd(self):
        # Test add, sum, product.
        (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d
        assert_(eq(np.add.reduce(x), add.reduce(x)))
        assert_(eq(np.add.accumulate(x), add.accumulate(x)))
        assert_(eq(4, sum(array(4), axis=0)))
        assert_(eq(4, sum(array(4), axis=0)))
        assert_(eq(np.sum(x, axis=0), sum(x, axis=0)))
        assert_(eq(np.sum(filled(xm, 0), axis=0), sum(xm, axis=0)))
        assert_(eq(np.sum(x, 0), sum(x, 0)))
        assert_(eq(np.product(x, axis=0), product(x, axis=0)))
        assert_(eq(np.product(x, 0), product(x, 0)))
        assert_(eq(np.product(filled(xm, 1), axis=0),
                   product(xm, axis=0)))
        if len(s) > 1:
            assert_(eq(np.concatenate((x, y), 1),
                       concatenate((xm, ym), 1)))
            assert_(eq(np.add.reduce(x, 1), add.reduce(x, 1)))
            assert_(eq(np.sum(x, 1), sum(x, 1)))
            assert_(eq(np.product(x, 1), product(x, 1)))
Example 16
    def setup(self):
        # An array of all possible float16 values
        self.all_f16 = np.arange(0x10000, dtype=uint16)
        self.all_f16.dtype = float16
        self.all_f32 = np.array(self.all_f16, dtype=float32)
        self.all_f64 = np.array(self.all_f16, dtype=float64)

        # An array of all non-NaN float16 values, in sorted order
        self.nonan_f16 = np.concatenate(
            (np.arange(0xfc00, 0x7fff, -1, dtype=uint16),
             np.arange(0x0000, 0x7c01, 1, dtype=uint16)))
        self.nonan_f16.dtype = float16
        self.nonan_f32 = np.array(self.nonan_f16, dtype=float32)
        self.nonan_f64 = np.array(self.nonan_f16, dtype=float64)

        # An array of all finite float16 values, in sorted order
        self.finite_f16 = self.nonan_f16[1:-1]
        self.finite_f32 = self.nonan_f32[1:-1]
        self.finite_f64 = self.nonan_f64[1:-1]
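
The setup leans on in-place dtype reinterpretation: assigning to ``.dtype`` relabels the uint16 buffer as float16 without converting any values. For instance, the bit pattern 0x3c00 is float16 1.0:

import numpy as np

bits = np.array([0x0000, 0x3c00, 0x7c00], dtype=np.uint16)
bits.dtype = np.float16      # reinterpret the buffer, no conversion
assert bits[0] == 0.0 and bits[1] == 1.0 and np.isinf(bits[2])
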
Example 17
    def test_poly(self):
        assert_array_almost_equal(np.poly([3, -np.sqrt(2), np.sqrt(2)]),
                                  [1, -3, -2, 6])

        # From matlab docs
        A = [[1, 2, 3], [4, 5, 6], [7, 8, 0]]
        assert_array_almost_equal(np.poly(A), [1, -6, -72, -27])

        # Should produce real output for perfect conjugates
        assert_(np.isrealobj(np.poly([+1.082j, +2.613j, -2.613j, -1.082j])))
        assert_(np.isrealobj(np.poly([0+1j, -0+-1j, 1+2j,
                                      1-2j, 1.+3.5j, 1-3.5j])))
        assert_(np.isrealobj(np.poly([1j, -1j, 1+2j, 1-2j, 1+3j, 1-3.j])))
        assert_(np.isrealobj(np.poly([1j, -1j, 1+2j, 1-2j])))
        assert_(np.isrealobj(np.poly([1j, -1j, 2j, -2j])))
        assert_(np.isrealobj(np.poly([1j, -1j])))
        assert_(np.isrealobj(np.poly([1, -1])))

        assert_(np.iscomplexobj(np.poly([1j, -1.0000001j])))

        np.random.seed(42)
        a = np.random.randn(100) + 1j*np.random.randn(100)
        assert_(np.isrealobj(np.poly(np.concatenate((a, np.conjugate(a))))))
Example 18
def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
    r"""
    Compute the histogram of a set of data.

    Parameters
    ----------
    a : array_like
        Input data. The histogram is computed over the flattened array.
    bins : int or sequence of scalars or str, optional
        If `bins` is an int, it defines the number of equal-width
        bins in the given range (10, by default). If `bins` is a
        sequence, it defines the bin edges, including the rightmost
        edge, allowing for non-uniform bin widths.

        .. versionadded:: 1.11.0

        If `bins` is a string, it defines the method used to calculate the
        optimal bin width, as defined by `histogram_bin_edges`.

    range : (float, float), optional
        The lower and upper range of the bins.  If not provided, range
        is simply ``(a.min(), a.max())``.  Values outside the range are
        ignored. The first element of the range must be less than or
        equal to the second. `range` affects the automatic bin
        computation as well. While bin width is computed to be optimal
        based on the actual data within `range`, the bin count will fill
        the entire range including portions containing no data.
    normed : bool, optional

        .. deprecated:: 1.6.0

        This is equivalent to the `density` argument, but produces incorrect
        results for unequal bin widths. It should not be used.

        .. versionchanged:: 1.15.0
            DeprecationWarnings are actually emitted.

    weights : array_like, optional
        An array of weights, of the same shape as `a`.  Each value in
        `a` only contributes its associated weight towards the bin count
        (instead of 1). If `density` is True, the weights are
        normalized, so that the integral of the density over the range
        remains 1.
    density : bool, optional
        If ``False``, the result will contain the number of samples in
        each bin. If ``True``, the result is the value of the
        probability *density* function at the bin, normalized such that
        the *integral* over the range is 1. Note that the sum of the
        histogram values will not be equal to 1 unless bins of unity
        width are chosen; it is not a probability *mass* function.

        Overrides the ``normed`` keyword if given.

    Returns
    -------
    hist : array
        The values of the histogram. See `density` and `weights` for a
        description of the possible semantics.
    bin_edges : array of dtype float
        Return the bin edges ``(length(hist)+1)``.


    See Also
    --------
    histogramdd, bincount, searchsorted, digitize, histogram_bin_edges

    Notes
    -----
    All but the last (righthand-most) bin is half-open.  In other words,
    if `bins` is::

      [1, 2, 3, 4]

    then the first bin is ``[1, 2)`` (including 1, but excluding 2) and
    the second ``[2, 3)``.  The last bin, however, is ``[3, 4]``, which
    *includes* 4.


    Examples
    --------
    >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3])
    (array([0, 2, 1]), array([0, 1, 2, 3]))
    >>> np.histogram(np.arange(4), bins=np.arange(5), density=True)
    (array([ 0.25,  0.25,  0.25,  0.25]), array([0, 1, 2, 3, 4]))
    >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3])
    (array([1, 4, 1]), array([0, 1, 2, 3]))

    >>> a = np.arange(5)
    >>> hist, bin_edges = np.histogram(a, density=True)
    >>> hist
    array([ 0.5,  0. ,  0.5,  0. ,  0. ,  0.5,  0. ,  0.5,  0. ,  0.5])
    >>> hist.sum()
    2.4999999999999996
    >>> np.sum(hist * np.diff(bin_edges))
    1.0

    .. versionadded:: 1.11.0

    Automated Bin Selection Methods example, using 2 peak random data
    with 2000 points:

    >>> import matplotlib.pyplot as plt
    >>> rng = np.random.RandomState(10)  # deterministic random data
    >>> a = np.hstack((rng.normal(size=1000),
    ...                rng.normal(loc=5, scale=2, size=1000)))
    >>> plt.hist(a, bins='auto')  # arguments are passed to np.histogram
    >>> plt.title("Histogram with 'auto' bins")
    >>> plt.show()

    """
    a, weights = _ravel_and_check_weights(a, weights)

    bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)

    # Histogram is an integer or a float array depending on the weights.
    if weights is None:
        ntype = np.dtype(np.intp)
    else:
        ntype = weights.dtype

    # We set a block size, as this allows us to iterate over chunks when
    # computing histograms, to minimize memory usage.
    BLOCK = 65536

    # The fast path uses bincount, but that only works for certain types
    # of weight
    simple_weights = (weights is None or np.can_cast(weights.dtype, np.double)
                      or np.can_cast(weights.dtype, complex))

    if uniform_bins is not None and simple_weights:
        # Fast algorithm for equal bins
        # We now convert values of a to bin indices, under the assumption of
        # equal bin widths (which is valid here).
        first_edge, last_edge, n_equal_bins = uniform_bins

        # Initialize empty histogram
        n = np.zeros(n_equal_bins, ntype)

        # Pre-compute histogram scaling factor
        norm = n_equal_bins / _unsigned_subtract(last_edge, first_edge)

        # We iterate over blocks here for two reasons: the first is that for
        # large arrays, it is actually faster (for example for a 10^8 array it
        # is 2x as fast) and it results in a memory footprint 3x lower in the
        # limit of large arrays.
        for i in _range(0, len(a), BLOCK):
            tmp_a = a[i:i + BLOCK]
            if weights is None:
                tmp_w = None
            else:
                tmp_w = weights[i:i + BLOCK]

            # Only include values in the right range
            keep = (tmp_a >= first_edge)
            keep &= (tmp_a <= last_edge)
            if not np.logical_and.reduce(keep):
                tmp_a = tmp_a[keep]
                if tmp_w is not None:
                    tmp_w = tmp_w[keep]

            # This cast ensures no type promotions occur below, which
            # gh-10322 makes unpredictable. Getting it wrong leads to
            # precision errors like gh-8123.
            tmp_a = tmp_a.astype(bin_edges.dtype, copy=False)

            # Compute the bin indices, and for values that lie exactly on
            # last_edge we need to subtract one
            f_indices = _unsigned_subtract(tmp_a, first_edge) * norm
            indices = f_indices.astype(np.intp)
            indices[indices == n_equal_bins] -= 1

            # The index computation is not guaranteed to give exactly
            # consistent results within ~1 ULP of the bin edges.
            decrement = tmp_a < bin_edges[indices]
            indices[decrement] -= 1
            # The last bin includes the right edge. The other bins do not.
            increment = ((tmp_a >= bin_edges[indices + 1])
                         & (indices != n_equal_bins - 1))
            indices[increment] += 1

            # We now compute the histogram using bincount
            if ntype.kind == 'c':
                n.real += np.bincount(indices,
                                      weights=tmp_w.real,
                                      minlength=n_equal_bins)
                n.imag += np.bincount(indices,
                                      weights=tmp_w.imag,
                                      minlength=n_equal_bins)
            else:
                n += np.bincount(indices,
                                 weights=tmp_w,
                                 minlength=n_equal_bins).astype(ntype)
    else:
        # Compute via cumulative histogram
        cum_n = np.zeros(bin_edges.shape, ntype)
        if weights is None:
            for i in _range(0, len(a), BLOCK):
                sa = np.sort(a[i:i + BLOCK])
                cum_n += _search_sorted_inclusive(sa, bin_edges)
        else:
            zero = np.zeros(1, dtype=ntype)
            for i in _range(0, len(a), BLOCK):
                tmp_a = a[i:i + BLOCK]
                tmp_w = weights[i:i + BLOCK]
                sorting_index = np.argsort(tmp_a)
                sa = tmp_a[sorting_index]
                sw = tmp_w[sorting_index]
                cw = np.concatenate((zero, sw.cumsum()))
                bin_index = _search_sorted_inclusive(sa, bin_edges)
                cum_n += cw[bin_index]

        n = np.diff(cum_n)

    # density overrides the normed keyword
    if density is not None:
        if normed is not None:
            # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
            warnings.warn(
                "The normed argument is ignored when density is provided. "
                "In future passing both will result in an error.",
                DeprecationWarning,
                stacklevel=2)
        normed = None

    if density:
        db = np.array(np.diff(bin_edges), float)
        return n / db / n.sum(), bin_edges
    elif normed:
        # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
        warnings.warn(
            "Passing `normed=True` on non-uniform bins has always been "
            "broken, and computes neither the probability density "
            "function nor the probability mass function. "
            "The result is only correct if the bins are uniform, when "
            "density=True will produce the same result anyway. "
            "The argument will be removed in a future version of "
            "numpy.",
            np.VisibleDeprecationWarning,
            stacklevel=2)

        # this normalization is incorrect, but
        db = np.array(np.diff(bin_edges), float)
        return n / (n * db).sum(), bin_edges
    else:
        if normed is not None:
            # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
            warnings.warn(
                "Passing normed=False is deprecated, and has no effect. "
                "Consider passing the density argument instead.",
                DeprecationWarning,
                stacklevel=2)
        return n, bin_edges
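
The heart of the fast path is the linear index formula: with equal-width bins, ``index = floor((value - first_edge) * n_bins / (last_edge - first_edge))``, with values that sit exactly on the last edge pulled back into the final bin. A standalone sketch of that computation (ignoring the ULP corrections):

import numpy as np

a = np.array([0.0, 0.4, 1.0, 2.0])
first_edge, last_edge, n_equal_bins = 0.0, 2.0, 4

norm = n_equal_bins / (last_edge - first_edge)
indices = ((a - first_edge) * norm).astype(np.intp)
indices[indices == n_equal_bins] -= 1   # right edge joins the last bin
assert np.array_equal(indices, [0, 0, 2, 3])
assert np.array_equal(np.bincount(indices, minlength=n_equal_bins),
                      [2, 0, 1, 1])
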
Example 19
def _do_append(arr, pad_chunk, axis):
    return np.concatenate((arr, pad_chunk.astype(arr.dtype, copy=False)),
                          axis=axis)
Example 20
def _pad_sym(arr, pad_amt, method, axis=-1):
    """
    Pad `axis` of `arr` by symmetry.

    Parameters
    ----------
    arr : ndarray
        Input array of arbitrary shape.
    pad_amt : tuple of ints, length 2
        Padding to (prepend, append) along `axis`.
    method : str
        Controls method of symmetry; options are 'even' or 'odd'.
    axis : int
        Axis along which to pad `arr`.

    Returns
    -------
    padarr : ndarray
        Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
        values appended along `axis`. Both regions are padded with symmetric
        values from the original array.

    Notes
    -----
    This algorithm DOES pad with repetition, i.e. the edges are repeated.
    For padding without repeated edges, use `mode='reflect'`.

    The modes 'reflect', 'symmetric', and 'wrap' must be handled by a
    single function, because the indexing tricks used for non-integer
    multiples of the original shape would otherwise break the repetition
    in the final iteration.

    """
    # Implicit booleanness to test for zero (or None) in any scalar type
    if pad_amt[0] == 0 and pad_amt[1] == 0:
        return arr

    ##########################################################################
    # Prepended region

    # Slice off a reverse indexed chunk from near edge to pad `arr` before
    sym_slice = _slice_first(arr.shape, pad_amt[0], axis=axis)
    rev_idx = _slice_at_axis(arr.shape, slice(None, None, -1), axis=axis)
    sym_chunk1 = arr[sym_slice][rev_idx]

    # Memory/computationally more expensive, only do this if `method='odd'`
    if 'odd' in method and pad_amt[0] > 0:
        edge_slice1 = _slice_first(arr.shape, 1, axis=axis)
        edge_chunk = arr[edge_slice1]
        sym_chunk1 = 2 * edge_chunk - sym_chunk1
        del edge_chunk

    ##########################################################################
    # Appended region

    # Slice off a reverse indexed chunk from far edge to pad `arr` after
    sym_slice = _slice_last(arr.shape, pad_amt[1], axis=axis)
    sym_chunk2 = arr[sym_slice][rev_idx]

    if 'odd' in method:
        edge_slice2 = _slice_last(arr.shape, 1, axis=axis)
        edge_chunk = arr[edge_slice2]
        sym_chunk2 = 2 * edge_chunk - sym_chunk2
        del edge_chunk

    # Concatenate `arr` with both chunks, extending along `axis`
    return np.concatenate((sym_chunk1, arr, sym_chunk2), axis=axis)
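
Again through the public `np.pad`, contrasting the 'even' and 'odd' variants of symmetric padding described above:

import numpy as np

arr = np.array([1, 2, 3])
# 'even' symmetric padding repeats the edge values...
assert np.array_equal(np.pad(arr, 2, mode='symmetric'),
                      [2, 1, 1, 2, 3, 3, 2])
# ...while the 'odd' variant reflects through them (2*edge - chunk).
assert np.array_equal(np.pad(arr, 2, mode='symmetric', reflect_type='odd'),
                      [0, 1, 1, 2, 3, 3, 4])
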
Example 21
def in1d(ar1, ar2, assume_unique=False, invert=False):
    """
    Test whether each element of a 1-D array is also present in a second array.

    Returns a boolean array the same length as `ar1` that is True
    where an element of `ar1` is in `ar2` and False otherwise.

    We recommend using :func:`isin` instead of `in1d` for new code.

    Parameters
    ----------
    ar1 : (M,) array_like
        Input array.
    ar2 : array_like
        The values against which to test each value of `ar1`.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation.  Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted (that is,
        False where an element of `ar1` is in `ar2` and True otherwise).
        Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
        to (but is faster than) ``np.invert(in1d(a, b))``.

        .. versionadded:: 1.8.0

    Returns
    -------
    in1d : (M,) ndarray, bool
        The values `ar1[in1d]` are in `ar2`.

    See Also
    --------
    isin                  : Version of this function that preserves the
                            shape of ar1.
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Notes
    -----
    `in1d` can be considered as an element-wise function version of the
    python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
    equivalent to ``np.array([item in b for item in a])``.
    However, this idea fails if `ar2` is a set, or similar (non-sequence)
    container:  As ``ar2`` is converted to an array, in those cases
    ``asarray(ar2)`` is an object array rather than the expected array of
    contained values.

    .. versionadded:: 1.4.0

    Examples
    --------
    >>> test = np.array([0, 1, 2, 5, 0])
    >>> states = [0, 2]
    >>> mask = np.in1d(test, states)
    >>> mask
    array([ True, False,  True, False,  True])
    >>> test[mask]
    array([0, 2, 0])
    >>> mask = np.in1d(test, states, invert=True)
    >>> mask
    array([False,  True, False,  True, False])
    >>> test[mask]
    array([1, 5])
    """
    # Ravel both arrays, behavior for the first array could be different
    ar1 = np.asarray(ar1).ravel()
    ar2 = np.asarray(ar2).ravel()

    # Check if one of the arrays may contain arbitrary objects
    contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject

    # This code is run when
    # a) the first condition is true, making the code significantly faster
    # b) the second condition is true (i.e. `ar1` or `ar2` may contain
    #    arbitrary objects), since then sorting is not guaranteed to work
    if len(ar2) < 10 * len(ar1)**0.145 or contains_object:
        if invert:
            mask = np.ones(len(ar1), dtype=bool)
            for a in ar2:
                mask &= (ar1 != a)
        else:
            mask = np.zeros(len(ar1), dtype=bool)
            for a in ar2:
                mask |= (ar1 == a)
        return mask

    # Otherwise use sorting
    if not assume_unique:
        ar1, rev_idx = np.unique(ar1, return_inverse=True)
        ar2 = np.unique(ar2)

    ar = np.concatenate((ar1, ar2))
    # We need this to be a stable sort, so always use 'mergesort'
    # here. The values from the first array should always come before
    # the values from the second array.
    order = ar.argsort(kind='mergesort')
    sar = ar[order]
    if invert:
        bool_ar = (sar[1:] != sar[:-1])
    else:
        bool_ar = (sar[1:] == sar[:-1])
    flag = np.concatenate((bool_ar, [invert]))
    ret = np.empty(ar.shape, dtype=bool)
    ret[order] = flag

    if assume_unique:
        return ret[:len(ar1)]
    else:
        return ret[rev_idx]
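
The stable 'mergesort' is what makes this correct: in the concatenated array, `ar1` values sort before equal `ar2` values, so `sar[i] == sar[i+1]` is True precisely at the positions of `ar1` elements that also occur in `ar2`. A small trace (with `invert=False`, so the trailing sentinel is `False`):

import numpy as np

ar1 = np.array([0, 1, 2, 5])           # already unique
ar2 = np.array([0, 2])
ar = np.concatenate((ar1, ar2))        # [0, 1, 2, 5, 0, 2]
order = ar.argsort(kind='mergesort')   # stable: ar1's 0 precedes ar2's 0
sar = ar[order]                        # [0, 0, 1, 2, 2, 5]
flag = np.concatenate((sar[1:] == sar[:-1], [False]))
ret = np.empty(ar.shape, dtype=bool)
ret[order] = flag
assert np.array_equal(ret[:len(ar1)], [True, False, True, False])
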
Example 22
def join_by(key,
            r1,
            r2,
            jointype='inner',
            r1postfix='1',
            r2postfix='2',
            defaults=None,
            usemask=True,
            asrecarray=False):
    """
    Join arrays `r1` and `r2` on key `key`.

    The key should be either a string or a sequence of strings corresponding
    to the fields used to join the arrays.  An exception is raised if the
    `key` field cannot be found in the two input arrays.  Neither `r1` nor
    `r2` should have any duplicates along `key`: the presence of duplicates
    will make the output quite unreliable. Note that duplicates are not
    looked for by the algorithm.

    Parameters
    ----------
    key : {string, sequence}
        A string or a sequence of strings corresponding to the fields used
        for comparison.
    r1, r2 : arrays
        Structured arrays.
    jointype : {'inner', 'outer', 'leftouter'}, optional
        If 'inner', returns the elements common to both r1 and r2.
        If 'outer', returns the common elements as well as the elements of
        r1 not in r2 and the elements of r2 not in r1.
        If 'leftouter', returns the common elements and the elements of r1
        not in r2.
    r1postfix : string, optional
        String appended to the names of the fields of r1 that are present
        in r2 but absent from the key.
    r2postfix : string, optional
        String appended to the names of the fields of r2 that are present
        in r1 but absent from the key.
    defaults : {dictionary}, optional
        Dictionary mapping field names to the corresponding default values.
    usemask : {True, False}, optional
        Whether to return a MaskedArray (or MaskedRecords if
        `asrecarray==True`) or an ndarray.
    asrecarray : {False, True}, optional
        Whether to return a recarray (or MaskedRecords if `usemask==True`)
        or just a flexible-type ndarray.

    Notes
    -----
    * The output is sorted along the key.
    * A temporary array is formed by dropping the fields not in the key for
      the two arrays and concatenating the result. This array is then
      sorted, and the common entries selected. The output is constructed by
      filling the fields with the selected entries. Matching is not
      preserved if there are some duplicates...

    """
    # Check jointype
    if jointype not in ('inner', 'outer', 'leftouter'):
        raise ValueError("The 'jointype' argument should be in 'inner', "
                         "'outer' or 'leftouter' (got '%s' instead)" %
                         jointype)
    # If we have a single key, put it in a tuple
    if isinstance(key, basestring):
        key = (key, )

    # Check the keys
    if len(set(key)) != len(key):
        dup = next(x for n, x in enumerate(key) if x in key[n + 1:])
        raise ValueError("duplicate join key %r" % dup)
    for name in key:
        if name not in r1.dtype.names:
            raise ValueError('r1 does not have key field %r' % name)
        if name not in r2.dtype.names:
            raise ValueError('r2 does not have key field %r' % name)

    # Make sure we work with ravelled arrays
    r1 = r1.ravel()
    r2 = r2.ravel()
    # Fixme: nb2 below is never used. Commenting out for pyflakes.
    # (nb1, nb2) = (len(r1), len(r2))
    nb1 = len(r1)
    (r1names, r2names) = (r1.dtype.names, r2.dtype.names)

    # Check the names for collision
    collisions = (set(r1names) & set(r2names)) - set(key)
    if collisions and not (r1postfix or r2postfix):
        msg = "r1 and r2 contain common names, r1postfix and r2postfix "
        msg += "can't both be empty"
        raise ValueError(msg)

    # Make temporary arrays of just the keys
    #  (use order of keys in `r1` for back-compatibility)
    key1 = [n for n in r1names if n in key]
    r1k = _keep_fields(r1, key1)
    r2k = _keep_fields(r2, key1)

    # Concatenate the two arrays for comparison
    aux = ma.concatenate((r1k, r2k))
    idx_sort = aux.argsort(order=key)
    aux = aux[idx_sort]
    #
    # Get the common keys
    flag_in = ma.concatenate(([False], aux[1:] == aux[:-1]))
    flag_in[:-1] = flag_in[1:] + flag_in[:-1]
    idx_in = idx_sort[flag_in]
    idx_1 = idx_in[(idx_in < nb1)]
    idx_2 = idx_in[(idx_in >= nb1)] - nb1
    (r1cmn, r2cmn) = (len(idx_1), len(idx_2))
    if jointype == 'inner':
        (r1spc, r2spc) = (0, 0)
    elif jointype == 'outer':
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        idx_2 = np.concatenate((idx_2, idx_out[(idx_out >= nb1)] - nb1))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, len(idx_2) - r2cmn)
    elif jointype == 'leftouter':
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, 0)
    # Select the entries from each input
    (s1, s2) = (r1[idx_1], r2[idx_2])
    #
    # Build the new description of the output array .......
    # Start with the key fields
    ndtype = get_fieldspec(r1k.dtype)

    # Add the fields from r1
    for fname, fdtype in get_fieldspec(r1.dtype):
        if fname not in key:
            ndtype.append((fname, fdtype))

    # Add the fields from r2
    for fname, fdtype in get_fieldspec(r2.dtype):
        # Have we seen the current name already ?
        # we need to rebuild this list every time
        names = list(name for name, dtype in ndtype)
        try:
            nameidx = names.index(fname)
        except ValueError:
            #... we haven't: just add the description to the current list
            ndtype.append((fname, fdtype))
        else:
            # collision
            _, cdtype = ndtype[nameidx]
            if fname in key:
                # The current field is part of the key: take the largest dtype
                ndtype[nameidx] = (fname, max(fdtype, cdtype))
            else:
                # The current field is not part of the key: add the suffixes,
                # and place the new field adjacent to the old one
                ndtype[nameidx:nameidx + 1] = [(fname + r1postfix, cdtype),
                                               (fname + r2postfix, fdtype)]
    # Rebuild a dtype from the new fields
    ndtype = np.dtype(ndtype)
    # Find the largest nb of common fields :
    # r1cmn and r2cmn should be equal, but...
    cmn = max(r1cmn, r2cmn)
    # Construct an empty array
    output = ma.masked_all((cmn + r1spc + r2spc, ), dtype=ndtype)
    names = output.dtype.names
    for f in r1names:
        selected = s1[f]
        if f not in names or (f in r2names and not r2postfix and f not in key):
            f += r1postfix
        current = output[f]
        current[:r1cmn] = selected[:r1cmn]
        if jointype in ('outer', 'leftouter'):
            current[cmn:cmn + r1spc] = selected[r1cmn:]
    for f in r2names:
        selected = s2[f]
        if f not in names or (f in r1names and not r1postfix and f not in key):
            f += r2postfix
        current = output[f]
        current[:r2cmn] = selected[:r2cmn]
        if (jointype == 'outer') and r2spc:
            current[-r2spc:] = selected[r2cmn:]
    # Sort and finalize the output
    output.sort(order=key)
    kwargs = dict(usemask=usemask, asrecarray=asrecarray)
    return _fix_output(_fix_defaults(output, defaults), **kwargs)
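
A minimal usage sketch (the structured arrays and field names here are hypothetical):

import numpy as np
from numpy.lib import recfunctions as rfn

r1 = np.array([(1, 10.0), (2, 20.0)], dtype=[('key', int), ('a', float)])
r2 = np.array([(2, 'x'), (3, 'y')], dtype=[('key', int), ('b', 'U1')])
# An inner join keeps only the key values common to both arrays.
joined = rfn.join_by('key', r1, r2, jointype='inner', usemask=False)
assert joined['key'].tolist() == [2]
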
Example 23
def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
    """
    Find the intersection of two arrays.

    Return the sorted, unique values that are in both of the input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. Will be flattened if not already 1D.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation.  Default is False.
    return_indices : bool
        If True, the indices which correspond to the intersection of the two
        arrays are returned. The first instance of a value is used if there are
        multiple. Default is False.

        .. versionadded:: 1.15.0

    Returns
    -------
    intersect1d : ndarray
        Sorted 1D array of common and unique elements.
    comm1 : ndarray
        The indices of the first occurrences of the common values in `ar1`.
        Only provided if `return_indices` is True.
    comm2 : ndarray
        The indices of the first occurrences of the common values in `ar2`.
        Only provided if `return_indices` is True.


    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
    array([1, 3])

    To intersect more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([3])

    To return the indices of the values common to the input arrays
    along with the intersected values:

    >>> x = np.array([1, 1, 2, 3, 4])
    >>> y = np.array([2, 1, 4, 6])
    >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
    >>> x_ind, y_ind
    (array([0, 2, 4]), array([1, 0, 2]))
    >>> xy, x[x_ind], y[y_ind]
    (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))

    """
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)

    if not assume_unique:
        if return_indices:
            ar1, ind1 = unique(ar1, return_index=True)
            ar2, ind2 = unique(ar2, return_index=True)
        else:
            ar1 = unique(ar1)
            ar2 = unique(ar2)
    else:
        ar1 = ar1.ravel()
        ar2 = ar2.ravel()

    aux = np.concatenate((ar1, ar2))
    if return_indices:
        aux_sort_indices = np.argsort(aux, kind='mergesort')
        aux = aux[aux_sort_indices]
    else:
        aux.sort()

    mask = aux[1:] == aux[:-1]
    int1d = aux[:-1][mask]

    if return_indices:
        ar1_indices = aux_sort_indices[:-1][mask]
        ar2_indices = aux_sort_indices[1:][mask] - ar1.size
        if not assume_unique:
            ar1_indices = ind1[ar1_indices]
            ar2_indices = ind2[ar2_indices]

        return int1d, ar1_indices, ar2_indices
    else:
        return int1d
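
The same neighbor-comparison trick as in `setxor1d`, inverted: after sorting the concatenation of two unique arrays, a value is common exactly when it equals its successor. A short trace of the docstring example:

import numpy as np

aux = np.sort(np.concatenate((np.unique([1, 3, 4, 3]),
                              np.unique([3, 1, 2, 1]))))
# aux == [1, 1, 2, 3, 3, 4]; duplicates can only come from both inputs.
mask = aux[1:] == aux[:-1]
assert np.array_equal(aux[:-1][mask], [1, 3])
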